/* 
This is stata code for defining Myocardial Infarction using administrative data
The algorithm is published in Tonelli et al BMC Med Inform Dec Making 2015;15:31
and based on the work in Austin et al Am Heart J 2002;144:290-296

1)  Define the stem of the ICD-9 diagnosis variables as hosp_icd9_code (the code appends the
	diagnosis position X to this stem, i.e. the variables are named <stem>X)
	Define the maximum number of ICD-9 diagnosis code positions as num_hosp_icd9
	Define the stem of the ICD-10 diagnosis variables as hosp_icd10_code (position X is appended)
	Define the maximum number of ICD-10 diagnosis code positions as num_hosp_icd10
	Define the variable name of the hospitalization start date as hosp_start_date
	Define the stem of the diagnosis type variables as type (position X is appended;
	only diagnoses whose type is "M" are counted)

2)  Specify the directory of the output dataset

3)  Create a study cohort which includes the unique patient identifier, the study 
	start and end dates (e.g., when a patient turns 18 until they die or out-migrate)
*/

* ---------------------------------------------------------------------------
* Hospitalization source data
* ---------------------------------------------------------------------------
* Number of hospitalization data files, followed by one path per file
* (data_hosp1 ... data_hosp<num_hosp_file>)
global num_hosp_file 2
global data_hosp1 "G:\Open Data\ICDC 2019\ICDC Source\hosp_02_19.dta"
global data_hosp2 "G:\Open Data\ICDC 2019\ICDC Source\hosp_94_02.dta"

* Stem of the ICD-9 diagnosis variables and the highest diagnosis position
global hosp_icd9_code "hosp_icd9dx_code"
global num_hosp_icd9 16

* Stem of the ICD-10 diagnosis variables and the highest diagnosis position
global hosp_icd10_code "hosp_icd10dx_code"
global num_hosp_icd10 25

* Hospitalization start date variable and stem of the diagnosis type variables
global hosp_start_date "start_date"
global type "diag_type"

* ---------------------------------------------------------------------------
* Output dataset directory
* ---------------------------------------------------------------------------
global out_source "G:\Phoebe\MM30"

/*
	Sample program call
	ICDmm30_ami cohort akdnid study_start_date study_end_date 
		'cohort' is the file name of the study cohort
		'akdnid' is the unique patient identifier variable (the id argument)
		'study_start_date' is the study start date
		'study_end_date' is the study end date
*/

version 13.1
capture program drop ICDmm30_ami

* ICDmm30_ami cohort id study_start_date study_end_date
*
* Flags the first hospitalization for myocardial infarction (ICD-9 410,
* ICD-10 I21/I22, diagnosis type "M") for each cohort member and saves one
* row per patient to "${out_source}\ICD_ami.dta".
*
* Arguments (positional):
*   cohort            file name of the study cohort dataset
*   id                name of the patient identifier variable
*   study_start_date  lower bound used in the date filter (evaluated after the
*                     cohort has been merged in, so a cohort variable name works)
*   study_end_date    upper bound used in the date filter (same remark)
*
* Reads the globals num_hosp_file, data_hosp1..data_hosp<num_hosp_file>,
* hosp_icd9_code, num_hosp_icd9, hosp_icd10_code, num_hosp_icd10,
* hosp_start_date, type and out_source.
*
* Output variables: <id>, ami_date, type, code, source ("hosp"), ami (1).
program ICDmm30_ami
	set more off

	gettoken cohort 0:0
	gettoken id 0:0
	gettoken study_start_date 0:0
	gettoken study_end_date 0:0

	* All four positional arguments are required
	if "`study_end_date'" == "" {
		display as error "syntax: ICDmm30_ami cohort id study_start_date study_end_date"
		exit 198
	}

	* Cohort: drop exact duplicate rows and stash it for the merges below.
	* A tempfile is used so that nothing is left behind (or overwritten) in the
	* working directory, even if the program stops with an error.
	tempfile cohort_tmp
	use "`cohort'", clear
	duplicates drop
	sort `id', stable
	save "`cohort_tmp'"

	* Myocardial infarction code prefixes
	local ICD9_ami 410
	local ICD10_ami I21 I22

	local nfiles = ${num_hosp_file}
	local n9 = ${num_hosp_icd9}
	local n10 = ${num_hosp_icd10}

	* One pass per hospitalization file; each pass yields at most one row
	* (the earliest qualifying admission) per patient
	forvalues nfile = 1/`nfiles' {
		use "${data_hosp`nfile'}", clear

		recast long ${hosp_start_date}, force
		sort `id', stable
		merge m:1 `id' using "`cohort_tmp'", keep(match) nogenerate
		keep if ${hosp_start_date} >= `study_start_date' & ${hosp_start_date} <= `study_end_date'

		* Strip the decimal point from every diagnosis code.
		* capture is deliberate: a diagnosis variable that does not exist in
		* this file is skipped instead of stopping the program.
		forvalues pos = 1/`n9' {
			capture replace ${hosp_icd9_code}`pos' = subinstr(${hosp_icd9_code}`pos', ".", "", .)
		}
		forvalues pos = 1/`n10' {
			capture replace ${hosp_icd10_code}`pos' = subinstr(${hosp_icd10_code}`pos', ".", "", .)
		}

		gen type = ""
		gen code = ""

		* Positions are scanned from last to first so that, when several
		* positions qualify, the lowest position is the one that is kept.
		foreach prefix of local ICD9_ami {
			local len = strlen("`prefix'")
			forvalues dx = `n9'(-1)1 {
				capture replace code = "`prefix'" if substr(${hosp_icd9_code}`dx', 1, `len') == "`prefix'" & ${type}`dx' == "M"
				capture replace type = ${type}`dx' if substr(${hosp_icd9_code}`dx', 1, `len') == "`prefix'" & ${type}`dx' == "M"
			}
		}

		foreach prefix of local ICD10_ami {
			local len = strlen("`prefix'")
			forvalues dx = `n10'(-1)1 {
				capture replace code = "`prefix'" if substr(${hosp_icd10_code}`dx', 1, `len') == "`prefix'" & ${type}`dx' == "M"
				capture replace type = ${type}`dx' if substr(${hosp_icd10_code}`dx', 1, `len') == "`prefix'" & ${type}`dx' == "M"
			}
		}

		* Keep qualifying admissions only, then the first one per patient
		keep if code != ""
		keep `id' ${hosp_start_date} type code
		rename ${hosp_start_date} ami_date
		sort `id' ami_date, stable
		bysort `id': keep if _n == 1
		gen source = "hosp"
		gen ami = 1

		tempfile part`nfile'
		save "`part`nfile''"
	}

	* Combine the per-file results, in file order, and keep the first event
	* for each patient. Each iteration appends its OWN file (`nfile'); the
	* loop is skipped when there is a single file.
	use "`part1'", clear
	forvalues nfile = 2/`nfiles' {
		append using "`part`nfile''"
	}

	sort `id' ami_date, stable
	bysort `id': keep if _n == 1
	save "${out_source}\ICD_ami.dta", replace
end