/*
This is Stata code for defining Epilepsy using administrative data.
The algorithm is published in Tonelli et al BMC Med Inform Dec Making 2015;15:31
and based on the work in Jette et al Epilepsia 2010;51:62-69.

A patient is flagged with epilepsy on the earliest of:
  - one hospitalization whose most responsible diagnosis is an epilepsy code,
  - the first of two physician claims with an epilepsy code that lie within
    two years of each other,
  - one ACCS (ambulatory care) record with an epilepsy code in diagnosis
    position 1.

Configuration (globals defined below):

1) Hospitalization data
   data_hosp              file name of the hospitalization data
   hosp_icd9_code         stub of the ICD-9 diagnosis code variables
   num_hosp_icd9          maximum number of ICD-9 diagnosis code positions
   hosp_icd10_code        stub of the ICD-10 diagnosis code variables
   num_hosp_icd10         maximum number of ICD-10 diagnosis code positions
   hosp_start_date        variable name of the hospitalization start date
   type                   stub of the diagnosis type variables
   hosp_most_responsible  value that marks the most responsible diagnosis

2) Physician claims data
   data_claim             file name of the physician claims data
   claim_icd9_code        stub of the ICD-9 diagnosis code variables
   num_claim_icd9         maximum number of ICD-9 diagnosis code positions
   claim_start_date       variable name of the physician claim date

3) ACCS data
   data_accs              file name of the ACCS data
   accs_icd9_code         stub of the ICD-9 diagnosis code variables
   accs_icd10_code        stub of the ICD-10 diagnosis code variables
   accs_start_date        variable name of the ACCS start date
   accs_most_responsible  value for the most responsible diagnosis code
   Only diagnosis position 1 is read from the ACCS data: the position of the
   most responsible diagnosis is hardcoded as 1.

4) out_source             directory of the output dataset

5) Create a study cohort which includes the unique patient identifier and the
   study start and end dates (e.g., when a patient turns 18 until they die or
   out-migrate).
*/

* File name and variable names for the hospitalizations dataset
global data_hosp = "G:\Open Data\ICDC 2013\ICDC Source\Constant files\hosp94_2013.dta"
global hosp_icd9_code = "hosp_icd9dx_code"
global num_hosp_icd9 = 16
global hosp_icd10_code = "hosp_icd10dx_code"
global num_hosp_icd10 = 25
global hosp_start_date = "start_date"
global type = "diag_type"
global hosp_most_responsible = "M"

* File names and variable names for the physician claims dataset
global data_claim = "G:\Open Data\ICDC 2013\ICDC Source\claims_94_13.dta"
global claim_icd9_code = "hlth_dx_icd9x_code_"
global num_claim_icd9 = 3
global claim_start_date = "start_date"

* File names and variable names for the ACCS dataset
global data_accs = "G:\Open Data\ICDC 2013\ICDC Source\Constant files\accs97_2013.dta"
global accs_icd9_code = "accs_icd9dx_code"
global accs_icd10_code = "accs_icd10dx_code"
global accs_start_date = "start_date"
global accs_most_responsible = "M"

* Output dataset directory
global out_source = "G:\Projects\Phoebe\MM30codes\kidtran"

/*
Sample program call:

    ICDmm30_epilepsy cohort akdnid study_start_date study_end_date

'cohort'           is the file name of the study cohort
'akdnid'           is the unique patient identifier variable
'study_start_date' is the variable holding the study start date
'study_end_date'   is the variable holding the study end date

Output: "ICD_epilepsy.dta" in the out_source directory, one row per patient
with the identifier, epilepsy_date (earliest qualifying date) and epilepsy=1.
*/

version 13.1
capture program drop ICDmm30_epilepsy
program ICDmm30_epilepsy
    set more off

    * Positional arguments: cohort file, id variable, start/end date variables
    gettoken cohort 0 : 0
    gettoken id 0 : 0
    gettoken study_start_date 0 : 0
    gettoken study_end_date 0 : 0

    * De-duplicated cohort goes to a temporary file so that nothing is left
    * behind in (or overwritten in) the working directory, even on error.
    tempfile cohort_dedup
    use "`cohort'", clear
    duplicates drop
    sort `id', stable
    save "`cohort_dedup'", replace

    * Epilepsy code prefixes (dots are stripped from the data before matching)
    local ICD9_epilepsy 345
    local ICD10_epilepsy G40 G41

    local n_hosp9  = ${num_hosp_icd9}
    local n_hosp10 = ${num_hosp_icd10}
    local n_claim9 = ${num_claim_icd9}

    * ------------------------------------------------------------------
    * One hospitalization (most responsible diagnosis)
    * ------------------------------------------------------------------
    use "${data_hosp}", clear

    * Diagnosis type variables are looked up by position for both the ICD-9
    * and the ICD-10 codes, so keep positions 1..max(ICD-9, ICD-10).
    local n_type = max(`n_hosp9', `n_hosp10')
    local type_vars
    forvalues k = 1/`n_type' {
        local type_vars `type_vars' ${type}`k'
    }

    keep `id' ${hosp_start_date} ///
        ${hosp_icd9_code}1-${hosp_icd9_code}${num_hosp_icd9} ///
        ${hosp_icd10_code}1-${hosp_icd10_code}${num_hosp_icd10} ///
        `type_vars'
    recast long ${hosp_start_date}, force
    sort `id', stable

    merge m:1 `id' using "`cohort_dedup'"
    keep if _merge==3
    drop _merge
    keep if ${hosp_start_date}>=`study_start_date' & ${hosp_start_date}<=`study_end_date'

    * Strip dots from the codes; capture keeps this best-effort per variable
    forvalues pos = 1/`n_hosp9' {
        capture replace ${hosp_icd9_code}`pos' = subinstr(${hosp_icd9_code}`pos', ".", "", .)
    }
    forvalues pos = 1/`n_hosp10' {
        capture replace ${hosp_icd10_code}`pos' = subinstr(${hosp_icd10_code}`pos', ".", "", .)
    }

    gen type=""
    gen code=""

    * Positions are scanned from last to first so that the lowest matching
    * position is the one that ends up in code/type.
    foreach i in `ICD9_epilepsy' {
        local len = strlen("`i'")
        forvalues dx = `n_hosp9'(-1)1 {
            capture replace code="`i'" if substr(${hosp_icd9_code}`dx',1,`len')=="`i'"
            capture replace type=${type}`dx' if substr(${hosp_icd9_code}`dx',1,`len')=="`i'"
        }
    }
    foreach j in `ICD10_epilepsy' {
        local len = strlen("`j'")
        forvalues dx = `n_hosp10'(-1)1 {
            capture replace code="`j'" if substr(${hosp_icd10_code}`dx',1,`len')=="`j'"
            capture replace type=${type}`dx' if substr(${hosp_icd10_code}`dx',1,`len')=="`j'"
        }
    }

    keep if code!=""
    keep if type=="${hosp_most_responsible}"
    gen source="hosp"
    keep `id' ${hosp_start_date} code source
    rename ${hosp_start_date} epilepsy_date

    * Earliest qualifying hospitalization per patient
    sort `id' epilepsy_date, stable
    by `id': keep if _n==1
    save "${out_source}\ICD_epilepsy.dta", replace

    * ------------------------------------------------------------------
    * Two claims within 2 years
    * ------------------------------------------------------------------
    use "${data_claim}", clear
    keep `id' ${claim_start_date} ${claim_icd9_code}1-${claim_icd9_code}${num_claim_icd9}
    recast long ${claim_start_date}, force
    sort `id', stable

    merge m:1 `id' using "`cohort_dedup'"
    keep if _merge==3
    drop _merge
    keep if ${claim_start_date}>=`study_start_date' & ${claim_start_date}<=`study_end_date'

    forvalues pos = 1/`n_claim9' {
        capture replace ${claim_icd9_code}`pos' = subinstr(${claim_icd9_code}`pos', ".", "", .)
    }

    gen code=""
    foreach i in `ICD9_epilepsy' {
        local len = strlen("`i'")
        forvalues dx = `n_claim9'(-1)1 {
            capture replace code="`i'" if substr(${claim_icd9_code}`dx',1,`len')=="`i'"
        }
    }

    keep if code!=""
    gen source="claim"
    keep `id' ${claim_start_date} code source
    rename ${claim_start_date} epilepsy_date

    * Flag a claim when the patient's next claim falls within two years of it.
    * For the last claim of a patient the [_n+1] reference is missing, which
    * compares greater than any date, so that claim is never flagged.
    sort `id' epilepsy_date, stable
    by `id': gen claim=1 if epilepsy_date[_n+1]<=(epilepsy_date[_n]+round(365.25*2,1))
    keep if claim==1
    drop claim
    by `id': keep if _n==1

    * Append hospitalizations with claims
    append using "${out_source}\ICD_epilepsy.dta"
    sort `id' epilepsy_date, stable
    save "${out_source}\ICD_epilepsy.dta", replace

    * ------------------------------------------------------------------
    * One ACCS record (diagnosis position 1 only)
    * ------------------------------------------------------------------
    use "${data_accs}", clear
    keep `id' ${accs_start_date} ${accs_icd9_code}1 ${accs_icd10_code}1
    recast long ${accs_start_date}, force
    sort `id', stable

    merge m:1 `id' using "`cohort_dedup'"
    keep if _merge==3
    drop _merge
    keep if ${accs_start_date}>=`study_start_date' & ${accs_start_date}<=`study_end_date'

    capture replace ${accs_icd9_code}1 = subinstr(${accs_icd9_code}1, ".", "", .)
    capture replace ${accs_icd10_code}1 = subinstr(${accs_icd10_code}1, ".", "", .)

    gen code=""
    foreach i in `ICD9_epilepsy' {
        local len = strlen("`i'")
        capture replace code="`i'" if substr(${accs_icd9_code}1,1,`len')=="`i'"
    }
    foreach i in `ICD10_epilepsy' {
        local len = strlen("`i'")
        capture replace code="`i'" if substr(${accs_icd10_code}1,1,`len')=="`i'"
    }

    keep if code!=""
    gen source="accs"
    keep `id' ${accs_start_date} code source
    rename ${accs_start_date} epilepsy_date

    * Earliest qualifying ACCS record per patient
    sort `id' epilepsy_date, stable
    by `id': keep if _n==1

    * ------------------------------------------------------------------
    * Append hospitalizations and claims with ACCS; keep the earliest date
    * ------------------------------------------------------------------
    append using "${out_source}\ICD_epilepsy.dta"
    sort `id' epilepsy_date, stable
    by `id': keep if _n==1

    gen epilepsy=1
    keep `id' epilepsy_date epilepsy
    sort `id' epilepsy_date
    save "${out_source}\ICD_epilepsy.dta", replace
end