/*****************************************************************************

PROGRAM:	CHF_APR94_MAR15.SAS
TITLE:		Defining the Congestive Heart Failure Cohort
CREATED BY:	Interdisciplinary Chronic Disease Collaboration (ICDC)/ Alberta Kidney Disease Network (AKDN), August 2015
Updated: Mar 2016

======================================================================

	TERMS OF USE:
   
   	This code was created by the ICDC/AKDN for its own use. 

   	Please be aware that the ICDC/AKDN DOES NOT:
		1) Provide any guarantees about this code
		2) Commit to the availability of any updates to the code
		3) Support any implementation outside of the ICDC/AKDN

	Please use this code for information purposes only.

======================================================================

Goal: 	Identify people in the Alberta Health and Wellness (AHW) administrative data 
		from April 1, 1994 to March 31, 2015 with congestive heart failure (CHF). 
		This will form a cohort of patients with CHF as a disease condition.

ICDC/AKDN Definition for CHF

DATASETS:
	1. Hospitalization Discharge Abstract Database (DAD), Apr. 1994 - Mar. 2015.
		Only CHF coded as the most responsible diagnosis (type='M'). 
 	2. Supplemental Enhanced Service Event (SESE) Database (Physician claims), Apr. 1994 - Mar. 2015, 
		all ICD diagnostic fields
		Visits with diagnostic prefix "X", "B" or "E" excluded (These represent labs, radiology,
        ophthalmologist, dental, and chiropractic visits).
	3. Ambulatory Care Classification System (ACCS) Database, Apr. 1994 - Mar. 2015.
		CHF coded as the most responsible diagnosis (type='M') from ER visits only.
		ER visits are defined by MIS codes 71310 (Emergency) or 7131020 (General Emergency).

CODES:
	CHF: 428 (ICD9) AND I50 (ICD10)

HOSPITALIZATION/ACCS TYPE:
	Most responsible diagnosis (type='M')

DATES TO BE USED: 
	All available data. 

ONSET DATE: 
	The date of first CHF occurrence between April 1, 1994 and March 31, 2015 will be 
	used as the 'onset' date.

WASH-OUT PERIOD: There will be a wash-out period of at least 3 years. Therefore, if the first CHF (by definition) 
	occurs on or after April 1, 1997, CHF will be considered an incident case. If the first CHF (by definition) 
	occurs between April 1, 1994 and March 31, 1997, there will not be a wash-	out period of at least 3 years available. 
	In this case, CHF will be considered prevalent.
	Note: ACCS data is available from 1997, whereas hospitalization and claims data are available from 1994. 
	If the first CHF (by definition) is identified from the ACCS dataset and occurs before April 1, 2000, 
	then CHF will be considered prevalent. This ensures at least a 3 year wash-out period for ACCS.

USAGE NOTES (for this program):
	The CHF_94_15_cohort dataset includes the patients with CHF identified from the claims (no claims containing
	a procedure code with prefix X,B, or E), hospitalizations (most responsible diagnosis), and ACCS 
	(most responsible diagnosis from ER visits only). This dataset can be used 	to identify a cohort of patients 
	with CHF as a disease condition. 

	Only the first incident CHF is included. CHF episodes following the first (incident event) are not 	included.

	The dataset includes both adults and non-adults at the time of first CHF.

	The dataset includes a DATA_SOURCE variable to identify the data source of the first CHF.

	PHN is the unique identifier used to link the administrative data.

******************************************************************************/

option threads nofmterr nonumber mlogic helpbrowser=sas;  * HELPBROWSER=SAS sets the SAS browser to default in the 64-bit OS --;

* Location of the AHW administrative datasets (Hospitalizations, Physician Claims, ACCS) --;
libname ahw 'G:\IDENTIFIED\AHW\EXTRACTION_1994_2015\DATA\CleanDataCombined_2013_2015\DataCombined_2013_2015';

* Location to store the CHF_94_15_cohort dataset --;
libname chf 'G:\IDENTIFIED\AHW\EXTRACTION_1994_2015\DATA\DERIVED_DATA\AHW_CHF\DATA';

* Location of the demographics dataset created by the ICDC/AKDN. Includes information on gender, and dates of birth, death,
  and out-migration from Alberta --;
libname demog 'G:\IDENTIFIED\AHW\EXTRACTION_1994_2015\DATA\DERIVED_DATA\AHW_DEMOGRAPHICS\DEMOGRAPHICS\DATA';

*********************************;
* MACROS USED IN THIS PROGRAM -- ;
*********************************;

* MACRO for defining Fiscal Year from April 1994 - March 2009 --;

%macro fy(in_t=,out_t=,date=);
	data &out_t;
		set &in_t;
		if not missing(&date) then do;
			if               &date<='31MAR1994'd then FY='----/1994';
			if '01APR1994'd<=&date<='31MAR1995'd then FY='1994/1995';
			if '01APR1995'd<=&date<='31MAR1996'd then FY='1995/1996';
			if '01APR1996'd<=&date<='31MAR1997'd then FY='1996/1997';
			if '01APR1997'd<=&date<='31MAR1998'd then FY='1997/1998';
			if '01APR1998'd<=&date<='31MAR1999'd then FY='1998/1999';
			if '01APR1999'd<=&date<='31MAR2000'd then FY='1999/2000';
			if '01APR2000'd<=&date<='31MAR2001'd then FY='2000/2001';
			if '01APR2001'd<=&date<='31MAR2002'd then FY='2001/2002';
			if '01APR2002'd<=&date<='31MAR2003'd then FY='2002/2003';
			if '01APR2003'd<=&date<='31MAR2004'd then FY='2003/2004';
			if '01APR2004'd<=&date<='31MAR2005'd then FY='2004/2005';
			if '01APR2005'd<=&date<='31MAR2006'd then FY='2005/2006';
			if '01APR2006'd<=&date<='31MAR2007'd then FY='2006/2007';
			if '01APR2007'd<=&date<='31MAR2008'd then FY='2007/2008';
			if '01APR2008'd<=&date<='31MAR2009'd then FY='2008/2009';
			if '01APR2009'd<=&date<='31MAR2010'd then FY='2009/2010';
			if '01APR2010'd<=&date<='31MAR2011'd then FY='2010/2011';
			if '01APR2011'd<=&date<='31MAR2012'd then FY='2011/2012';
			if '01APR2012'd<=&date<='31MAR2013'd then FY='2012/2013';
			if '01APR2013'd<=&date<='31MAR2014'd then FY='2013/2014';
			if '01APR2014'd<=&date<='31MAR2015'd then FY='2014/2015';
			if '01APR2015'd<=&date               then FY='2015/----';
		end;
		COUNT=1;
	run;
%mend fy;

* MACRO for sorting by &var--;
%macro srt(in_t=,out_t=,var=); 
  	proc sort data=&in_t out=&out_t; 
		by &var; 
	run; 
%mend srt; 

* MACRO to delete records in datasets occuring before dob and after death or outmigration --;
* This macro uses the demographics dataset and SRT macro, defines the AGE variable, and produces 
* frequencies of records to be deleted. It also creates the &out_t2 dataset with the records deleted --;

%macro demogcheck(in_t=,out_t1=,out_t2=,sourcedate=);
	data &out_t1;
		merge &in_t (in=in1) demog.ahw_demographics_2015(in=in2);
  		by phn;
  		if in1;
		age=(&sourcedate-PERS_dob)/365.25;
  		if (PERS_dob^=. and PERS_dob>&sourcedate) or PERS_dob=. then dobdelete=1; 
  		if death_date^=. and death_date<&sourcedate then deathdelete=1;
		if out_migrate_date^=. and out_migrate_date<&sourcedate then outdelete=1;
		if dobdelete=1 | deathdelete=1 | outdelete=1 then rec_delete=1;
	run;

	proc freq data=&out_t1;
		table dobdelete*deathdelete*outdelete*rec_delete/list missing;
	run;

	%srt(in_t=&out_t1,out_t=&out_t1,var=phn);

	data &out_t2;
		set &out_t1;
		if rec_delete=1 then delete;
	run;
%mend demogcheck;

* MACRO to count number of subjects --;

%macro subjects(ds=);
	proc sort data=&ds out=subj_&ds nodupkey;
		by phn;
	run;
%mend subjects;

* MACRO to extract ICD9 codes from the PHYSICIAN CLAIMS dataset --;
* 		The ICD9 codes from the physician claims database are available from 1 dataset covering
*		the fiscal years 1994-2015. There are 3 fields for ICD9 diagnosis codes.;

%macro claim_icd9(in_t=,out_t1=,out_t2=);
	data &out_t1;
		LENGTH CODE $7;
		set &in_t(keep=PHN END_DATE HLTH_DX_ICD9X_CODE_1-HLTH_DX_ICD9X_CODE_3);
		CODE=' ';
		ARRAY diag_icd9_ (*) $ HLTH_DX_ICD9X_CODE_1-HLTH_DX_ICD9X_CODE_3;
		do i=1 to DIM(diag_icd9_);
			if substr(diag_icd9_(i),1,3)='428'
			then do;
				CHF='1';
				SOURCE='CLAIM';
				if CODE=' ' then CODE=diag_icd9_(i);
				TYPE='NA';
			end;
		end;
		drop i;
		rename END_DATE=start_date;
	run;

	data &out_t2;
		retain PHN START_DATE CHF SOURCE TYPE CODE;
		LENGTH CODE $7;
		set &out_t1;
		if CHF='1';
		keep PHN START_DATE CHF SOURCE TYPE CODE;
	run;
%mend claim_icd9;

* MACRO to extract ICD9 codes from the HOSPITALIZATIONS datasets --;
* 		The ICD9 codes from the hospitalizations database are available from 3 datasets covering
*		the fiscal years 1994-1997, 1997-1999, and 1999-2002. There are 16 fields for ICD9
*		diagnosis codes.;

%macro hosp_icd9(in_t=,out_t1=,out_t2=);
	data &out_t1;
		set &in_t(keep=PHN START_DATE SEPI_DX_MR_ICD9CM SEPIMRSP_DXTYP_HMRIDXTYP);
		if substr(SEPI_DX_MR_ICD9CM,1,3)='428'
		then do;
			CHF='1';
			SOURCE='HOSP';
			CODE=SEPI_DX_MR_ICD9CM;
			TYPE=SEPIMRSP_DXTYP_HMRIDXTYP;
		end;
	run;

	data &out_t2;
		retain PHN START_DATE CHF SOURCE TYPE CODE;
		LENGTH CODE $7;
		set &out_t1;
		if CHF='1';
		keep PHN START_DATE CHF SOURCE TYPE CODE;
	run;
%mend hosp_icd9;

* MACRO to extract ICD10 codes from the HOSPITALIZATIONS datasets --;
* 		The ICD9 codes from the hospitalizations database are available from 1 dataset covering
*		the fiscal years 2002-2015. There are 25 fields for ICD10 diagnosis codes.;

%macro hosp_icd10(in_t=,out_t1=,out_t2=);
	data &out_t1;
		set &in_t(keep=PHN START_DATE DX1 TYPE1);
		if substr(DX1,1,3)='I50'
		then do;
			CHF='1';
			SOURCE='HOSP';
			CODE=DX1;
			TYPE=TYPE1;
		end;
	run;

	data &out_t2;
		retain PHN START_DATE CHF SOURCE TYPE CODE;
		set &out_t1;
		if CHF='1';
		keep PHN START_DATE CHF SOURCE TYPE CODE;
	run;
%mend hosp_icd10;

* MACRO to extract ICD9 codes from the ACCS datasets --;
* Only the most responsible
* diagnosis from ER visits is considered.;

%macro accs_icd9(in_t=,out_t1=,out_t2=);
	data &out_t1;
		set &in_t(where=(REPI_FCENTR_MIS in ('71310','7131020')) keep=PHN START_DATE SEPI_DX_MR_ICD9CM REPI_FCENTR_MIS);
		if substr(SEPI_DX_MR_ICD9CM,1,3)='428'
		then do;
			CHF='1';
			SOURCE='ACCS';
			CODE=SEPI_DX_MR_ICD9CM;
			TYPE='M';
		end;
	run;

	data &out_t2;
		retain PHN START_DATE CHF SOURCE TYPE CODE;
		LENGTH CODE $7;
		set &out_t1;
		if CHF='1';
		keep PHN START_DATE CHF SOURCE TYPE CODE;
	run;
%mend accs_icd9;

* MACRO to extract ICD10 codes from the ACCS datasets --;
* 		The ICD10 codes from the ACCS database are available from 1 dataset covering
*		the fiscal years 2002-2015. Only the most responsible
*		diagnosis from ER visits is considered.;

%macro accs_icd10(in_t=,out_t1=,out_t2=);
	data &out_t1;
		set &in_t(where=(SEPI_MIS_ACCT_CODE in ('713100000','713102000')) keep=PHN START_DATE ICD9 SEPI_MIS_ACCT_CODE);
		if substr(ICD9,1,3)='I50'
		then do;
			CHF='1';
			SOURCE='ACCS';
			CODE=ICD9;
			TYPE='M';
		end;
	run;

	data &out_t2;
		retain PHN START_DATE CHF SOURCE TYPE CODE;
		set &out_t1;
		if CHF='1';
		keep PHN START_DATE CHF SOURCE TYPE CODE;
	run;
%mend accs_icd10;

****************************************************************;
* Identify CHF from the hospitalization, accs, and claims files ;
****************************************************************;

* 1) 	Create datasets with all hospitalizations, accs, and claims containing the relevant codes --;
%claim_icd9(in_t=ahw.claims_noxbe_94_15,out_t1=chf._claims_94_15,out_t2=chf.claims_94_15);
%hosp_icd9(in_t=ahw.hosp_94_97,out_t1=chf._hosp_94_97,out_t2=chf.hosp_94_97);
%hosp_icd9(in_t=ahw.hosp_97_99,out_t1=chf._hosp_97_99,out_t2=chf.hosp_97_99);
%hosp_icd9(in_t=ahw.hosp_99_02,out_t1=chf._hosp_99_02,out_t2=chf.hosp_99_02);
%hosp_icd10(in_t=ahw.hosp_02_15,out_t1=chf._hosp_02_15,out_t2=chf.hosp_02_15);
%accs_icd9(in_t=ahw.accs_97_02,out_t1=chf._accs_97_02,out_t2=chf.accs_97_02);
%accs_icd10(in_t=ahw.accs_02_15,out_t1=chf._accs_02_15,out_t2=chf.accs_02_15);

* 2) 	Combine all data sources and sort by phn/start/end date --;

data chf1;
	set chf.claims_94_15 chf.hosp_94_97 chf.hosp_97_99 chf.hosp_99_02 chf.hosp_02_15 chf.accs_97_02 chf.accs_02_15;
proc sort;
		by phn start_date;
run;
	
	* Check patient numbers --;

	%subjects(ds=chf1)

* 3) 	Save dataset with all hospitalizations, accs, and claims records --;

data chf.chf_94_15_ALL;
	retain PHN START_DATE CHF SOURCE TYPE CODE;
	set chf1;
	run;

* 4) 	Delete hospitalizations after death date/out-migration date or before DOB --;

%demogcheck(in_t=chf.chf_94_15_all, out_t1=chf2, out_t2=chf3, sourcedate=start_date);

* 5) 	Sort by PHN, date, and DESCENDING SOURCE (to ensure that HOSP or CLAIM come before ACCS if on same date) --;

proc sort data=chf3;
	by phn start_date descending source ;
run;

* 6) 	Take first record per patient, define prevalence and incidence, and rename variables --;
proc print data=chf3(obs=10);
run;

data chf4;
	retain PHN CHF START_DATE SOURCE TYPE CODE;
	set chf3(drop=death death_date out_migrate out_migrate_date dobdelete deathdelete outdelete rec_delete PERS_GENDER_CODE PERS_dob);
	by phn start_date descending source;
	if first.phn;
	if start_date<'01APR1997'd then CASE='PREVALENT';
	if start_date>='01APR1997'd or age<3 then CASE='INCIDENT';
	if source='ACCS' and start_date<'01APR2000'd then CASE='PREVALENT';
	if age>=18 then ADULT=1; else ADULT=0;
	rename start_date=CHF_DATE; label start_date='Date of First CHF (by defn)';
	rename source=DATA_SOURCE;
proc freq data=chf4;
		table adult;
run;

* 7) 	Save CHF cohort --;

data chf.chf_94_15_COHORT(drop=code) chf_94_15_cohort;
	set chf4(drop=age adult);
run;

****************************************************************;
* DESCRIPTIVE STATISTICS FOR THE CHF COHORT, CHF_94_15_COHORT --;
****************************************************************;

* Add Fiscal Year --;

%fy(in_t=chf_94_15_cohort,out_t=test,date=chf_date)

* Flag patients with first CHF in 94/95 and find proportions of records per year contributed by these patients --;

data test1;
	set test(keep=phn fy);
	if fy='1994/1995';
	flag='  First in 94/95  ';
	proc sort;
		by phn;
run;

data test2;
	merge chf.chf_94_15_all(in=a) test1(in=b drop=fy);
	by phn;
	if flag=' ' then flag='First not in 94/95';
run;

%fy(in_t=test2,out_t=test3,date=start_date);

* Descriptives --;

ods rtf file="G:\IDENTIFIED\AHW\EXTRACTION_1994_2015\DATA\DERIVED_DATA\AHW_CHF\OUTPUT\CHF_cohortsummary.rtf" bodytitle;

title1 "PROC CONTENTS: CHF_94_15_COHORT";
proc contents data=chf.chf_94_15_cohort varnum;
	run;
title1;

title1 "Frequency of Subjects who are Adults (>=18 yrs) at time of First CHF (by Defn)";
proc freq data=chf4;
	table ADULT;
	run;
title1;

title1 "Frequency of Prevalent and Incident CHF (by Defn)";
proc freq data=test;
	table Case/missing;
	run;
title1;

title1 "Frequency of Prevalent and Incident CHF (by Defn) by Fiscal Year";
proc freq data=test;
	table Case*fy/list missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn) by Fiscal Year";
proc freq data=test;
	table fy/missing out=testa;
	run;
title1;

title1 "Proportion of Records by FY Contributed by Patients with First CHF in 1994/1995";
proc freq data=test3;
	table fy*flag;
	run;
title1;

title1 "Frequency: Prevalent CHF (by Defn) by Fiscal Year";
proc freq data=test(where=(case='PREVALENT'));
	table fy*case/missing;
	run;
title1;

title1 "Frequency: Incident CHF (by Defn) by Fiscal Year";
proc freq data=test(where=(case='INCIDENT'));
	table fy*case/missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn)- Data Source (CLAIM, HOSP, ACCS)";
proc freq data=test;
	table data_source/missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn)- Data Source (CLAIM, HOSP, ACCS) by Fiscal Year";
proc freq data=test;
	table fy*data_source/missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn)- ICD Code";
proc freq data=test;
	table code/missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn)- ICD Code by Data Source";
proc freq data=test;
	table code*data_source/missing;
	run;
title1;

title1 "Frequency: First CHF (by Defn)- ICD Code by Fiscal Year";
proc freq data=test;
	table code*fy/missing;
	run;
title1;

* Create Graph- CHF (by Defn) by Fiscal Year --;

title1 "Graph: First CHF (by Defn) by Fiscal Year";
proc gplot data=testa;
	plot count*fy/ vaxis=0 TO 20000 BY 1000;
	symbol i=join v=circle c=black;
	label count="First CHF (by Defn)";
	label fy="Fiscal Year of First CHF (by Defn)";
run;

quit;

ods rtf close;