/************************************************************************************************************************************************************/ /************************************************************************************************************************************************************/ /* Valeri & VanderWeele regression-based approach to causal mediation analysis */ /* for continuous mediator and outcome and a multicategorical exposure (treatment) with four levels */ /************************************************************************************************************************************************************/ /* DATA MANIPULATION */ /* */ /* For the exposure variable A with 4 levels a0, a1, a2, a3, where a0 is the reference level, the user needs to create three dummy variables for */ /* the levels a1, a2, a3, respectively. */ /* Categorical adjustment variables need to be coded as a series of dummy variables before being entered as covariates in the mediator and outcome models. */ /* */ /* MACRO VARIABLES */ /* */ /* The main macro is %rb_mediation; to use it, the user needs to specify the values for the following macro variables: */ /* */ /* input_data: input data that include the dummy variables for the exposure levels (a1, a2, a3), the outcome and mediator variables, as well as the */ /* covariates to be adjusted for in the models; input data also has to include the CLUSTER VARIABLE if CLUSTER BOOTSTRAP is planned */ /* to be performed; */ /* M: the name of the continuous mediator variable; */ /* Y: the name of the continuous outcome variable; */ /* D1, D2, D3: names of the dummy variables for the exposure levels a1, a2, a3; */ /* interaction: the user needs to specify INTERACTION=0 or INTERACTION=1 for the outcome model without or with interaction between the exposure and */ /* the mediator, respectively; if INTERACTION=1, additional variables D1*M, D2*M, D3*M will be AUTOMATICALLY created by the macro; */ /* adjusted: the user needs to specify ADJUSTED=0 or ADJUSTED=1 to obtain unadjusted or adjusted NIE, NDE and TE, respectively; */ /* cvar_M: adjustment variables (covariates) for the mediator model; categorical variables need to be coded in input data as a series of dummy */ /* variables before being entered as covariates; */ /* cvar_Y: adjustment variables (covariates) for the outcome model; categorical variables need to be coded in input data as a series of dummy */ /* variables before being entered as covariates; */ /* boot: confidence intervals for NIE, NDE and TE are calculated BY DEFAULT via DELTA METHOD; to calculate confidence intervals also by */ /* bootstrapping, the user needs to specify BOOT=1; */ /* n: the user needs to specify the number of bootstrap samples if BOOT=1 was specified; */ /* SAMPLINGUNIT: the user needs to specify the name of the cluster variable (for cluster sampling) if BOOT=1 was specified for CLUSTER BOOTSTRAP. */ /* */ /* The user needs to specify the macro variable values ONLY for the main macro %rb_mediation. */ /* */ /* Sub-macro %point_estimates is called by the main macro %rb_mediation to provide regression outputs for : */ /* 1) crude outcome and mediator models by default; */ /* 2) adjusted outcome and mediator models by setting the macro variable ADJUSTED to 1 in the main macro %rb_mediation; */ /* 3) outcome model with or without interaction between the exposure and the mediator by setting correspondingly the macro variable INTERACTION to 1 or */ /* 0 in the main macro %rb_mediation. */ /* Sub-macro %CI is called by the main macro %rb_mediation to calculate bootstrap confidence intervals for NIE, NDE and TE if BOOT=1 was specified. */ /* */ /* SAS OUTPUT EXPLANATION */ /* After %rb_mediation macro execution, the following information is displayed in the SAS results viewer window: */ /* exposure (treatment) levels a1, a2, a3; */ /* names of the mediator and outcome; */ /* method used to calculate the confidence intervals for NIE, NDE and TE; */ /* type of analysis (crude or adjusted, with or without interaction); */ /* mediation effect point estimates (e.g., contrast_1 displays the mediation effects if the treatment changed from a0 to a1); */ /* mediation effect interval estimates (e.g., delta_1_low and delta_1_upp are the upper and lower bounds of the 95% confidence interval for */ /* the contrast_1 calculated by delta method; boot_1_low and boot_1_upp are the upper and lower bounds of the 95% confidence interval for */ /* the contrast_1 calculated by percentile bootstrap). */ /* */ /************************************************************************************************************************************************************/ %macro point_estimates(data); %if &adjusted = 1 %then %do; proc means data = &data mean noprint; var &cvar_M; output out=cvar_means_M (where=(_STAT_='MEAN')); run; proc reg data = &data outest=out_M (keep = Intercept &D1 &D2 &D3 &cvar_M) noprint; model &M = &D1 &D2 &D3 &cvar_M; run; quit; %if &interaction = 1 %then %do; proc reg data = &data outest=out_Y (keep = Intercept &D1 &D2 &D3 &M int1 int2 int3 &cvar_Y) noprint; model &Y = &D1 &D2 &D3 &M int1 int2 int3 &cvar_Y; run; quit; %end; %if &interaction = 0 %then %do; proc reg data = &data outest=out_Y (keep = Intercept &D1 &D2 &D3 &M &cvar_Y) noprint; model &Y = &D1 &D2 &D3 &M &cvar_Y ; run; quit; %end; %end; %if &adjusted = 0 %then %do; proc reg data = &data outest=out_M (keep = Intercept &D1 &D2 &D3) noprint; model &M = &D1 &D2 &D3; run; quit; %if &interaction = 1 %then %do; proc reg data = &data outest=out_Y (keep = Intercept &D1 &D2 &D3 &M int1 int2 int3) noprint; model &Y = &D1 &D2 &D3 &M int1 int2 int3; run; quit; %end; %if &interaction = 0 %then %do; proc reg data = &data outest=out_Y (keep = Intercept &D1 &D2 &D3 &M) noprint; model &Y = &D1 &D2 &D3 &M; run; quit; %end; %end; proc iml; use out_M; read all into coeffs_M; beta0 = coeffs_M[1]; beta11 = coeffs_M[2]; beta12 = coeffs_M[3]; beta13 = coeffs_M[4]; %if &adjusted=1 %then %do; use cvar_means_M; read all var {&cvar_M} into cov_means_M where(_STAT_='MEAN'); betaC = coeffs_M[5:ncol(coeffs_M)]; scal_product = cov_means_M*betaC; %end; %if &adjusted=0 %then %do; scal_product = 0; %end; use out_Y; read all into coeffs_Y; theta0 = coeffs_Y[1]; theta11 = coeffs_Y[2]; theta12 = coeffs_Y[3]; theta13 = coeffs_Y[4]; theta2 = coeffs_Y[5]; %if &interaction=1 %then %do; theta31 = coeffs_Y[6]; theta32 = coeffs_Y[7]; theta33 = coeffs_Y[8]; /* effects: NIE, NDE*/ NIE1 = (theta2+theta31)*beta11; NIE2 = (theta2+theta32)*beta12; NIE3 = (theta2+theta33)*beta13; NDE1 = theta11+theta31*(beta0+scal_product); NDE2 = theta12+theta32*(beta0+scal_product); NDE3 = theta13+theta33*(beta0+scal_product); %end; %if &interaction=0 %then %do; /* effects: NIE, NDE */ NIE1 = theta2*beta11; NIE2 = theta2*beta12; NIE3 = theta2*beta13; NDE1 = theta11; NDE2 = theta12; NDE3 = theta13; %end; /* effects: TE*/ TE1 = NIE1 + NDE1; TE2 = NIE2 + NDE2; TE3 = NIE3 + NDE3; NIE = NIE1||NIE2||NIE3; NDE = NDE1||NDE2||NDE3; TE = TE1 ||TE2 ||TE3; all=NIE||NDE||TE; create point_estimates from all [colname={"NIE1" "NIE2" "NIE3" "NDE1" "NDE2" "NDE3" "TE1" "TE2" "TE3"}]; append from all; close point_estimates; quit; /*end proc iml*/ %mend point_estimates; %macro CI(contrast); proc univariate data=boot noprint; var &contrast; output out=CI pctlpts=2.5 97.5 pctlpre=CI pctlname= _low _up ; run; data &contrast._CI; retain contrast CI_low CI_up; set CI; contrast="&contrast."; run; %mend; %macro rb_mediation(input_data,Y,M,D1,D2,D3,interaction,adjusted,cvar_M,cvar_Y,boot,n,SAMPLINGUNIT); %if &interaction = 1 %then %do; data mydata; set &input_data; int1 = &D1*&M; int2 = &D2*&M; int3 = &D3*&M; run; %end; %if &interaction = 0 %then %do; data mydata; set &input_data; run; %end; %point_estimates(data=mydata); /* DELTA */ %if &adjusted = 1 %then %do; %let pref = adjusted; %put &pref; %let cvar_count_M = %sysfunc(countw(&cvar_M)); %put &cvar_count_M; %let cvar_count_Y = %sysfunc(countw(&cvar_Y)); %put &cvar_count_Y; proc reg data = mydata covout outest=outM (keep = Intercept &D1 &D2 &D3 &cvar_M) noprint; model &M = &D1 &D2 &D3 &cvar_M; run; quit; %if &interaction = 1 %then %do; proc reg data = mydata covout outest=outY (keep = Intercept &D1 &D2 &D3 &M int1 int2 int3 &cvar_Y) noprint; model &Y = &D1 &D2 &D3 &M int1 int2 int3 &cvar_Y; run; quit; %end; %if &interaction = 0 %then %do; proc reg data = mydata covout outest=outY (keep = Intercept &D1 &D2 &D3 &M &cvar_Y) noprint; model &Y = &D1 &D2 &D3 &M &cvar_Y; run; quit; %end; %end; %if &adjusted = 0 %then %do; %let pref = crude; %put &pref; proc reg data = mydata covout outest=outM (keep = Intercept &D1 &D2 &D3) noprint; model &M = &D1 &D2 &D3; run; quit; %if &interaction = 1 %then %do; proc reg data = mydata covout outest=outY (keep = Intercept &D1 &D2 &D3 &M int1 int2 int3) noprint; model &Y = &D1 &D2 &D3 &M int1 int2 int3; run; quit; %end; %if &interaction = 0 %then %do; proc reg data = mydata covout outest=outY (keep = Intercept &D1 &D2 &D3 &M) noprint; model &Y = &D1 &D2 &D3 &M; run; quit; %end; %end; proc iml; /* start proc iml */ /*************************************************/ use outM; read point 1 into coeffs_M; read all into sigma_M; sigma_M=sigma_M[2:nrow(sigma_M),]; beta0 = coeffs_M[1]; beta11 = coeffs_M[2]; beta12 = coeffs_M[3]; beta13 = coeffs_M[4]; use outY; read point 1 into coeffs_Y; read all into sigma_Y; sigma_Y=sigma_Y[2:nrow(sigma_Y),]; theta0 = coeffs_Y[1]; theta11 = coeffs_Y[2]; theta12 = coeffs_Y[3]; theta13 = coeffs_Y[4]; theta2 = coeffs_Y[5]; %if &interaction=1 %then %do; theta31=coeffs_Y[6]; theta32=coeffs_Y[7]; theta33=coeffs_Y[8]; %end; %if &adjusted=1 %then %do; use cvar_means_M; read all var {&cvar_M} into cov_means_M where(_STAT_='MEAN'); betaC=coeffs_M[5:ncol(coeffs_M)]; scal_product = cov_means_M*betaC; %if &interaction=1 %then %do; theta31=coeffs_Y[6]; theta32=coeffs_Y[7]; theta33=coeffs_Y[8]; /* NIE1,NIE2,NIE3: gradient vectors*/ grad_NIE1= {0}||theta2+theta31||{0}||{0}||J(1,&cvar_count_M,0)||J(1,4,0)||beta11||beta11||{0}||{0}||J(1,&cvar_count_Y,0); grad_NIE2= {0}||{0}||theta2+theta32||{0}||J(1,&cvar_count_M,0)||J(1,4,0)||beta12||{0}||beta12||{0}||J(1,&cvar_count_Y,0); grad_NIE3= {0}||{0}||{0}||theta2+theta33||J(1,&cvar_count_M,0)||J(1,4,0)||beta13||{0}||{0}||beta13||J(1,&cvar_count_Y,0); /* NDE1,NDE2,NDE3: gradient vectors*/ grad_NDE1= theta31||J(1,3,0)||theta31*cov_means_M||{0}||{1}||{0}||{0}||{0}||beta0+scal_product||{0}||{0}||J(1,&cvar_count_Y,0); grad_NDE2= theta32||J(1,3,0)||theta32*cov_means_M||{0}||{0}||{1}||{0}||{0}||{0}||beta0+scal_product||{0}||J(1,&cvar_count_Y,0); grad_NDE3= theta33||J(1,3,0)||theta33*cov_means_M||{0}||{0}||{0}||{1}||{0}||{0}||{0}||beta0+scal_product||J(1,&cvar_count_Y,0); %end; %if &interaction=0 %then %do; /* NIE1,NIE2,NIE3: gradients */ grad_NIE1= {0}||theta2||{0}||{0}||J(1,&cvar_count_M,0)||J(1,4,0)||beta11||J(1,&cvar_count_Y,0); grad_NIE2= {0}||{0}||theta2||{0}||J(1,&cvar_count_M,0)||J(1,4,0)||beta12||J(1,&cvar_count_Y,0); grad_NIE3= {0}||{0}||{0}||theta2||J(1,&cvar_count_M,0)||J(1,4,0)||beta13||J(1,&cvar_count_Y,0); /* NDE1,NDE2,NDE3: gradients */ grad_NDE1= {0}||J(1,3,0)||J(1,&cvar_count_M,0)||{0}||{1}||{0}||{0}||{0}||J(1,&cvar_count_Y,0); grad_NDE2= {0}||J(1,3,0)||J(1,&cvar_count_M,0)||{0}||{0}||{1}||{0}||{0}||J(1,&cvar_count_Y,0); grad_NDE3= {0}||J(1,3,0)||J(1,&cvar_count_M,0)||{0}||{0}||{0}||{1}||{0}||J(1,&cvar_count_Y,0); %end; %end; %if &adjusted=0 %then %do; %if &interaction=1 %then %do; /* NIE1,NIE2,NIE3: gradient vectors*/ grad_NIE1= {0}||theta2+theta31||{0}||{0}||J(1,4,0)||beta11||beta11||{0}||{0}; grad_NIE2= {0}||{0}||theta2+theta32||{0}||J(1,4,0)||beta12||{0}||beta12||{0}; grad_NIE3= {0}||{0}||{0}||theta2+theta33||J(1,4,0)||beta13||{0}||{0}||beta13; /* NDE1,NDE2,NDE3: gradient vectors*/ grad_NDE1= theta31 ||J(1,3,0) ||{0} ||{1} ||{0} ||{0} ||{0} ||beta0 ||{0} ||{0}; grad_NDE2= theta32 ||J(1,3,0) ||{0} ||{0} ||{1} ||{0} ||{0} ||{0} ||beta0 ||{0}; grad_NDE3= theta33 ||J(1,3,0) ||{0} ||{0} ||{0} ||{1} ||{0} ||{0} ||{0} ||beta0; %end; %if &interaction=0 %then %do; /* NIE1,NIE2,NIE3: gradients */ grad_NIE1= {0}||theta2||{0}||{0}||J(1,4,0)||beta11; grad_NIE2= {0}||{0}||theta2||{0}||J(1,4,0)||beta12; grad_NIE3= {0}||{0}||{0}||theta2||J(1,4,0)||beta13; /* NDE1,NDE2,NDE3: gradients */ grad_NDE1= J(1,4,0)||{0}||{1}||{0}||{0}||{0}; grad_NDE2= J(1,4,0)||{0}||{0}||{1}||{0}||{0}; grad_NDE3= J(1,4,0)||{0}||{0}||{0}||{1}||{0}; %end; %end; /* TE1,TE2,TE3: gradients */ grad_TE1 = grad_NIE1 + grad_NDE1; grad_TE2 = grad_NIE2 + grad_NDE2; grad_TE3 = grad_NIE3 + grad_NDE3; sigma = block(sigma_M,sigma_Y); se_NIE1 = sqrt((grad_NIE1*sigma)*(grad_NIE1`)); se_NIE2 = sqrt((grad_NIE2*sigma)*(grad_NIE2`)); se_NIE3 = sqrt((grad_NIE3*sigma)*(grad_NIE3`)); se_NDE1 = sqrt((grad_NDE1*sigma)*(grad_NDE1`)); se_NDE2 = sqrt((grad_NDE2*sigma)*(grad_NDE2`)); se_NDE3 = sqrt((grad_NDE3*sigma)*(grad_NDE3`)); se_TE1 = sqrt((grad_TE1*sigma)*(grad_TE1`)); se_TE2 = sqrt((grad_TE2*sigma)*(grad_TE2`)); se_TE3 = sqrt((grad_TE3*sigma)*(grad_TE3`)); use point_estimates; read all into effects; NIE1 = effects[1,1]; NIE2 = effects[1,2]; NIE3 = effects[1,3]; NDE1 = effects[1,4]; NDE2 = effects[1,5]; NDE3 = effects[1,6]; TE1 = effects[1,7]; TE2 = effects[1,8]; TE3 = effects[1,9]; NIE1_low=NIE1-quantile("Normal",.975)*se_NIE1; NIE1_upp=NIE1+quantile("Normal",.975)*se_NIE1; NIE2_low=NIE2-quantile("Normal",.975)*se_NIE2; NIE2_upp=NIE2+quantile("Normal",.975)*se_NIE2; NIE3_low=NIE3-quantile("Normal",.975)*se_NIE3; NIE3_upp=NIE3+quantile("Normal",.975)*se_NIE3; NDE1_low=NDE1-quantile("Normal",.975)*se_NDE1; NDE1_upp=NDE1+quantile("Normal",.975)*se_NDE1; NDE2_low=NDE2-quantile("Normal",.975)*se_NDE2; NDE2_upp=NDE2+quantile("Normal",.975)*se_NDE2; NDE3_low=NDE3-quantile("Normal",.975)*se_NDE3; NDE3_upp=NDE3+quantile("Normal",.975)*se_NDE3; TE1_low=TE1-quantile("Normal",.975)*se_TE1; TE1_upp=TE1+quantile("Normal",.975)*se_TE1; TE2_low=TE2-quantile("Normal",.975)*se_TE2; TE2_upp=TE2+quantile("Normal",.975)*se_TE2; TE3_low=TE3-quantile("Normal",.975)*se_TE3; TE3_upp=TE3+quantile("Normal",.975)*se_TE3; NIE= NIE1 ||NIE1_low ||NIE1_upp ||NIE2 ||NIE2_low ||NIE2_upp ||NIE3 ||NIE3_low ||NIE3_upp; NDE= NDE1 ||NDE1_low ||NDE1_upp ||NDE2 ||NDE2_low ||NDE2_upp ||NDE3 ||NDE3_low ||NDE3_upp; TE= TE1 ||TE1_low ||TE1_upp ||TE2 ||TE2_low ||TE2_upp ||TE3 ||TE3_low ||TE3_upp; all=NDE//NIE//TE; create results from all [colname={ "contrast_1" "delta_1_low" "delta_1_upp" "contrast_2" "delta_2_low" "delta_2_upp" "contrast_3" "delta_3_low" "delta_3_upp"}]; append from all;close results; quit; /*end proc iml*/ data &pref._results_delta; retain effect; set results; if _n_=1 then effect="NDE"; if _n_=2 then effect="NIE"; if _n_=3 then effect="TE"; array _nums {*} _numeric_; do i = 1 to dim(_nums); _nums{i} = round(_nums{i},.0001); end; drop i; run; title color=blue "Treatment levels: &D1, &D2, &D3 "; title2 color=blue "Mediator: &M "; title3 color=blue "Outcome: &Y "; title4 " "; title5 color=red "CI: DELTA METHOD"; title6 "&pref analysis with 95% CI, interactions = &interaction"; proc print data= &pref._results_delta; run; title; /********************************************************************************************************/ /* BOOT OPTION */ /********************************************************************************************************/ %if &boot=1 %then %do; options nonotes nosource nosource2 errors=0; proc datasets library = WORK noprint; delete boot; run; quit; proc surveyselect data = mydata out=bootdata seed=1983 method=urs samprate=1 outhits rep=&n noprint; SAMPLINGUNIT &SAMPLINGUNIT; run; data bootdata; retain sample; set bootdata; sample=Replicate; drop Replicate &SAMPLINGUNIT; run; %do i=1 %to &n; %put iter=&i; data data_for_boot; set bootdata (where=(sample=&i)); run; %point_estimates(data=data_for_boot); proc append base= boot data= point_estimates force; run; %end; options nolabel; %CI(contrast=NDE1); %CI(contrast=NDE2); %CI(contrast=NDE3); %CI(contrast=NIE1); %CI(contrast=NIE2); %CI(contrast=NIE3); %CI(contrast=TE1); %CI(contrast=TE2); %CI(contrast=TE3); data CI; set NDE1_CI NDE2_CI NDE3_CI NIE1_CI NIE2_CI NIE3_CI TE1_CI TE2_CI TE3_CI; run; proc iml; use CI; read point 1 into NDE1_boot; read point 2 into NDE2_boot; read point 3 into NDE3_boot; read point 4 into NIE1_boot; read point 5 into NIE2_boot; read point 6 into NIE3_boot; read point 7 into TE1_boot; read point 8 into TE2_boot; read point 9 into TE3_boot; NDE_boot = NDE1_boot||NDE2_boot||NDE3_boot; NIE_boot = NIE1_boot||NIE2_boot||NIE3_boot; TE_boot = TE1_boot|| TE2_boot|| TE3_boot; boot_results = NDE_boot//NIE_boot//TE_boot; create boot_results from boot_results [colname={"boot_1_low" "boot_1_upp" "boot_2_low" "boot_2_upp" "boot_3_low" "boot_3_upp"}]; append from boot_results; close boot_results; quit; data &pref._results_boot; retain effect contrast_1 boot_1_low boot_1_upp contrast_2 boot_2_low boot_2_upp contrast_3 boot_3_low boot_3_upp; merge &pref._results_delta boot_results; array _nums {*} _numeric_; do i = 1 to dim(_nums); _nums{i} = round(_nums{i},.0001); end; drop i delta_1_low delta_1_upp delta_2_low delta_2_upp delta_3_low delta_3_upp; run; options notes source source2 errors=20; title color=red "CI: BOOTSTRAP, nboot=&n"; title2 "&pref analysis with 95% CI, interactions = &interaction"; proc print data= &pref._results_boot; run; title; %end; %mend rb_mediation;