FILENAME goptions URL "http://192.38.117.59/~linearpredictors/datafiles/goptions.sas"; %include goptions; FILENAME url URL "http://192.38.117.59/~linearpredictors/datafiles/readMelanoma.sas"; %include url; DATA in_file_1; SET melanoma; subj=_N_; t1=0; t2=years; z1=sex; z2=ulc; z3=thick-3; z4=(age-50)/10; dead=(dc NE 2); KEEP subj t1 t2 z1 z2 z3 z4 dead; RUN; /* The subsequent SAS code is adapted from that described by D.E.Schaubel and G. Wei (2007) Biometrical Journal, vol. 49, pp. 719-730 */ /* ------ Setting up global variables ------ */ %let in_file_1 = in_file_1; /* raw imput file */ %let Z = z1 z2 z3 z4; /* covariate vector */ %let S_ = S1_ S2_ S3_ S4_; /* for Schoenfeld residuals, S in (13); */ %let R_ = R1_ R2_ R_3 R_4; /* for Schoenfeld residuals, R in (16) */ /* R_ and S_ have same number of elements as Z */ /* raw input file contains subj, t1, t2, Z, dead, where subj = subject number (i, in manuscript) t1 = start of subinterval; t2 = end of subinterval; Z = covariate; dead = event indicator (delta) */ /* ID number 'subj' has to be numbered 1, .., n */ /* if covariate is time-dependent, assume that input file contains a separate record each time a subject's covariate vector changes */ /* counting number of subjects, n */ proc sort data=&in_file_1; by subj; run; data file1n; set &in_file_1; by subj; if first.subj; run; proc means data=file1n; var subj; output out=n_count n=n; run; /* creating global variable, &n */ data n_count; set n_count; call symput('n',n); run; /* Step 1: computing Schoenfeld residuals in equation (13) */ proc phreg data=&in_file_1; model t2*dead(0) = &Z / entry=t1 maxiter=0; output out=OLS1 (keep=dead &S_) ressch=&S_; run; /* Step 2: OLS1 data set contains matrix S, from eq (14), and vector of death indicators */ proc reg data=OLS1 outsscp=outsscp1; model dead = &S_ / noint; quit; /* matrix B equals SSCP matrix */ data B; set outSSCP1; if (_type_="SSCP" and _NAME_ not in ("Intercept", "dead")); keep &S_; run; /* Step 3: creating expanded data set */ proc sort data=&in_file_1; by t2; run; data expand1; set &in_file_1 (keep=t2); if (_n_=1) then t1=0; retain t1; output; t1=t2; keep t1 t2; run; data expand2; set expand1; do i=1 to &n; subj=i; output; end; keep subj t1 t2; run; proc sort data=&in_file_1; by subj; run; proc means data=&in_file_1 noprint; var t2; by subj; output out=outsubj1 (keep=subj Xi) max(t2)=Xi; run; proc sort data=expand2; by subj t1; run; data expand3; merge expand2 (in=in_exp) outsubj1; by subj; if (in_exp=0) then delete; if (t2 > Xi) then delete; dead1=1; keep subj t1 t2 dead1; run; proc sort data=&in_file_1; by subj t1; run; data expand4; merge expand3 (in=in_exp) &in_file_1 (in=in_1 keep=subj t1); by subj t1; if (in_exp=0) then delete; if (in_1=1) then t_Z=t1; if first.subj then t_Z_=t_Z; retain t_Z_; if (t_Z ne . ) then t_Z_=t_Z; else t_Z=t_Z_; drop t_Z; run; data expand5; merge expand4 (in=in_exp) &in_file_1 (keep=subj t1 &Z rename=(t1=t_Z_)); by subj t_Z_; if (in_exp=0) then delete; if (t1=t2) then delete; run; proc sort data=expand5; by subj t2; run; proc sort data=&in_file_1; by subj t2; run; data expand6; merge expand5 (in=in_exp) &in_file_1 (keep=subj t2 dead); by subj t2; if (dead= . ) then dead=0; dt=t2-t1; run; /* Step 4: computing Schoenfeld-type residuals, for matrix R in eq (16) */ proc phreg data=expand6; model t2*dead1(0) = &Z / maxiter=0 entry=t1; output out=outA (keep=&R_ dead t1 t2) ressch=&R_; id dead; run; /* Setting up WLS data set for Step 5 */ /* Computing elements of D matrix in eq (17) */ data wls1; set outA; dt=t2-t1; Y_ =dead/dt; run; /* Step 5: Computing estimate of theta; computing A via eq (18) */ proc reg data=wls1 outsscp=outsscpw1 covout outest=out1; model Y_ = &R_ / noint; weight dt; quit; /* extracting matrix A */ data A; set outSSCPW1; if (_type_="SSCP" and _NAME_ not in ("Intercept", "Y_")); keep &R_; run; /* extracting theta */ data out1theta; set out1; if (_type_="PARMS"); keep &R_; run; /* Step 6: final matrix operations to estimate standard errors */ proc iml; use out1theta; read all var _all_ into theta_hat; theta_hat=t(theta_hat); use A; read all var _all_ into A; use B; read all var _all_ into B; v_hat=inv(A)*B*inv(A); sd_hat=sqrt(vecdiag(v_hat)); print theta_hat sd_hat wald p; quit; /* estimation of baseline hazard not yet implemented */