FILENAME goptions URL "http://192.38.117.59/~linearpredictors/datafiles/goptions.sas";
%include goptions;

    
FILENAME url URL "http://192.38.117.59/~linearpredictors/datafiles/readMelanoma.sas";
%include url;

DATA in_file_1; SET melanoma;
   subj=_N_; t1=0; t2=years;
   z1=sex; z2=ulc; z3=thick-3; z4=(age-50)/10;
   dead=(dc NE 2);
   KEEP subj t1 t2 z1 z2 z3 z4 dead;
RUN;

/* The subsequent SAS code is adapted from that described by D.E.Schaubel and
   G. Wei (2007) Biometrical Journal, vol. 49, pp. 719-730 */ 

/* ------ Setting up global variables ------ */

%let in_file_1 = in_file_1; /* raw imput file */
%let Z = z1 z2 z3 z4; /* covariate vector */
%let S_ = S1_ S2_ S3_ S4_; /* for Schoenfeld residuals, S in (13); */
%let R_ = R1_ R2_ R_3 R_4; /* for Schoenfeld residuals, R in (16) */

/* R_ and S_ have same number of elements as Z */
/* raw input file contains subj, t1, t2, Z, dead, where subj =
subject number (i, in manuscript) t1 = start of subinterval; t2 =
end of subinterval; Z = covariate; dead = event indicator (delta) */
/* ID number 'subj' has to be numbered 1, .., n */
/* if covariate is time-dependent, assume that input file contains a
separate record each time a subject's covariate vector changes */
/* counting number of subjects, n */

proc sort data=&in_file_1; by subj; run;

data file1n; set &in_file_1; by subj;
    if first.subj; run;

proc means data=file1n;
var subj;
output out=n_count n=n;
run;

/* creating global variable, &n */
data n_count;
set n_count;
call symput('n',n);
run;

/* Step 1: computing Schoenfeld residuals in equation (13) */
proc phreg data=&in_file_1;
model t2*dead(0) = &Z / entry=t1 maxiter=0;
output out=OLS1 (keep=dead &S_) ressch=&S_;
run;

/* Step 2: OLS1 data set contains matrix S, from eq (14), and vector
of death indicators */

proc reg data=OLS1 outsscp=outsscp1;
model dead = &S_ / noint;
quit;

/* matrix B equals SSCP matrix */
data B; set outSSCP1;
if (_type_="SSCP" and _NAME_ not in ("Intercept", "dead"));
keep &S_;
run;

/* Step 3: creating expanded data set */
proc sort data=&in_file_1; by t2; run;

data expand1; set &in_file_1 (keep=t2);
if (_n_=1) then t1=0;
retain t1;
output;
t1=t2;
keep t1 t2;
run;

data expand2; set expand1;
do i=1 to &n;
subj=i;
output;
end;
keep subj t1 t2;
run;

proc sort data=&in_file_1; by subj; run;
proc means data=&in_file_1 noprint;
var t2;
by subj;
output out=outsubj1 (keep=subj Xi) max(t2)=Xi;
run;

proc sort data=expand2; by subj t1; run;
data expand3;
merge expand2 (in=in_exp) outsubj1; by subj;
if (in_exp=0) then delete;
if (t2 > Xi) then delete;
dead1=1;
keep subj t1 t2 dead1;
run;

proc sort data=&in_file_1; by subj t1; run;
data expand4;
merge expand3 (in=in_exp) &in_file_1 (in=in_1 keep=subj t1); by subj t1;
if (in_exp=0) then delete;
if (in_1=1) then t_Z=t1;
if first.subj then t_Z_=t_Z;
retain t_Z_;
if (t_Z ne . ) then t_Z_=t_Z; else t_Z=t_Z_;
drop t_Z;
run;

data expand5;
merge expand4 (in=in_exp) &in_file_1 (keep=subj t1 &Z rename=(t1=t_Z_));
by subj t_Z_;
if (in_exp=0) then delete;
if (t1=t2) then delete;
run;

proc sort data=expand5; by subj t2; run;

proc sort data=&in_file_1; by subj t2; run;

data expand6; merge expand5 (in=in_exp) &in_file_1 (keep=subj t2 dead);
by subj t2;
if (dead= . ) then dead=0;
dt=t2-t1;
run;

/* Step 4: computing Schoenfeld-type residuals, for matrix R in
eq (16) */

proc phreg data=expand6;
model t2*dead1(0) = &Z / maxiter=0 entry=t1;
output out=outA (keep=&R_ dead t1 t2) ressch=&R_;
id dead;
run;

/* Setting up WLS data set for Step 5 */
/* Computing elements of D matrix in eq (17) */

data wls1; set outA;
dt=t2-t1;
Y_ =dead/dt;
run;

/* Step 5: Computing estimate of theta; computing A via eq (18) */

proc reg data=wls1 outsscp=outsscpw1 covout outest=out1;
model Y_ = &R_ / noint;
weight dt;
quit;

/* extracting matrix A */

data A; set outSSCPW1;
if (_type_="SSCP" and _NAME_ not in ("Intercept", "Y_"));
keep &R_;
run;

/* extracting theta */

data out1theta; set out1;
if (_type_="PARMS");
keep &R_;
run;

/* Step 6: final matrix operations to estimate standard errors */

proc iml;
use out1theta;
read all var _all_ into theta_hat;
theta_hat=t(theta_hat);
use A;
read all var _all_ into A;
use B;
read all var _all_ into B;
v_hat=inv(A)*B*inv(A);
sd_hat=sqrt(vecdiag(v_hat));
print theta_hat sd_hat wald p;
quit;

/* estimation of baseline hazard not yet implemented */