/*SIMILAR TO SECTION 3.7 OF THE EXERCISES*/ *set directory* cd "G:\APC COURSE" clear all **Import the lung data with sex data*** infile sex A P C Y D A5 P5 C5 in 2/l using "lung-apc.txt" ***List data ot check the format*** list * in 1/10 **Tabulate sex** ta sex **Label the sex variable*** label define sex 1 "Males" 2 "Females" label values sex sex ***Check label worked*** ta sex ***Using apcfit for just males*** apcfit if sex==1, age(A) period(P) poprisktime(Y) cases(D) nper(100000) /// agef(agemales) perf(permales) cohf(cohmales) /// dfa(9) dfp(9) dfc(15) refc(1930) ***See what apcfit returns*** return list ***Save the reference Cohort** scalar define refcm=`r(refcoh)' ***apcfit generates the spline terms so we can refit the glm*** /* refit glm?*/ glm D _spA* _spP* _spC*, f(p) lnoffset(Y) nocons ***Store estimates*** estimates store lungmale estimates save lungmale, replace ***Using apcfit for just females*** apcfit if sex==2, age(A) period(P) poprisktime(Y) cases(D) nper(100000) /// agef(agefemales) perf(perfemales) cohf(cohfemales) /// dfa(9) dfp(9) dfc(15) refc(1930) estimates store lungfemale estimates save lungfemale, replace ***Save the reference Cohort** scalar define refcf=`r(refcoh)' ****DRAW THE GRAPHS FOR THE RESULTS FOR MALES**** twoway (rarea permales_uci permales_lci P, sort pstyle(ci) color(eltgreen) fintensity(inten50)) /// (line permales P, sort lc(emerald) clpattern(solid)) /// (rarea cohmales_uci cohmales_lci C, sort pstyle(ci) color(eltblue) fintensity(inten50)) /// (line cohmales C, sort lc(edkblue) clpattern(solid)) /// (scatteri 1 1930, msymbol(Oh) mcolor(edkblue)) /// , name(PCmales,replace) legend(off) /// xtitle("Calendar Time") ytitle("Rate Ratio") yscale(alt) yscale(log) ylabel(,angle(h)) scheme(sj) twoway (rarea agemales_uci agemales_lci A, sort pstyle(ci) color(orange) fintensity(inten50) ) /// (line agemales A, sort lcolor(cranberry) clpattern(solid)) /// , yscale(log) name(Amales,replace) legend(off) /// ylabel( 1 2 5 10 50 200, angle(h)) /// xtitle("Age") ytitle("Rate per 100,000 person-years") scheme(sj) ***Combine the two Graphs For Males** graph combine Amales PCmales, nocopies imargin(3 0 0 3) scheme(sj) title("APC MALES") name(APCm, replace) local refcf refcf ***Draw the female graphs*** twoway (rarea perfemales_uci perfemales_lci P, sort pstyle(ci) color(eltgreen) fintensity(inten50)) /// (line perfemales P, sort lc(emerald) clpattern(solid)) /// (rarea cohfemales_uci cohfemales_lci C, sort pstyle(ci) color(eltblue) fintensity(inten50)) /// (line cohfemales C, sort lc(edkblue) clpattern(solid)) /// (scatteri 1 1930, msymbol(Oh) mcolor(edkblue)) /// , name(PCfemales,replace) legend(off) /// xtitle("Calendar Time") ytitle("Rate Ratio") yscale(alt) yscale(log) ylabel(,angle(h)) scheme(sj) twoway (rarea agefemales_uci agefemales_lci A, sort pstyle(ci) color(orange) fintensity(inten50) ) /// (line agefemales A, sort lcolor(cranberry) clpattern(solid)) /// , yscale(log) name(Afemales,replace) legend(off) /// ylabel( 1 2 5 10 50 200, angle(h)) /// xtitle("Age") ytitle("Rate per 100,000 person-years") scheme(sj) ***Combine the two Graphs** graph combine Afemales PCfemales, nocopies imargin(3 0 0 3) scheme(sj) title("APC FEMALES") name(APCf, replace) ***Plot Age effect for Males and Females on the same Graph** twoway (rarea agemales_uci agemales_lci A, sort pstyle(ci) color(orange) fintensity(inten50) ) /// (line agemales A, sort lcolor(cranberry) clpattern(solid)) /// (rarea agefemales_uci agefemales_lci A, sort pstyle(ci) color(eltblue) fintensity(inten50) ) /// (line agefemales A, sort lc(edkblue) clpattern(solid)) /// , yscale(log) legend(order(1 "Males CI" 2 "Males" 3 "Females CI" 4 "Females")) /// ylabel( 1 2 5 10 50 200, angle(h)) /// xtitle("Age") ytitle("Rate per 100,000 person-years") scheme(sj) /// title("Age Effect for both Genders") name(AgeMF, replace) ***To Create the RR from the two separate models need the results to*** ***be divided by eachother and to create confidence intervals*** ***Trickier because Stata saves things as variables the females *** ***and males results are in different observations*** preserve keep sex age* per* coh* A C P egen id=seq(), from(1) to(5400) quietly reshape wide age* per* coh*, i(id) j(sex) foreach j in age per coh { quietly gen `j'lnRR=log(`j'males1)-log(`j'females2) quietly gen `j'RR=exp(log(`j'males1)-log(`j'females2)) quietly gen `j'diff_se=sqrt(((ln(`j'females_uci2)-ln(`j'females_lci2))/(3.92))^2 /// +((ln(`j'males_uci1)-ln(`j'males_lci1))/(3.92))^2) quietly gen `j'RR_uci=exp(`j'lnRR+1.96*`j'diff_se) quietly gen `j'RR_lci=exp(`j'lnRR-1.96*`j'diff_se) } ***PLOT THE RESULTS*** line ageRR ageRR_lci ageRR_uci A, sort yscale(log) name(AgeRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Age") line cohRR cohRR_lci cohRR_uci C, sort yscale(log) name(CohRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Cohort") line perRR perRR_lci perRR_uci P, sort yscale(log) name(PerRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Period") restore ***ANOTHER SOLUTION TO THE PROBLEM...*** **ANY OTHER IDEAS?!*** ***Because Stata saves things as variables the females and males results*** ***are in different observations*** ***This is a quick and ugly fix to ensure that they are not*** ***Must be a better way of doing it*** ***Probably using reshape somehow would be better*** foreach j in age per coh { quietly gen `j'femalesshift=`j'females[_n+5400] quietly gen `j'femalesshift_uci=`j'females_uci[_n+5400] quietly gen `j'femalesshift_lci=`j'females_lci[_n+5400] quietly gen `j'lnRR=log(`j'males)-log(`j'femalesshift) quietly gen `j'RR=exp(log(`j'males)-log(`j'femalesshift)) quietly gen `j'diff_se=sqrt(((ln(`j'femalesshift_uci)-ln(`j'femalesshift_lci))/(3.92))^2 /// +((ln(`j'males_uci)-ln(`j'males_lci))/(3.92))^2) quietly gen `j'RR_uci=exp(`j'lnRR+1.96*`j'diff_se) quietly gen `j'RR_lci=exp(`j'lnRR-1.96*`j'diff_se) drop `j'diff_se drop `j'lnRR drop *shift* } line ageRR ageRR_lci ageRR_uci A, sort yscale(log) name(AgeRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Age") line cohRR cohRR_lci cohRR_uci C, sort yscale(log) name(CohRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Cohort") line perRR perRR_lci perRR_uci P, sort yscale(log) name(PerRR, replace) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Period") ********************** *****INTERACTIONS***** ********************** ***Fit apcfit to the whole dataset*** apcfit, age(A) period(P) poprisktime(Y) cases(D) nper(100000) /// dfa(9) dfp(9) dfc(15) refc(1930) estimates store lungboth ***Start fitting interactions between sex and the spline terms*** ***THE SPLINE TERMS ARE SAVED BY APCFIT SO WE CAN FIT MORE MODELS*** ***Firstly, fit full interaction*** glm D i.sex#c._spA* i.sex#c._spP* i.sex#c._spC*, f(p) lnoffset(Y) nocons baselevels estimates store fullint ***Fit full interaction excluding interaction with second order cohort effects*** glm D i.sex#c._spA* i.sex#c._spP* i.sex#c._spC1_ldrft _spC2-_spC15, f(p) lnoffset(Y) nocons baselevels estimates store intper ***Fit full interaction excluding interaction with second order period effects*** glm D i.sex#c._spA* c._spP* i.sex#c._spC*, f(p) lnoffset(Y) nocons baselevels estimates store intcoh ***Check which models are best*** estimates stats lungboth fullint intper intcoh lrtest fullint intper lrtest fullint intcoh ***LOOKS LIKE SECOND ORDER PERIOD TERMS COULD BE ASSUMED TO BE THE*** ***SAME FOR BOTH SEXES*** ***Fitting different interactions between sex and the spline terms*** ***Model forcing age effects to have same shape, allows different trend*** glm D c._spA* i.sex#c._spP* i.sex#c._spC* i.sex i.sex#c.A, f(p) lnoffset(Y) nocons estimates store age ***Model forcing the age effects to have the same shape and trend*** glm D c._spA* i.sex#c._spP* i.sex#c._spC* i.sex, f(p) lnoffset(Y) nocons estimates store ageln ***Check which models are best*** lrtest fullint age ***STRONG EVIDENCE AGAINST AGE EFFECTS BEING THE SAME IN TERMS SHAPE*** lrtest age ageln ***STRONGER EVIDENCE AGAINST AGE EFFECTS BEING THE *** ***SAME IN TERMS SHAPE AND LINEAR TREND TOO*** ***LET's REFIT OUR CHOSEN MODEl**** ***NO INTERACTION WITH SECOND ORDER PERIOD TERMS*** ***recode sex for convenience*** recode sex (1=0)(2=1) **Relabel the sex variable*** label define sexnew 0 "Males" 1 "Females" label values sex sexnew **Check everything still OK!*** ta sex ta sex, nolabel ***Generate interactions by hand to show two ways of doing it*** quietly gen sexC1=sex*_spC1_ldrft forvalues i = 2/15 { quietly gen sexC`i'=sex*_spC`i' } quietly gen sexA1=sex*_spA1_intct forvalues i = 2/10 { quietly gen sexA`i'=sex*_spA`i' } ***RR1 and RR2 are the same models fitted one using handmade interactions*** ***and the other factor variable specification for the interactions*** glm D c._spA* c._spP* c._spC* i.sex#c._spA* i.sex#c._spC*, f(p) lnoffset(Y) nocons estimates store RR1 partpred rrtester, for(i.sex#c._spA*) eform glm D c._spA* c._spP* c._spC* sexA* sexC*, f(p) lnoffset(Y) nocons estimates store RR2 ***Using partpred is nicer than predictnl*** partpred agemal if sex==0, for(_spA*) partpred agefem if sex==1, for(_spA* sexA*) partpred rrage, for(sexA*) ci(rrage_lci rrage_uci) eform **Test both models give same results - depends if you prefer to clutter** **up the dataset with your own interaction terms or not *** su rrtester rrage **Make it M/F ratio instead of F/M ratio*** gen rrageMF=1/rrage gen rrageMF_uci=1/rrage_lci gen rrageMF_lci=1/rrage_uci ***Plot the results*** line rrageMF rrageMF_lci rrageMF_uci A if sex==1, sort yscale(log) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Age") **Estimate RR using partpred for Cohort Effects*** partpred rrCoh, for(sexC*) ci(rrCoh_lci rrCoh_uci) eform **Make it M/F ratio instead of F/M ratio*** gen rrCohMF=1/rrCoh gen rrCohMF_lci=1/rrCoh_uci gen rrCohMF_uci=1/rrCoh_lci ***Plot the results*** line rrCohMF rrCohMF_lci rrCohMF_uci C if sex==1, sort yscale(log) /// lc(black black black) lp(solid dash dash) legend(off) title("M/F RR for Cohort") ***QUICK SHIFT to put the results in the right place*** gen rrAges=rrageMF[_n+5400] gen rrCohs=rrCohMF[_n+5400] ***Plot the two different results from the two methods*** ***One Method - Two separate models for two sexes*** ***Second Method - Fit one Model with interaction terms - no int*** ***for the second-order period terms**** line rrAges ageRR A, sort yscale(log) /// legend(order(1 "Interaction Model (Same Period Shape)" 2 "Separate Models")) /// title("M/F RR for Age From Two Methods") name(agecomp, replace) line rrCohs cohRR C, sort yscale(log) /// legend(order(1 "Interaction Model (Same Period Shape)" 2 "Separate Models")) /// title("M/F RR for Age From Two Methods") name(cohortcomp, replace) **************** ******EXTRAS**** **************** ****POSSIBILITY OF REDUCED INTERACTIONS TO DESCRIBE RELATIVE EFFECTS**** **Drop terms to allow different reduced splines to be fit** capture drop rr*RC* capture drop *new* capture drop ref **Set reduced Degrees of Freedom*** local df 5 **Create new spline terms for cohort with lesser DF*** rcsgen C, gen(newC) df(`df') orthog return list ***STORE THE KNOTS AND THE ORTHOG MATRIX*** local cknots `r(knots)' matrix RmatC=r(R) forvalues i = 1/`df' { quietly gen sexnewC`i'=sex*newC`i' } ***MAKE SPLINE TERMS FOR THE REFERENCE COHORT*** gen ref=1930 rcsgen ref, gen(sexnewCref) knots(`cknots') rmatrix(RmatC) ***MAKE SPLINE TERMS RELATIVE TO REFERENCE*** forvalues i = 1/`df' { quietly replace sexnewC`i'=sexnewC`i'-sexnewCref`i' if sex==1 } drop sexnewCref* ***FIT NEW MODEL WITH "REDUCED" SPLINES*** glm D c._spA* c._spP* c._spC* sexA* sexnewC*, f(p) lnoffset(Y) nocons estimates store RRreducedC **PREDICT THE NEW COHORT M/F RR EFFECT** partpred rrCohRC, for(sexnewC*) eform gen rrCohMFRC=1/rrCohRC gen rrCohsRC=rrCohMFRC[_n+5400] **PLOT AGAINST THE OLD ONE*** line rrCohs rrCohsRC C, sort yscale(log) name(cohortreduced,replace)