source("http://192.38.117.59/~linearpredictors/datafiles/readFever.R") ## weights at 0 is really missing values fever$weight <- ifelse(fever$weight == 0, NA, fever$weight) fever$ga <- cut(fever$gwbirth, c(0,31:40, Inf)) ## cutting weight at the 5% percentile for each ga-group pct5 <- tapply(fever$weight, fever$ga, quantile, probs = .05, na.rm=T) ## Small gestational age? fever$sga <- (fever$weight < pct5[fever$ga]) ## Each observation without small gestational age is selected with probability 0.15 U <- runif(length(fever$sga[fever$sga == 0])) casecon <- rbind(subset(fever, sga == 1), subset(fever, sga == 0)[(U < 0.15),]) ## NB new control sample (and a bit different number of sga cases) ## makes the table look different to that in the book xtabs(~ parity + smoke + sga, data = casecon) xtabs(~ parity + sga, data = casecon) xtabs(~ smoke + sga, data = casecon)