R: 2009

Monday, October 12, 2009

GEE

### GEE ?
library(nlme)
lme(travel~1, random=~1|Rail, method="ML", data=Rail)

y<-Rail$travel
id<-Rail$Rail
time<-rep(1:3,6)

X<-rep(1,length(y))
mu<-solve(t(X)%*%X)%*%t(X)%*%y

Y<-cbind(y[time==1],y[time==2],y[time==3])
Res<-Y-matrix(rep(mu,18),6)
R<-cor(Res)
phi<-1/var(y)

V<-matrix(rep(1,9),3)
for(i in 1:3) for(j in 1:3) if(i!=j) V[i,j]=mean(c(R[1,2],R[1,3],R[2,3]))

solve(t(X)%*%solve(kronecker(V,diag(6)))%*%X)%*%t(X)%*%solve(kronecker(V,diag(6)))%*%y

alpha<-mean((Res[,1]*Res[,2]+Res[,2]*Res[,3])/2)/phi

Sunday, October 11, 2009

Logistic Regression - estimation

### GLM

## logistic regression
respire<-read.csv("F:/publish/data analysis using R/data/respire.csv")
respire2<-read.csv("F:/publish/data analysis using R/data/respire2.csv")

out<-glm(outcome ~ treat, weights=count, family=binomial, data=respire)

logist.lm<-function(beta){
p0<-exp(beta[1])/(1+exp(beta[1]))
p1<-exp(beta[1]+beta[2])/(1+exp(beta[1]+beta[2]))
ll<-16*log(p0)+48*log(1-p0)+40*log(p1)+20*log(1-p1)
return(-ll)
}
optim(c(-1,2),logist.lm)

library(rootSolve)

model<-function(beta){
x<-(respire$treat=="test")
y<-respire$outcome
mu<-exp(beta[1]+beta[2]*x)/(1+exp(beta[1]+beta[2]*x))
W<-respire$count
F1<-sum(W*(y-mu))
F2<-sum(W*(y-mu)*x)
c(F1=F1,F2=F2)
}
multiroot(f=model,start=c(-1,2))$root

Thursday, August 13, 2009

Mixed Models - REML

library(nlme)

# 6 Rails have 3 repeatitions each.
n<-6; r<-3;

# -log Restricted Maximum Likelihood
# beta[1]: mean, exp(beta[2]): sigma_a, exp(beta[3]): sigma
# exp is used to prevent negative estimator for variance
reml<-function(beta){
y<-Rail$travel
X<-rep(1,n*r)
Z<-kronecker(diag(n),rep(1,r))
V<-exp(beta[2])*Z%*%t(Z)+exp(beta[3])*diag(n*r)

ml<-log(det(V))/2+t(y-beta[1])%*%solve(V)%*%(y-beta[1])/2+log(det(t(X)%*%solve(V)%*%X))/2

return(ml)
}

# minimize reml function with optim()
optim(c(66.5, 5, 2),reml)

$par
[1] 66.505645 6.421954 2.782963
> sqrt(exp(c(6.421954,2.782963)))
[1] 24.803307 4.020802

# ML: 22.62435 4.020779
# REML: 24.80547 4.020779
lme(travel~1, random=~1|Rail, data=Rail, method="ML")
lme(travel~1, random=~1|Rail, data=Rail)

Tuesday, July 7, 2009

Simulation


### Functions
test_stat <- function(x){
  ts <- numeric()
  
  X <- matrix(x,r)
  rs <- rowSums(X)  # ni.
  cs <- colSums(X)  # n.j
  
  # E(n)  
  M <- outer(rs,cs)/sum(X)  
    
  # 1st test statistic (LRT)
  ts[1]<-1-pchisq(2*sum(X*log(X/M),na.rm=T),(r-1)*(c-1));

  # 2nd test statistic (Pearson's Chi-square)
  ts[2]<- 1-pchisq(sum((X-M)^2/M),(r-1)*(c-1));
  
  ts[3:12]<-exact(X,rs,cs);
  
  return(ts)
}
exact <- function(x,rs,cs){
  T <- r2dtable(iter,rs,cs)
  obs <- unlist(mmh(x))
  exp <- sapply(T,mmh)

  return(apply(exp>obs,1,mean))
}
mh<-function(x,i,j){
  N<-x[1,1]+x[1,j]+x[i,1]+x[i,j];
  return((x[i,j]*x[1,1]/N-x[i,1]*x[1,j]/N)/sqrt((x[1,1]+x[i,1])/N*(x[1,1]+x[1,j])/N*(x[i,j]+x[i,1])/(N-1)*(x[i,j]+x[1,j])))
}
mmh<-function(x){
  Z1<-mh(x,2,2);  Z2<-mh(x,2,3);  Z3<-mh(x,3,2);  Z4<-mh(x,3,3)
  Z_one<-max(Z1,Z2,Z3,Z4)
  Z_two<-max(abs(Z1),abs(Z2),abs(Z3),abs(Z4))
  return(list(Z_one,Z1,Z2,Z3,Z4,
              Z_two,abs(Z1),abs(Z2),abs(Z3),abs(Z4)))
}


### sample size and number of iteration
n <- 100; r<-3; c<-3; s <- 1000; iter <- 1000; 

### test statistics
num_ts<-12; names_ts <- c('QL', 'Qp',
                          'One-sided', 'MH22', 'MH23', 'MH32','MH33',
                          'Two-sided', 'MH22', 'MH23', 'MH32','MH33')

### parameters
alpha <- c(1,0.2,0.2); beta <- c(1,0.2,0.1); gamma <- matrix(c(1,1,1,1,3.,1.,1,1.,1.),r)

### s 2*2 random tables with n from multinomial distribution
tables <- rmultinom(s, n, outer(alpha,beta)*gamma)

p <- apply(tables,2,test_stat)
rownames(p)<-names_ts

### empirical tail probability
percent <- cbind(apply((p<=0.05),1,mean,na.rm=TRUE))
rownames(percent)<- names_ts

write.csv(rbind(alpha,beta,gamma),"E:/documents/maxor/out/sim.csv",append=TRUE)
write.csv(percent,"E:/documents/maxor/out/sim.csv",append=TRUE)

Wednesday, April 29, 2009

Nonlnear Models - GLS


### Indometh

ols<-nls(conc~SSbiexp(time,b1,b2,b3,b4),
              data=Indometh[Indometh$Subject==1,])
beta<-coef(ols)

for(i in 1:10){
  oldbeta<-beta
  out<-nls(conc~SSbiexp(time,b1,b2,b3,b4),
             data=Indometh[Indometh$Subject==1,],
             weights=1/SSbiexp(time,beta[1],beta[2],beta[3],beta[4])^2)
  beta<-coef(out)              
}

BETA<-matrix(rep(0,6*4),6)
for(i in 1:6){
  BETA[i,]<-coef(nls(conc~SSbiexp(time,b1,b2,b3,b4),
            data=Indometh[Indometh$Subject==i,]))
}
colnames(BETA)<-c('b1','b2','b3','b4')


### Theoph
coplot(conc~Time | Subject, data=Theoph)
nls(conc~SSfol(Dose,Time,lKe,lKa,lCl),data=Theoph)
ols<-nls(conc~SSfol(Dose,Time,lKe,lKa,lCl),
              data=Theoph[Theoph$Subject==1,])
beta<-coef(ols)

BETA<-matrix(rep(0,12*3),12)
for(i in 1:max(as.numeric(Theoph$Subject)))
  BETA[i,]<-coef(nls(conc~SSfol(Dose,Time,b1,b2,b3),
                 data=Theoph[Theoph$Subject==i,]))

Thursday, April 23, 2009

Poisson Regression


melanoma<-read.csv("U:/data/R/datasets/melanoma.csv")

out<-glm(cases~age+region, family=poisson(link="log"), 
                           offset=log(total), data=melanoma)
out1<-glm(cases~age, family=poisson(link="log"), 
                     offset=log(total), data=melanoma)
out2<-glm(cases~region, family=poisson(link="log"), 
                        offset=log(total), data=melanoma)

# type 3
anova(out1,out)
anova(out2,out)

# melanoma
Obs     age     region    cases     total  

  1    35-44    south       75      220407 
  2    45-54    south       68      198119 
  3    55-64    south       63      134084 
  4    65-74    south       45       70708 
  5    75+      south       27       34233 
  6    <35      south       64     1074246 
  7    35-44    north       76      564535 
  8    45-54    north       98      592983 
  9    55-64    north      104      450740 
 10    65-74    north       63      270908 
 11    75+      north       80      161850 
 12    <35      north       61     2880262

Saturday, April 11, 2009

Datasets

# PK
xyplot(conc~time|Subject, data=Indometh) # IV
xyplot(conc~Time|Subject, data=Theoph) # oral

# Linear
plot(dist~speed, data=cars)
plot(eruptions~waiting, data=faithful)
xyplot(weight~Time | Diet, data=ChickWeight)
lme(weight~Time+Diet, random=~1+Time |Chick, data=ChickWeight)
xyplot(height~age|Seed, data=Loblolly)

# Latin-square
OrchardSprays

# ANOVA
boxplot(weight~group, data=PlantGrowth)
boxplot(weight~feed, data=chickwts)

boxplot(breaks~wool+tension, data=warpbreaks)

# Nonlinear
xyplot(uptake~conc|Plant, data=CO2)
xyplot(density ~ conc | Run, data=DNase)
xyplot(rate~conc|state, data=Puromycin)
xyplot(circumference~age|Tree, data=Orange)
plot(pressure~temperature, data=pressure)

# multivariate
attitude
trees

# logistic regression
esoph
glm(cbind(ncases, ncontrols)~agegp+tobgp*alcgp,data=esoph,family=binomial())

# paired t-test
t.test(extra~group, paired=TRUE, data=sleep)

##### MASS
# Survival
Melanoma
VA
gehan
leuk

xyplot(BPchange~Dose | Animal+Treatment, data=Rabbit)
xyplot(size~Time | treat, data=Sitka)

# logistic regression
birthwt

# Poisson Regression
boxplot(log(y)~limit, data=Traffic)
summary(glm(y~limit, data=Traffic, family=poisson))
summary(glm(y~day+limit, data=Traffic, family=poisson))

# nonlinear
plot(steam)

# two-way anova
genotype

# ANACOVA
anova(lm(Postwt~Prewt+Treat, data=anorexia))

# paired t-test
write.csv(anorexia[anorexia$Treat=="CBT"2:3],"u:/data/R/datasets/paired_CBT.csv",row.names=FALSE)

Monday, March 30, 2009

McNemar's test

# simulate a dataset
group<-rep(c('case','cont'),rep(100,2))
pre<-rbinom(200,1,0.7)
post<-rbinom(200,1,0.8)
mcnemar.test(pre,post)

# create a variable post - pre
matched<-data.frame(pre,post,group)
matched$change<-post-pre

# McNemar's test from logistic regression (0 has to be deleted)
summary(glm(factor(change)~1, data=matched[matched$change!=0,],family=binomial))

summary(glm(factor(change)~group, data=matched[matched$change!=0,],family=binomial))

Friday, March 27, 2009

Multiple Comparisons

### Use warpbreaks dataset
amod<-aov(breaks~tension, data=warpbreaks)

### 1-way ANOVA (exact)
library(multcomp)
summary(glht(amod, linfct=mcp(tension="Dunnett")))
summary(glht(amod, linfct=mcp(tension="Tukey")))

# CI
confint(glht(amod, linfct=mcp(tension="Dunnett")), level=0.9)

# create a contrast
contr<-rbind("M-L"=c(-1,1,0), "H-L"=c(-1,0,1), "H-M"=c(0,-1,1))
glht(amod, linfct=mcp(tension=contr))

# Tukey test
TukeyHSD(chickwts.out,"feed")

# Closed test
pairwise.t.test(warpbreaks$breaks, warpbreaks$tension, p.adj="none")
pairwise.t.test(warpbreaks$breaks, warpbreaks$tension, p.adj="bonf")
pairwise.t.test(warpbreaks$breaks, warpbreaks$tension, p.adj="hommel")

Wednesday, March 25, 2009

ROC curve

library(ROC)

R1 <- rocdemo.sca( rbinom(40,1,.3), rnorm(40), caseLabel="new case", markerLabel="demo Marker" );

plot(R1, line=TRUE, show.thresh=TRUE);

plot(R1, line=TRUE);

R