R

Thursday, December 23, 2010

Monday, May 24, 2010

Graphics:


### Normal distribution
curve(dnorm(x),xlim=c(-4,4),ylab="density",main="Normal Distribution")

# 2 normal curves
curve(dnorm(x,mean=0,sd=5), xlim=c(-20,50), col="red")
curve(dnorm(x,mean=25,sd=5), add=TRUE, col="blue")


### y^2 = 4*x
y = seq(0, 5, by=0.01)
y.upper = 2*sqrt(x)
y.lower = -2*sqrt(x)
y.max = max(y.upper)
y.min = min(y.lower)
plot(c(-2,5), c(y.min,y.max), type="n", xlab="x", ylab="y")
lines(x, y.upper)
lines(x, y.lower)
abline(v=-1)
points(1,0)
text(1,0,"focus (1,0)", pos=4)
text(-1,y.min,"directrix x = -1", pos=4)
title("The parabola y^2 = 4*x")

### x^2 - y^2/3 = 1
plot(c(-max(x),max(x)),c(-max(y),max(y)),type='n',xlab='x',ylab='y')
x = seq(1,5,by=0.01)
y = sqrt(3*x^2-3)
lines(x,y); lines(-x,y); lines(x,-y); lines(-x,-y);
abline(0,sqrt(3)); abline(0,-sqrt(3))
points(2,0); points(-2,0)
text(2,0,"focus (2,0)",pos=4)
text(-2,0,"focus (2,0)",pos=2)
text(5,8.5,"asymptote y = sqrt(3)*x", pos=2)

### 
opar1 = par(las=1, mar=c(4,4,3,2))
plot(cars$speed,cars$dist,axes=FALSE,xlab="",ylab="",type="n")
points(cars$speed, cars$dist,
       col=ifelse(cars$speed>9,"darkseagreen4","red"),
       pch=ifelse(cars$speed>9,1,3))
axis(1);axis(2)

opars = par(las=0)
mtext("Speed (mph)",side=1,line=3)
mtext("Dist (ft)",side=2,line=3)

box()

legend(x=10,y=100,c("Low","High"),
       col=c("darkseagreen3","red"), 
       pch=c(3,1),bty="n")

### 
curve(100*(x^3-x^2)+15, from=0, to=1,
      xlab=expression(alpha),
      ylab=expression(100 %*% (alpha^3 - alpha^2) +15),
      main=expression(paste("Function : ", 
           f(alpha) == 100 %*% (alpha^3 - alpha^2) + 15)))
myMu = 0.5; mySigma = 0.25
par(usr=c(0,1,0,1))
text(0.1,0.1,bquote(sigma[alpha] == .(mySigma)), cex=1.25)
text(0.6,0.6,paste("(The mean is ", myMu, ")", sep=""), cex=1.25)
text(0.5,0.9,bquote(paste("sigma^2 = ", sigma^2 == .(format(mySigma^2,2)))))

Monday, October 12, 2009

### GEE ?
library(nlme)
lme(travel~1, random=~1|Rail, method="ML", data=Rail)

y<-Rail$travel
id<-Rail$Rail
time<-rep(1:3,6)

X<-rep(1,length(y))
mu<-solve(t(X)%*%X)%*%t(X)%*%y

Y<-cbind(y[time==1],y[time==2],y[time==3])
Res<-Y-matrix(rep(mu,18),6)
R<-cor(Res)
phi<-1/var(y)

V<-matrix(rep(1,9),3)
for(i in 1:3) for(j in 1:3) if(i!=j) V[i,j]=mean(c(R[1,2],R[1,3],R[2,3]))

solve(t(X)%*%solve(kronecker(V,diag(6)))%*%X)%*%t(X)%*%solve(kronecker(V,diag(6)))%*%y

alpha<-mean((Res[,1]*Res[,2]+Res[,2]*Res[,3])/2)/phi

Sunday, October 11, 2009

Logistic Regression - estimation

### GLM

## logistic regression
respire<-read.csv("F:/publish/data analysis using R/data/respire.csv")
respire2<-read.csv("F:/publish/data analysis using R/data/respire2.csv")

out<-glm(outcome ~ treat, weights=count, family=binomial, data=respire)

logist.lm<-function(beta){
p0<-exp(beta[1])/(1+exp(beta[1]))
p1<-exp(beta[1]+beta[2])/(1+exp(beta[1]+beta[2]))
ll<-16*log(p0)+48*log(1-p0)+40*log(p1)+20*log(1-p1)
return(-ll)
}
optim(c(-1,2),logist.lm)

library(rootSolve)

model<-function(beta){
x<-(respire$treat=="test")
y<-respire$outcome
mu<-exp(beta[1]+beta[2]*x)/(1+exp(beta[1]+beta[2]*x))
W<-respire$count
F1<-sum(W*(y-mu))
F2<-sum(W*(y-mu)*x)
c(F1=F1,F2=F2)
}
multiroot(f=model,start=c(-1,2))$root

Thursday, August 13, 2009

Mixed Models - REML

library(nlme)

# 6 Rails have 3 repeatitions each.
n<-6; r<-3;

# -log Restricted Maximum Likelihood
# beta[1]: mean, exp(beta[2]): sigma_a, exp(beta[3]): sigma
# exp is used to prevent negative estimator for variance
reml<-function(beta){
y<-Rail$travel
X<-rep(1,n*r)
Z<-kronecker(diag(n),rep(1,r))
V<-exp(beta[2])*Z%*%t(Z)+exp(beta[3])*diag(n*r)

ml<-log(det(V))/2+t(y-beta[1])%*%solve(V)%*%(y-beta[1])/2+log(det(t(X)%*%solve(V)%*%X))/2

return(ml)
}

# minimize reml function with optim()
optim(c(66.5, 5, 2),reml)

$par
[1] 66.505645 6.421954 2.782963
> sqrt(exp(c(6.421954,2.782963)))
[1] 24.803307 4.020802

# ML: 22.62435 4.020779
# REML: 24.80547 4.020779
lme(travel~1, random=~1|Rail, data=Rail, method="ML")
lme(travel~1, random=~1|Rail, data=Rail)

Tuesday, July 7, 2009

Simulation


### Functions
test_stat <- function(x){
  ts <- numeric()
  
  X <- matrix(x,r)
  rs <- rowSums(X)  # ni.
  cs <- colSums(X)  # n.j
  
  # E(n)  
  M <- outer(rs,cs)/sum(X)  
    
  # 1st test statistic (LRT)
  ts[1]<-1-pchisq(2*sum(X*log(X/M),na.rm=T),(r-1)*(c-1));

  # 2nd test statistic (Pearson's Chi-square)
  ts[2]<- 1-pchisq(sum((X-M)^2/M),(r-1)*(c-1));
  
  ts[3:12]<-exact(X,rs,cs);
  
  return(ts)
}
exact <- function(x,rs,cs){
  T <- r2dtable(iter,rs,cs)
  obs <- unlist(mmh(x))
  exp <- sapply(T,mmh)

  return(apply(exp>obs,1,mean))
}
mh<-function(x,i,j){
  N<-x[1,1]+x[1,j]+x[i,1]+x[i,j];
  return((x[i,j]*x[1,1]/N-x[i,1]*x[1,j]/N)/sqrt((x[1,1]+x[i,1])/N*(x[1,1]+x[1,j])/N*(x[i,j]+x[i,1])/(N-1)*(x[i,j]+x[1,j])))
}
mmh<-function(x){
  Z1<-mh(x,2,2);  Z2<-mh(x,2,3);  Z3<-mh(x,3,2);  Z4<-mh(x,3,3)
  Z_one<-max(Z1,Z2,Z3,Z4)
  Z_two<-max(abs(Z1),abs(Z2),abs(Z3),abs(Z4))
  return(list(Z_one,Z1,Z2,Z3,Z4,
              Z_two,abs(Z1),abs(Z2),abs(Z3),abs(Z4)))
}


### sample size and number of iteration
n <- 100; r<-3; c<-3; s <- 1000; iter <- 1000; 

### test statistics
num_ts<-12; names_ts <- c('QL', 'Qp',
                          'One-sided', 'MH22', 'MH23', 'MH32','MH33',
                          'Two-sided', 'MH22', 'MH23', 'MH32','MH33')

### parameters
alpha <- c(1,0.2,0.2); beta <- c(1,0.2,0.1); gamma <- matrix(c(1,1,1,1,3.,1.,1,1.,1.),r)

### s 2*2 random tables with n from multinomial distribution
tables <- rmultinom(s, n, outer(alpha,beta)*gamma)

p <- apply(tables,2,test_stat)
rownames(p)<-names_ts

### empirical tail probability
percent <- cbind(apply((p<=0.05),1,mean,na.rm=TRUE))
rownames(percent)<- names_ts

write.csv(rbind(alpha,beta,gamma),"E:/documents/maxor/out/sim.csv",append=TRUE)
write.csv(percent,"E:/documents/maxor/out/sim.csv",append=TRUE)

Wednesday, April 29, 2009

Nonlnear Models - GLS


### Indometh

ols<-nls(conc~SSbiexp(time,b1,b2,b3,b4),
              data=Indometh[Indometh$Subject==1,])
beta<-coef(ols)

for(i in 1:10){
  oldbeta<-beta
  out<-nls(conc~SSbiexp(time,b1,b2,b3,b4),
             data=Indometh[Indometh$Subject==1,],
             weights=1/SSbiexp(time,beta[1],beta[2],beta[3],beta[4])^2)
  beta<-coef(out)              
}

BETA<-matrix(rep(0,6*4),6)
for(i in 1:6){
  BETA[i,]<-coef(nls(conc~SSbiexp(time,b1,b2,b3,b4),
            data=Indometh[Indometh$Subject==i,]))
}
colnames(BETA)<-c('b1','b2','b3','b4')


### Theoph
coplot(conc~Time | Subject, data=Theoph)
nls(conc~SSfol(Dose,Time,lKe,lKa,lCl),data=Theoph)
ols<-nls(conc~SSfol(Dose,Time,lKe,lKa,lCl),
              data=Theoph[Theoph$Subject==1,])
beta<-coef(ols)

BETA<-matrix(rep(0,12*3),12)
for(i in 1:max(as.numeric(Theoph$Subject)))
  BETA[i,]<-coef(nls(conc~SSfol(Dose,Time,b1,b2,b3),
                 data=Theoph[Theoph$Subject==i,]))

R