Saturday, October 27, 2007

Collapse table and calculate p-value by Monte Carol Method


# colapse rows and columns whose cell counts < 4
rcbind <- function(tt)
{
# rearraign table
cc <- names(sort(tt[names(sort(rowSums(tt==max(tt)), decreasing=T))[1],], decreasing=T))
rr <- names(sort(tt[,names(sort(colSums(tt==max(tt)), decreasing=T))[1]], decreasing=T))
tt <- tt[rr,cc]

# p-value for Consensus Test (binomial test)
m1=max(tt)
m2=max(tt[1,2:dim(tt)[2]],tt[2:dim(tt)[1],1])
p <- binom.test(m1,m1+m2)$p.value # 2-sided

if(p>=0.05){
print("There is no concensus.")
return(list(0,tt))
} else
if(sum(tt[2:dim(tt)[1],2:dim(tt)[2]]>=4)==0){
print("There is no double mutation cell >=4.")
return(list(0,tt))
} else {
# inner table
inner <- tt[2:dim(tt)[1],2:dim(tt)[2]]
rrr <-rowSums(inner>=4)
ccc <-colSums(inner>=4)

# colnames and rownames whose cell counts are larger than 4
rnames <- c(rownames(tt)[1],names(rrr[rrr>=1]))
cnames <- c(colnames(tt)[1],names(ccc[ccc>=1]))

# colnames and rownames which is collapsed
mr <- rownames(tt)[-charmatch(rnames,rownames(tt))]
mc <- colnames(tt)[-charmatch(cnames,colnames(tt))]

# colapsing rows and columns whose cell counts are smaller than 4
return(list(1,rbind(cbind(tt[rnames,cnames],rowSums(tt[rnames,mc]))
,c(colSums(tt[mr,cnames]),sum(tt[mr,mc])))))
}
}

test_stat <- function(x){
# Pearson's Chi-square test
expect <- (matrix(rowSums(x))%*%t(matrix(colSums(x))))/sum(x)
x2 <- sum((x-expect)^2/expect)

# LRT
temp <- x*log(x/expect)
g2 <- 2*sum(temp[x!=0])

# Maximal Odds Ratio & Maximal MH
or <- rep(0,(dim(x)[1]-1)*(dim(x)[2]-1))
dim(or) <- c(dim(x)[1]-1,dim(x)[2]-1)
diff <- or
for(i in 2:dim(x)[1])
for(j in 2:dim(x)[2]){
or[i-1,j-1] <- abs(log(x[i,j]*x[1,1]/x[i,1]/x[1,j])
/sqrt(1/x[1,1]+1/x[1,j]+1/x[i,1]+1/x[i,j]))
diff[i-1,j-1] <- abs(x[i,j]*x[1,1]-x[i,1]*x[1,j])
/sqrt((x[1,1]+x[i,1])*(x[1,1]+x[1,j])*(x[1,j]+x[i,j])
*(x[i,1]+x[i,j])/(x[i,j]+x[1,1]+x[i,1]+x[1,j]-1))

}
# df <- (dim(x)[1]-1)*(dim(x)[2]-1)
# 1-pchisq(c(x2,g2),df)

c(x2,g2,max(or),max(diff))

}



exact <- function(x){

ts_o <- test_stat(x)

reference <- r2dtable(iter, rowSums(x), colSums(x))

ts_e <- sapply(reference, test_stat)

p_value <- c(fisher.test(x)$p.value,apply(ts_e>ts_o,1,mean,na.rm=T))
names(p_value) <- c("Exact","X2","LRT","Maximal OR","Maximal MH")

return(p_value)

}


nsi<-read.csv("g:/programming/R/nsi.csv")

iter =10000
position1 <- nsi$x18
position2 <- nsi$x19

ss <- rcbind(table(position1,position2))

if(ss[[1]]) p_value <- exact(ss[[2]])

ss[[2]]
p_value

aa <- r2dtable(1,c(90,50,40),c(90,45,45))
exact(aa[[1]])

No comments: