# Build per-term gene counts and a term-by-term joint-count matrix from a
# tab-separated annotation table.
#
# Input : gene2go.mm.6Dec06.txt (no header). Only columns 2 and 3 are used:
#         V2 is treated as the gene identifier and V3 as the term identifier
#         (presumably the NCBI gene2go layout -- TODO confirm against the file).
# Output: expcounts.txt -- one line per term: term <TAB> number of distinct
#                          genes carrying that term.
#         joint.txt     -- square, space-separated matrix with rows/columns in
#                          the order of `expterm`; cell [i, j] is the number of
#                          distinct genes that carry both term i and term j.
#                          The diagonal is written as 0.

expdat <- read.table(
  file = "gene2go.mm.6Dec06.txt", header = FALSE, sep = "\t", quote = ""
)
expgene <- unique(expdat$V2)
totgene <- length(expgene)
expterm <- unique(expdat$V3)
n_terms <- length(expterm)

# Distinct (gene, term) pairs, encoded as integer positions into `expgene` and
# `expterm`. Everything below works on these two index vectors, so the table is
# de-duplicated exactly once instead of once per matrix cell.
pairs <- unique(expdat[, 2:3])
gene_idx <- match(pairs$V2, expgene)
term_idx <- match(pairs$V3, expterm)

# calculate univariate frequencies ----
# Number of distinct genes per term, in `expterm` order.
count <- tabulate(term_idx, nbins = n_terms)
expcounts <- data.frame(expterm, count)
write.table(
  expcounts, "expcounts.txt",
  row.names = FALSE, sep = "\t", col.names = FALSE
)

# create matrix of joint counts ----
# Rows are streamed to a single open connection; the file is never re-read.
# seq_len() keeps the loop correct when there is only one term (the former
# `2:length(expterm)` would have iterated over c(2, 1) in that case).
joint_con <- file("joint.txt", open = "w")
tryCatch(
  {
    for (i in seq_len(n_terms)) {
      # Genes that carry term i.
      genes_i <- gene_idx[term_idx == i]
      # For every term j: how many of those genes also carry term j.
      joint_row <- tabulate(term_idx[gene_idx %in% genes_i], nbins = n_terms)
      # A term paired with itself is reported as 0 (pairs minus genes is 0
      # when only one term is selected), not as the term's own gene count.
      joint_row[i] <- 0L
      write(joint_row, file = joint_con, ncolumns = n_terms)
    }
  },
  finally = close(joint_con)
)

# Legacy code below is kept for reference only; it is not executed.

# calculate covariances
# expcounts <- read.table(file="expcounts.txt",header=FALSE)
# for (i in 1:length(expterm))
#   {for (j in 1:length(expterm))
#     {dat2[i,j] <-
#       dat2[i,j]/totgene-((expcounts[expcounts$V1==expterm[i],2]/totgene)*(expcounts[expcounts$V1==expterm[j],2]/totgene))}}

# calculate variances
# vari <- vector(mode="numeric",length=length(expterm))
# for (i in 1:length(expterm))
#   {vari[i] <-
#     (expcounts[expcounts$V1==expterm[i],2]/totgene)*((totgene-expcounts[expcounts$V1==expterm[i],2])/totgene)}

# calculate correlations
# for (i in 1:length(expterm))
#   {for (j in 1:length(expterm))
#     {if (i==j) {dat2[i,j] <- 1.0} else {dat2[i,j] <- dat2[i,j]/sqrt(vari[i]*vari[j])}}}
# dat2 <- t(dat2)
# write(dat2,file="corr.txt")

# see correl.c for converting the joint probability matrix into the
# correlation matrix