########Required packages#####3
library(GenomicFeatures)
library(rtracklayer)
library(Rsamtools)
library(GenomicRanges)
library(edgeR)
library(Mfuzz)
library(DESeq)
library(DESeq2)
library(org.Hs.eg.db)
library(goseq)
library(SPIA)
library(ggplot2)
library(GO.db)
library(RColorBrewer)
library(igraph)
library(RCytoscape)
############parallel computing###########
library(foreach)
library(parallel)
library(multicore)
library(doMC)
######set number of cores for parallel computing#####################################################
######a majority of the functions require at least 2 cores###########################################
######we typically run with 20 cores out of 24 and we have 48GB of RAM###############################
######and some functions are too memory intensive so we run less cores during those analyses#########
registerDoMC(cores=20)


########################################################
######create databases and objects need for analysis#####
########################################################


###########create annotation file objects and databases######
human19<-makeTranscriptDbFromUCSC("hg19","knownGene")
h19e<-exons(human19)
h19eg<-exonsBy(human19, "gene")
load("lincRNAbase")
RNAs<-c(h19eg,lincRNA)

#############make an object of gene lengths########
IDS<-names(RNAs)

genelengths<-foreach(i=1:length(IDS),.combine='c') %dopar% {
y<-RNAs[i]
y<-y@unlistData
y<-y@ranges
w<-y@width
sum(w)
}
names(genelengths)<-IDS

#############make an object of exon lengths########
IDS.ex<-h19e@elementMetadata$exon_id
y<-h19e@ranges
exonlengths<-y@width
names(exonlengths)<-IDS.ex

##############create an index for exon to gene identification ###################
blue<-foreach(i=1:length(h19eg)) %dopar%{	
	x<-h19eg[[i]]
	x<-x@elementMetadata
	x<-x[,1]}
	
names(blue)<-names(h19eg)

exon.to.gene.ids<-foreach(i=1:length(blue), .combine='rbind') %dopar% {
	y<-cbind(blue[[i]],rep(names(blue[i]),length(blue[[i]])))}

y<-exon.to.gene.ids[,2]
names(y)<-exon.to.gene.ids[,1]
exon.to.gene.ids<-y

#######create disease database for R from http://diseases.jensenlab.org/Entity?textmining=10&type1=9606&type2=-26&id1=ENSP00000362296 #######
########of gene disease associations with a z-score higer than or equal to 1.952790 which is equivalent to p<0.5######
x<-read.delim("disease.tsv", header=F)
y<-which(x[,5]>=1.952790)
x<-x[y,]
y<-as.character(unique(x[,2]))
xs<-x[,2]
z<-as.character(x[,4])
w<-foreach(i=1:length(y)) %dopar% {
m<-which(xs==y[i])
z[m]}
names(w)<-y
disease<-w
z<-names(SYMBOLS)
names(z)<-SYMBOLS
names(disease)<-z[names(disease)]

#######Create a marker list from zeng et al (Cell, 2012) paper out of allen brain institute##########
#######gleaned from figure 6 and made into a .csv###############
#########converted matrix into a list of list#######
allen<-read.delim("allen Layer markers.csv",header=TRUE)
rownames(allen)<-allen[,1]
allen<-allen[,2:7]

markers<-foreach(i=1:dim(allen)[1]) %dopar% {
x<-allen[i,]
y<-which(x>0)
colnames(allen)[y]}
names(markers)<-rownames(allen)


#########make a disease to gene list###############
disease.names<-goseq:::reversemapping(disease)


#################################################
######make co-ciatation network################
#################################################

 
###get genes to pmids#####
y<-as.list(org.Hs.egPMID)
x<-intersect(names(y),names(h19eg))
PMID<-foreach(i=1:length(x)) %dopar% {
xs<-unlist(y[which(x[i]==names(y))])}
names(PMID)<-x

######remove genes without citations######
x<-unlist(PMID)
y<-unique(x)
genes.per.article<-foreach(i=1:length(y),.combine='c') %dopar% {
length(which(y[i]==x))}
names(genes.per.article)<-y

z<-c(names(genes.per.article[which(genes.per.article<=1)]),names(genes.per.article[which(genes.per.article>1000)]))
x<-foreach(i=1:length(PMID)) %dopar% {
setdiff(PMID[[i]],z)}
names(x)<-names(PMID)
PMID.short<-x

##########make vector of papers per gene####################

PMID.len<-foreach(i=1:length(PMID.short),.combine='c') %dopar% {
length(PMID.short[[i]])}
names(PMID.len)<-names(PMID.short)

y<-names(h19eg)
x<-intersect(names(PMID.len),names(h19eg))
b<-setdiff(y,x)
z<-(rep(0,length(b)))
names(z)<-b
PMID.len<-c(PMID.len,z)


######create vector of genes per paper#######
x<-goseq:::reversemapping(PMID.short)
paper.len<-foreach(i=1:length(x),.combine='c') %dopar% {
length(x[[i]])}
names(paper.len)<-names(x)

##################################################

y<-names(h19eg)
green<-names(paper.len)

x<-paper.len/1000
weight<-1-x

connections<-foreach(b=1:length(y),.combine='rbind') %dopar% {
blue<-PMID.short[[y[b]]]
x<-foreach(i=1:length(y),.combine='c') %do% {
z<-intersect(blue,unlist(PMID.short[y[[i]]]))
sum(weight[z])
}
}

rownames(connections)<-y
colnames(connections)<-y

z<-foreach(i=1:length(y),.combine='c') %dopar% {
connections[i,i]}
names(z)<-rownames(connections)
b<-which(z>0.997)
connect<-connections[b,b]
for(b in 1:dim(connect)[1]) {
connect[b,b]<-0}

connect.gr<-graph.adjacency(connect,"undirected",weighted=TRUE,add.rownames=TRUE)


######################################
#####Functions defined by user########
######################################

#########################################################
##########functions for enrichment testing###############
#########################################################

#########uses goseq package to find enrichment of diseases in a list of gene vectors (entrez ids) with an FDR<=0.05#####
#######needs an annotation object produced by 'exonsBy'########
#######needs a vector of the gene lengths#####
######needs a disease database #####
dis.cluster.enrich<-function(list.gene.vec,disease,anno.obj){
y<-names(anno.obj)
genlen<-genelengths[sort(y)]
magic<-foreach(i=1:length(list.gene.vec)) %dopar%{
x<-intersect(list.gene.vec[[i]],y)
z<-setdiff(y,x)
zs<-rep(0,length(z))
xs<-rep(1,length(x))
names(zs)<-z
names(xs)<-x
zt<-c(zs,xs)
zt<-zt[sort(y)]
pwf<-nullp(zt,bias.data=genlen,plot.fit=FALSE)
blue<-goseq(pwf,gene2cat=disease)
FDR<-p.adjust(blue[,2],"fdr")
green<-cbind(blue[which(FDR<=0.05),1:2],FDR[which(FDR<=0.05)])
}
}


###########function to get genes driving a disease from your gene vector (entrez ids) and write table to file################
##########requires a matrix of the significantly changing genes#######################
###########requires a normalized matrix of gene counts################################

driven<-function(name.of.disease,your.gene.vector) {
x<-disease.names[[name.of.disease]]
blur<-intersect(x,your.gene.vector)
z<-SYMBOLS[blur]
x<-ave.cts.ceg[blur,]
b<-cbind(edge.genes[blur,23],edge.genes[blur,28])
fin<-cbind(z,x,b)
colnames(fin)<-c("Gene Symbols",colnames(ave.cts.ceg),"Fold Difference","FDR")
write.csv(fin,paste(name.of.disease,".csv",sep=""))}


#######uses goseq package to find enrichment of GO cats in a list of gene vectors (entrez ids)########
#######genome must be from UCSC genome browser and as character i.e. "hg19"############
######cats must be "GO:CC","GO:BP","GO:MF", or "KEGG"######################
#######needs an annotation object ########
go.cluster.enrich<-function(list.gene.vec,cats,genome,anno.obj){
y<-names(anno.obj)
GO<-getgo(y,genome,"knownGene",cats)
genlen<-genelengths[sort(y)]
magic<-foreach(i=1:length(list.gene.vec)) %dopar%{
x<-intersect(list.gene.vec[[i]],y)
z<-setdiff(y,x)
zs<-rep(0,length(z))
xs<-rep(1,length(x))
names(zs)<-z
names(xs)<-x
zt<-c(zs,xs)
zt<-zt[sort(y)]
pwf<-nullp(zt,bias.data=genlen,plot.fit=FALSE)
blue<-goseq(pwf,gene2cat=GO)
FDR<-p.adjust(blue[,2],"fdr")
green<-cbind(blue[which(FDR<=0.05),1:2],FDR[which(FDR<=0.05)])
}
}

##############Use SPIA package to find enrichment of KEGG patways in a list of gene vectors (entrez ids)############
#########memory intensive#########
########requires a fold difference between max and min values over timecourse#############
#######requires a fold change over threshold##########

SPIAcluster<-function(list.gene.vec){
fin<-foreach (i=1:length(lis)) %dopar% {
x<-FD[list.gene.vec[[i]]]
z<-FC[list.gene.vec[[i]]]
if (length(which(z==Inf))>=1){
b<-which(z==Inf)
blue<-replace(z,b,x[names(b)]) 
spia(de=blue,all=IDS,organism="hsa",nB=2000,verbose=FALSE)} else {
spia(de=z,all=IDS,organism="hsa",nB=2000,verbose=FALSE)}
}
}


#################################################################
######network analysis functions utilizing igraph package#########
#################################################################


########takes two gene vectors (entrez ids) and finds common neighbors in a graph object########
##########requires the adjacency matrix connected to the graph object###########################
#########deg=degrees away from each node to establish neighborhood##############################
two.cluster.gr<-function(graph.object,v1,v2,deg,adj.matrix) {
x1<-neighborhood(graph.object,deg,intersect(b,rownames(adj.matrix)))
x2<-neighborhood(graph.object,deg,intersect(z,rownames(adj.matrix)))
y1<-foreach(i=1:length(x1),.combine='c') %dopar% {
xs<-x1[[i]]
ys<-foreach(b=1:length(x1),.combine='c') %do%{
intersect(x1[[b]],xs)}
}
y2<-foreach(i=1:length(x2),.combine='c') %dopar% {
xs<-x2[[i]]
ys<-foreach(b=1:length(x2),.combine='c') %do%{
intersect(x2[[b]],xs)}
}
y<-intersect(y1,y2)
z<-induced.subgraph(graph.object,y)
}

#########makes a list of lists containing the eigenvector and Betweenness centrality scores for a list of gene vectors (entrez ids)########
########normalizes betweenness scoers to max betweenness, i.e. betweenness.score/max.betweenness.score for easy comparison############
##########requires the adjacency matrix connected to the graph object###########################
vital.nodes.cl<-function(graph.object,list.gene.vec,deg,adj.matrix){
x<-foreach(i=1:length(list.gene.vec)) %dopar% {
z<-neighborhood(graph.object,deg,intersect(list.gene.vec[[i]],rownames(adj.matrix)))
y1<-foreach(m=1:length(z),.combine='c') %do% {
xs<-z[[m]]
xsn<-z[-m]
ys<-foreach(b=1:length(xsn),.combine='c') %do%{
intersect(xsn[[b]],xs)}
}
y1<-c(V(graph)$names[y1],intersect(list.gene.vec[[i]],rownames(adj.matrix)))
y1<-unique(y1)
y<-induced.subgraph(graph.object,y1)
blue<-evcent(y)$vector
blue<-sort(blue[which(blue>0)])
red<-sort(betweenness(y,weight=1/E(y)$weight),decreasing=TRUE)
red<-red/max(red)
z<-list(blue,red)
names(z)<-c("Eigenvector Centralities", "Betweenness")
z}
}

###################################################################################
##########functions to write .csv files for the various objects generated##########
###################################################################################

#########write a table from an object produced by dis.cluster.enrich################
write.dis.cl<-function(lis,name){
blue<-foreach(i=1:length(lis), .combine='rbind') %dopar% {
x<-lis[[i]]
y<-rep(names(lis[i]),dim(x)[1])
cbind(y,x)}
colnames(blue)<-c("Cluster", "Disease","p-value","FDR")
write.csv(blue,paste(name,".csv",sep=""))}

#######write a table from object produced by go.cluster.enrich when using clusters derived from mfuzz########
write.go.cl<-function(lis,clusters,name){
blue<-foreach(i=1:length(lis), .combine='rbind') %dopar% {
x<-lis[[i]]
if (dim(x)[1]==0) {} else{
genevec<-clusters[[i]]
y<-rep(names(lis[i]),dim(x)[1])
z<-GOnames[x[,1]]
names(z)<-rownames(x)
poo<-foreach(b=1:length(z),.combine='rbind') %do% {
m<-length(intersect(GOids[[x[b,1]]],genevec))
j<-length(GOids[[x[b,1]]])
cbind(m,j,m/j*100)}
cbind(y,z,x,poo)}}
colnames(blue)<-c("Cluster", "GO name","GO ID","p-value","FDR","# Genes","Size of GO category","% of category")
write.csv(blue,paste(name,".csv",sep=""))}

#######write a table from object produced by go.cluster.enrich for stages########
write.go.stage<-function(lis,stages,name){
blue<-foreach(i=1:length(lis), .combine='rbind') %dopar% {
x<-lis[[i]]
if (dim(x)[1]==0) {} else{
genevec<-stages[[i]]
y<-rep(names(lis[i]),dim(x)[1])
z<-GOnames[x[,1]]
poo<-foreach(b=1:length(z),.combine='rbind') %do% {
m<-length(intersect(GOids[[x[b,1]]],genevec))
j<-length(GOids[[x[b,1]]])
cbind(m,j,m/j*100)}
cbind(y,z,x,poo)}}
colnames(blue)<-c("Stage", "GO name","GO ID","p-value","FDR","# Genes","Size of GO category","% of category")
write.csv(blue,paste(name,".csv",sep=""))}

#########write a table from object produced by goseq function##########
write.GO<-function(lis,genevec,name) {
x<-GOnames[lis[,1]]
y<-foreach(i=1:length(x),.combine='rbind') %dopar% {
z<-length(intersect(GOids[[lis[i,1]]],genevec))
b<-length(GOids[[lis[i,1]]])
cbind(z,b,z/b*100)}
blue<-cbind(x,lis,y)
colnames(blue)<-c("GO name","GO ID","p-value","FDR","# Genes","Size of GO category","% of category")
write.csv(blue,paste(name,".csv",sep=""))}


########write kegg with zscore for stages list.gene.vectors####

write.kegg.stag<-function(lis,name) {
blue<-foreach(i=1:length(lis), .combine='rbind') %dopar% {
x<-lis[[i]]
if (dim(x)[1]==0) {} else{
y<-rep(names(lis[i]),dim(x)[1])
z<-qnorm(1-(x[,10]/2))
cbind(x,y,z)}}
colnames(blue)<-c(colnames(lis[[1]]),"Stage", "Z-Score")
write.csv(blue,paste(name,".csv",sep=""))}

#######write gene table#####
full.table<-function(mat,ave.mat,lis,name){
blue<-foreach(i=1:length(lis), .combine='rbind') %dopar% {
x<-lis[[i]]
y<-names(lis[i])
y<-rep(y,length(x))
cbind(SYMBOLS[x],y,FD[x],FC[x],ave.mat[x,],mat[x,])}
colnames(blue)<-c("Gene Symbols","Stages","Fold Difference (x/threshold)","Fold Difference (max/min)",colnames(ave.mat),colnames(mat))
write.csv(blue,paste(name,".csv",sep=""))}

###########write pdf showing cluster centers and number of genes in each cluster#####
#####needs an object from mfuzz function output#######
write.cl.pdf<-function(cluster.obj,times,name){
x<-as.character(1:dim(cluster.obj[[1]])[1])
xn<-as.character(cluster.obj[[2]])
ysg<-cluster.obj[[1]]
pdf(paste(name,".pdf",sep=""),onefile=T,width=11)
for(i in 1:length(x)) {
y<-paste("Cluster ",x[i],sep="")
y<-paste(y,"\n",sep="")
z<-paste("Composed of ",xn[i],sep="")
z<-paste(z," Genes",sep="")
z<-paste(y,z)
plot(times,ysg[i,], type="line",col="darkblue",xlab="Days post Induction",ylab="Relative Change in Expression",main=z)}
dev.off()}

##############################
#####start analysis#########
#############################

#########read in bam files###########
########shown below is an example all files were read in the same way################
###########makes big files so it is best to get rid of excess files between samples or you will rum out of memory#########
x<-readGappedAlignments("sample.ID.bam",format="BAM")
exon.sample.ID<-summarizeOverlaps(h19e,x,mode="IntersectionNotEmpty")
RNA.sample.ID<-summarizeOverlaps(RNAs,x,mode="IntersectionNotEmpty")
rm(x)
counts.exon.sample.ID<-assay(exon.sample.ID)
rm(exon.sample.ID)
counts.RNA.sample.ID<-assay(RNA.sample.ID)
rm(RNA.sample.ID)
###############counts obtained were bound into matrices with columns being samples and row being genes#########
############so i now have two matrices named RNA.raw and ex.raw containing the raw counts for each gene or exon########


########calculating gene thresholds using the following formula#############
####threshold=(sum("gene lengths")/"read length")/mean("effective library size")*"gene lengths"/"read length")######
#########uses DEseq package to calculate "effective library size"################

treat<-factor(c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77"))

y<-DGEList(counts=RNA.raw,group=treat)
y<-calcNormFactors(y)
els<-y$samples$lib.size*y$samples$norm.factors
thresholds<-(sum(genelengths)/50)/mean(els)*(genelengths/50)

########calculating exon thresholds using the following formula#############
####threshold=(sum("exon lengths")/"read length")/mean("effective library size")*"exon lengths"/"read length")######
#########uses DEseq package to calculate "effective library size"################


y<-h19e@ranges
exonlengths<-y@width
names(exonlengths)<-IDS.ex

treat<-factor(c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77"))

y<-DGEList(counts=ex.raw,group=treat)
y<-calcNormFactors(y)
els<-y$samples$lib.size*y$samples$norm.factors
thresholds.ex<-(sum(exonlengths)/50)/mean(els)*(exonlengths/50)


#############normalize RNA.raw for visulaization and comparison purposes using DEseq package#######################

bob<-data.frame(rownames=colnames(RNA.raw),condition=c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77"))
Sue<-newCountDataSet(RNA.raw,bob$condition)
x<-estimateSizeFactors(Sue)
norm.RNA.cts<-t(t(RNA.raw)/sizeFactors(x))


#######Calculate fold differences from threshold and max/min###########
x<-apply(norm.red,1,max)
FD<-x/thresholds

z<-apply(norm.red,1,max)
b<-apply(norm.red,1,min)
FC<-z/b

#######Check sample replicates using Pearson correlations#########
########print heatmapp################
y<-cor(norm.RNA.cts,method="pearson")

colnames(y)<-c("Day 0-A","Day 0-B","Day 0-C","Day 0-D","Day 7-A","Day 7-B","Day 7-C","Day 7-D","Day 12-A","Day 12-B","Day 19-A","Day 19-B","Day 19-C","Day 19-D","Day 26-A","Day 26-B","Day 33-C","Day 33-D","Day 49-C","Day 49-D","Day 63-C","Day 63-D","Day 77-C","Day 77-D")
rownames(y)<-c("Day 0-A","Day 0-B","Day 0-C","Day 0-D","Day 7-A","Day 7-B","Day 7-C","Day 7-D","Day 12-A","Day 12-B","Day 19-A","Day 19-B","Day 19-C","Day 19-D","Day 26-A","Day 26-B","Day 33-C","Day 33-D","Day 49-C","Day 49-D","Day 63-C","Day 63-D","Day 77-C","Day 77-D")
heatmap(y,,Colv=NA,Rowv=NA,col=brewer.pal(6,"Blues")) 

postscript("Corr heatmap.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
heatmap(y,Colv=NA,Rowv=NA,col=brewer.pal(6,"Blues")) 
dev.off()


###########Check sample replicates using Pearson correlations comparing the two runs###########
#############normalize genes and correlations for comparison of runs#######################

blue<-cbind(RNA.raw[,1:8],RNA.raw[,11:14])


bob<-data.frame(rownames=colnames(blue),condition=c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 19","Day 19","Day 19","Day 19"))
Sue<-newCountDataSet(blue,bob$condition)
x<-estimateSizeFactors(Sue)
norm.blue<-t(t(blue)/sizeFactors(x))


y<-cor(norm.blue,method="pearson")

colnames(y)<-c("Day 0-C","Day 0-D","Day 0-A","Day 0-B","Day 7-C","Day 7-D","Day 7-A","Day 7-B","Day 19-C","Day 19-D","Day 19-A","Day 19-B")
rownames(y)<-c("Day 0-C","Day 0-D","Day 0-A","Day 0-B","Day 7-C","Day 7-D","Day 7-A","Day 7-B","Day 19-C","Day 19-D","Day 19-A","Day 19-B")
heatmap(y,Colv=NA,Rowv=NA,col=brewer.pal(6,"Blues")) 

postscript("Corr Run comp heatmap.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
heatmap(y,Colv=NA,Rowv=NA,col=brewer.pal(5,"Blues")) 
dev.off()


##########find genes with significant changes using edgeR package#############
############apply a 2 fold change cutoff##############
###########apply threshold to max value###########
treat<-factor(c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77"))

y<-DGEList(counts=RNA.raw,group=treat)
y<-calcNormFactors(y)

times<-factor(c(1,1,1,1,2,2,2,2,3,3,4,4,4,4,5,5,6,6,7,7,8,8,9,9))
design<-model.matrix(~times)

y<-estimateGLMCommonDisp(y,design)
y<-estimateGLMTagwiseDisp(y,design,prior.n=90)
fit<-glmFit(y,design)
lrt<-glmLRT(y, fit)
topTags(lrt)
summary(dt<-decideTestsDGE(lrt))
eR.DE.ID<-rownames(y)[as.logical(dt)]
eR.DEgenes<-norm.RNA.cts[eR.DE.ID,]
de.pval<-p.adjust(lrt$table[,4],"fdr")
names(de.pval)<-rownames(lrt$table)
x<-which(de.pval<=0.05)
y<-as.matrix(lrt$table)
z<-y[intersect(names(x),rownames(y)),]
fdr<-de.pval[x]
FoldDiff<-FC[names(x)]
edgeR.genes<-cbind(eR.DEgenes,FoldDiff,z,fdr)
x<-which(FoldDiff>=2)
edgeR.genes<-edgeR.genes[names(x),]

y<-apply(norm.RNA.cts,1,max)
x<-which(y>thresholds)

z<-intersect(names(x),rownames(edgeR.genes))
edgeR.genes<-edgeR.genes[z,]


##############find genes with significant changes using DEseq2 package###############
times<-c(0,0,0,0,7,7,7,7,12,12,19,19,19,19,26,26,33,33,49,49,63,63,77,77)
times<-data.frame(times)
Sue<-DESeqDataSetFromMatrix(RNA.raw,colData=times,design=~times)
colData(Sue)$times<-factor(colData(Sue)$times,levels=c(0,7,12,19,26,33,49,63,77))
colnames(Sue)<-colnames(RNA.raw)
Sue<-estimateSizeFactors(Sue)
Sue<-estimateDispersions(Sue)
Sue<-nbinomWaldTest(Sue,pAdjustMethod="fdr")
x<-as.matrix(results(Sue))
x<-x[which(x[,5]<=0.05),]
DEseq.genes<-cbind(norm.RNA.cts[rownames(x),],x)
colnames(DEseq.genes)<-c(colnames(norm.RNA.cts),colnames(x))


#########Find union of deseq and edge genes##########
#########to create a significant gene matrix#############
x<-intersect(rownames(edgeR.genes),rownames(DEseq.genes))

sig.genes<-cbind(edgeR.genes[x,],DEseq.genes[x,29])
colnames(sig.genes)<-c(colnames(sig.genes[,1:29]),"EdgeR FDR","DEseq FDR")


#########clean up R session###############

rm(x)
rm(y)
rm(z)
rm(fit)
rm(fdr)
rm(dt)
rm(design)
rm(de.pval)
rm(FoldDiff)
rm(lrt)
rm(treat)
rm(times)
rm(eR.DEgenes)
rm(eR.DE.ID)
rm(bob)
rm(Sue)
rm(blue)

#######use Mfuzz to uncover clusters of gene with similar expression###############
#######possible shapes =2^(9-1)=256################################################
########divided by 4 to give more genes per cluster =64############################
########first created average cts from norm.RNA.cts for each timepoint##########
ave.RNA.cts<-cbind(apply(norm.RNA.cts[,1:4],1,mean),apply(norm.RNA.cts[,5:8],1,mean),apply(norm.RNA.cts[,9:10],1,mean),apply(norm.RNA.cts[,11:14],1,mean),apply(norm.RNA.cts[,15:16],1,mean),apply(norm.RNA.cts[,17:18],1,mean),apply(norm.RNA.cts[,19:20],1,mean),apply(norm.RNA.cts[,21:22],1,mean),apply(norm.RNA.cts[,23:24],1,mean))
colnames(ave.RNA.cts)<-c("DO","D7","D12","D19","D26","D33","D49","D63","D77")

x<-new("ExpressionSet",exprs=ave.RNA.cts[rownames(sig.genes),])
z<-standardise(x)
cluster.obj<-mfuzz(z,64,1.25)
yc<-cluster.obj[[1]]
b<-cluster.obj[[3]]
b<-sort(b)

clusters<-foreach(i=1:64) %do% {
	x<-names(b[which(b==i)])}
names(clusters)<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}

write.cl.pdf(cluster.obj,c(0,7,12,19,26,33,49,63,77),"First Clusters")

###############################################################################
##########Check cluster enrichments using above functions and datbases#########
###############################################################################

#######Find diseases enriched in clusters##########
##########then write table##################
disease.cluster<-dis.cluster.enrich(clusters,disease)
x<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}
names(disease.cluster)<-x

write.dis.cl(disease.cluster,"Disease First table")

######Find Diseases enriched in all significant genes####
#########then write table###################
genlen<-genelengths[sort(names(h19eg))]
z<-setdiff(names(h19eg),rownames(sig.genes))
zs<-rep(0,length(z))
xs<-rep(1,length(sig.genes[,1]))
names(zs)<-z
names(xs)<-rownames(sig.genes)
zt<-c(zs,xs)
zt<-zt[sort(names(h19eg))]
pwf<-nullp(zt,bias.data=genlen)
blue<-goseq(pwf,gene2cat=disease)
FDR<-p.adjust(blue[,2],"fdr")
green<-cbind(blue[which(FDR<=0.05),1:3],FDR[which(FDR<=0.05)])
all.disease<-green
write.csv(all.disease,"all disease.csv")

#######Find all GO categories and "GO:Biological Processes" categories enriched in clusters##########
##########then write tables##################

GO.cluster<-go.cluster.enrich(clusters,c("GO:CC","GO:BP","GO:MF"))
GO.BP.cluster<-go.cluster.enrich(clusters,"GO:BP")
x<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}
names(GO.cluster)<-x
names(GO.BP.cluster)<-x


write.go.cl(GO.cluster,clusters,"GO clusters")
write.go.cl(GO.BP.first.cluster,clusters,"BP GO clusters")


#######Find all GO categories and "GO:Biological Processes" categories enriched in all significant genes##########
##########then write tables##################

y<-names(h19eg)
GO<-getgo(y,"hg19","knownGene")
genlen<-genelengths[sort(y)]
x<-intersect(y,rownames(sig.genes))
z<-setdiff(y,x)
zs<-rep(0,length(z))
xs<-rep(1,length(x))
names(zs)<-z
names(xs)<-x
zt<-c(zs,xs)
zt<-zt[sort(y)]
pwf<-nullp(zt,bias.data=genlen)
blue<-goseq(pwf,gene2cat=GO)
FDR<-p.adjust(blue[,2],"fdr")
green<-cbind(blue[which(FDR<=0.05),1:2],FDR[which(FDR<=0.05)])
all.GO.first<-green


write.GO(all.GO.first,rownames(sig.genes),"ALL GO")


y<-names(h19eg)
GO<-getgo(y,"hg19","knownGene","GO:BP")
genlen<-genelengths[sort(y)]
x<-intersect(y,rownames(sig.genes))
z<-setdiff(y,x)
zs<-rep(0,length(z))
xs<-rep(1,length(x))
names(zs)<-z
names(xs)<-x
zt<-c(zs,xs)
zt<-zt[sort(y)]
pwf<-nullp(zt,bias.data=genlen)
blue<-goseq(pwf,gene2cat=GO)
FDR<-p.adjust(blue[,2],"fdr")
green<-cbind(blue[which(FDR<=0.05),1:2],FDR[which(FDR<=0.05)])
all.GO.BP<-green


write.GO(all.GO.BP,rownames(sig.genes),"ALL GO BP")


#######Find KEGG pathways enriched in clusters##########
##########then write table##################
########memory intensive so need to cut down number of cores being used#######
#######return to 20 cores after analysis#################
registerDoMC(cores=16)
Kegg.cluster<-SPIAcluster(clusters)
x<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}
names(Kegg.cluster)<-x
registerDoMC(cores=20)

######Find Kegg pathways enriched in all significant genes####
#########then write table###################

x<-FD[rownames(sig.genes)]
z<-FC[rownames(sig.genes)]
b<-which(z==Inf)
blue<-replace(z,b,x[names(b)]) 
all.Kegg.first<-spia(de=blue,all=IDS,organism="hsa",nB=2000,verbose=FALSE)
write.csv(all.Kegg.first,"All Kegg.csv")


########Look for enrichments in allen brain markers in each cluster#############


mark.cluster<-dis.cluster.enrich(clusters,markers)
x<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}
names(mark.cluster)<-x


######Analyzing clusters utilizing co-citation network for central genes#########
net.cluster<-vital.nodes.cl(connect.gr,cluster.first,1)
names(net.cluster)<-foreach(i=1:64,.combine='c') %dopar% {paste("Cluster ",i,sep="")}

#########clean up R session###############
rm(b)
rm(w)
rm(x)
rm(y)
rm(xs)
rm(yc)
rm(z)
rm(zs)
rm(zt)
rm(green)
rm(blue)
rm(genlen)
rm(i)
rm(pwf)
rm(GO)
rm(FDR)

###################################################################
##############Analyze data for splicing changes####################
###################################################################

#############normalize ex.raw for visulaization and comparison purposes using DEseq package#######################
IDS.ex<-h19e@elementMetadata$exon_id
rownames(ex.raw)<-IDS.ex
bob<-data.frame(rownames=colnames(ex.raw),condition=c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77"))
Sue<-newCountDataSet(ex.raw,bob$condition)
x<-estimateSizeFactors(Sue)
norm.ex.cts<-t(t(ex.raw)/sizeFactors(x))

###############Use DEXSeq to uncover differential exon splicing###############
y<-unique(names(exon.to.gene.ids))
x<-norm.ex.first[y,]
y<-apply(x,1,max)
z<-which(y>thresholds.ex[rownames(x)])
x<-ex.raw[z,]
des<-c("Day 0","Day 0","Day 0","Day 0","Day 7","Day 7","Day 7","Day 7","Day 12","Day 12","Day 19","Day 19","Day 19","Day 19","Day 26","Day 26","Day 33","Day 33","Day 49","Day 49","Day 63"," Day 63","Day 77","Day 77")
exonCounts<-newExonCountSet(x,des,exon.to.gene.ids[rownames(x)],rownames(x))
exonCounts<-estimateSizeFactors(exonCounts)
exonCounts<-estimateDispersions(exonCounts,nCores=16,minCount=5,maxExon=100)
exonCounts<-fitDispersionFunction(exonCounts)

b<-rowMeans(counts(exonCounts))
plot(b, fData(exonCounts)$dispBeforeSharing, log = "xy",
main="mean vs CR dispersion")
x<-0.01:max(b)
y<-exonCounts@dispFitCoefs[1] + exonCounts@dispFitCoefs[2]/x
lines(x, y, col = "red")

exonCounts<-testForDEU(exonCounts)
exon.res<-DEUresultTable(exonCounts)
table(exon.res$padjust<0.05)
z<-norm.ex.first[as.character(exon.res[,2]),]
x<-apply(z,1,max)
y<-apply(z,1,min)
blue<-x/y
x<-which(poo>=2)
y<-exon.res[x,]
x<-which(y$padjust<0.05)
z<-y[x,]
exon.res<-z
exon.res<-cbind(exon.res,blue[exon.res[,2]])
colnames(exon.res)<-c(colnames(exon.res[,1:6]),"MAX/MIN")

x<-unique(exon.res$geneID)
y<-rownames(sig.genes)

sig.diff.exon<-norm.RNA.cts[intersect(x,y),]


#########clean up R session###############
rm(x)
rm(y)
rm(z)
rm(blue)
rm(bob)
rm(Sue)
###########enrichment analysis of spliced genes##########
exon.cluster<-foreach(i=1:64) %dopar% {
x<-intersect(rownames(sig.diff.exon),clusters[[i]])
}
names(exon.cluster)<-names(clusters)


exonperclus<-foreach(i=1:64, .combine='c') %dopar% {
x<-length(exon.cluster[[i]])/length(cluster[[i]])
x*100
}

go.cluster.exon<-go.cluster.enrich(exon.cluster,c("GO:CC","GO:BP","GO:MF"))
BP.go.cluster.exon<-go.cluster.enrich(exon.cluster,"GO:BP")
write.go.cl(BP.go.cluster.exon,"BP go exon")


disease.cluster.exon<-dis.cluster.enrich(exon.cluster,disease)
names(disease.cluster.exon)<-names(exon.cluster)


write.dis.cl(disease.cluster.exon,"Exon Diseases")


kegg.exon<-SPIAcluster(exon.cluster)


###########################################################
#############establishing stages##############################
###########run each cluster through a set of rules for each stage############
##########1 = have a positive rise (for pluri a negative rise) or be a continuation of earlier peak####################
##########2 = check check percentage of rise out of total range#####################
###########3 = have three people review the determined stages and rule if it fits or not########
############################################################################################

#########make a matrix of diffeerences in rise between timepoints##############
b<-cluster.obj[[1]]

x<-cbind(b[,2]-b[,1],b[,3]-b[,2],b[,4]-b[,3],b[,5]-b[,4],b[,6]-b[,5],b[,7]-b[,6],b[,8]-b[,7],b[,9]-b[,8])

######find pluirpotency stage genes###########
y<-which(x[,1]<(-0.5))
###gives 5  6  7  9 10 14 15 20 22 24 26 27 31 35 42 45 46 47 50 51 53 57 58 59 60 61 63#####
m<-which(x[names(y),1]>-1)
########bottom four 6 10 45 46 ####
#####kept 6 and 10#######


######ND stage##########
y<-which(x[,1]>0.0)
#### gives 1  2  3  4  8 11 12 13 16 17 18 19 21 23 25 28 29 30 32 33 34 36 37 38 39 40 41 43 44 48 49 52 54 55 56 62 64 ####
z<-b[y,]
z<-(apply(z,1,max)-apply(z,1,min))
which((x[y,1]/z)>0.10)
######## 2  3  4 12 13 16 18 19 21 23 25 28 29 32 33 34 36 37 38 40 41 43 44 48 49 52 55 56 64#########
##########bottom two left 28 and 49, kept 28######

######CS stage##########
m<-apply(b[,3:4],1,max)
m<-m-b[,2]
y<-m[which(m>0.1)]

y1<-x[which(x[,3]>0.25),3]
y2<-x[which(x[,2]>0.25),2]

blue<-unique(c(names(y),names(y1),names(y2)))
#### gives 1  2  3  4  8 9 10 11 12 14 16 17 18 21 23 24 26 28 30 32 33 34 36 38 40 42 43 44 45 46 49 51 53 54 55 56 57 58 59 60 61 62 63 ####
m[blue]
x[blue,2]
x[blue,3]

z<-b[names(y),]
z<-(apply(z,1,max)-apply(z,1,min))
(y-b[names(y),2])/z

z<-b[names(y1),]
z<-(apply(z,1,max)-apply(z,1,min))
(y1-b[names(y1),3])/z

z<-b[names(y2),]
z<-(apply(z,1,max)-apply(z,1,min))
(y2-b[names(y2),2])/z
#####reviewed and kept 1  2  3  4  8 9 10 12 14 17 18 21 24 26 28 30 32 33 34 36 38 40 42 43 44 45 46 49 51 53 54 55 56 57 61 62 63#########

####################Deep Layer stage###################################
m<-apply(b[,5:7],1,max)
m<-m-b[,2]
y<-m[which(m>0.1)]

y1<-x[which(x[,4]>0.25),4]
y2<-x[which(x[,5]>0.25),5]
y3<-x[which(x[,6]>0.25),6]

blue<-unique(c(names(y),names(y1),names(y2),names(y3)))
######1  2  4  6  8  9 10 11 12 13 16 17 19 20 21 23 24 25 26 28 $29 30 32 33 34 #########
######35 36 38 39 40 43 44 45 46 47 49 51 53 54 55 56 57 $58 59 60 $61 $62 63##############
m[blue]
x[blue,4]
x[blue,5]
x[blue,6]

z<-b[names(y),]
z<-(apply(z,1,max)-apply(z,1,min))
(y-b[names(y),4])/z

z<-b[names(y1),]
z<-(apply(z,1,max)-apply(z,1,min))
(y1-b[names(y1),4])/z

z<-b[names(y2),]
z<-(apply(z,1,max)-apply(z,1,min))
(y2-b[names(y2),5])/z

z<-b[names(y3),]
z<-(apply(z,1,max)-apply(z,1,min))
(y3-b[names(y3),6])/z

######kept 2  4  6  11 16 17 19 20 21 23 24 25 26 30 32 33 36 38 43 44 45 46 47 49 51 53 54 55  57  59 60 added 3 ##############

####################Upper Layer stage###################################
m<-apply(b[,8:9],1,max)
m<-m-b[,2]
y<-m[which(m>0.1)]

y1<-x[which(x[,7]>0.1),7]
y2<-x[which(x[,8]>0.1),8]

blue<-unique(c(names(y),names(y1),names(y2)))

#######1  2  $3  4  $5  7  8 $11 $12 13 16 17 $18 19 23 24 28 29 30 $32 $33 $34 35 $36 38 39 40 43 $44 45 $49 $52#########
#######53 $54 $55 56 57 58 $59 $60 61 62######################
m[blue]
x[blue,7]
x[blue,8]


z<-b[names(y),]
z<-(apply(z,1,max)-apply(z,1,min))
(y-b[names(y),7])/z

z<-b[names(y1),]
z<-(apply(z,1,max)-apply(z,1,min))
(y1-b[names(y1),7])/z

z<-b[names(y2),]
z<-(apply(z,1,max)-apply(z,1,min))
(y2-b[names(y2),8])/z

####### kept 1  2  4  7  8 13 16 17 19 23 24 28 29 30 35 38 39 40 43 45 53 56 57 58 61 62######################

#####create weieghted averages of stages to check them###############
#######group clusters from each stage into how many stages each cluster belongs to#############
#######weighted ave. = sum("correction factor"*"sizes of clusters"* "cluster center at each timepoint")/sum("sizes of clusters").../4 ######
########correction factor =1-(membership-1)#############
x<-cluster.obj[[1]]
y<-cluster.obj[[2]]


pl4<-c(53,57)
pl2<-c(6,7,9,10,20,35,42,47,59,60,63)
pl3<-c(26,51)
pl1<-c(5,15,22,27,31,50)

pluri<-foreach(i=1:9,.combine='c') %dopar% {
xp1<-x[pl1,]
xp2<-x[pl2,]
xp3<-x[pl3,]
xp4<-x[pl4,]
yp1<-y[pl1]
yp2<-y[pl2]
yp3<-y[pl3]
yp4<-y[pl4]

(sum(yp1*xp1[,i])/sum(yp1)+sum(0.75*yp2*xp2[,i])/sum(yp2)+sum(0.5*yp3*xp3[,i])/sum(yp3)+sum(0.25*yp4*xp4[,i])/sum(yp4))/4}


plot(c(0,7,12,19,26,33,49,63,77),pluri,"line")

pl4<-c(2,4,38,43)
pl3<-c(3,16,19,21,23,32,33,36,40,44,49,55,56)
pl2<-c(12,13,18,25,29,34)
pl1<-c(37,41,48,52,64)

nd<-foreach(i=1:9,.combine='c') %dopar% {
xp1<-x[pl1,]
xp2<-x[pl2,]
xp3<-x[pl3,]
xp4<-x[pl4,]
yp1<-y[pl1]
yp2<-y[pl2]
yp3<-y[pl3]
yp4<-y[pl4]

(sum(yp1*xp1[,i])/sum(yp1)+sum(0.75*yp2*xp2[,i])/sum(yp2)+sum(0.5*yp3*xp3[,i])/sum(yp3)+sum(0.25*yp4*xp4[,i])/sum(yp4))/4}

plot(c(0,7,12,19,26,33,49,63,77),nd,"line")

pl4<-c(2,4,38,43,53,57)
pl3<-c(3,17,21,24,26,30,33,36,40,44,45,49,51,55,56)
pl2<-c(1,8,9,10,12,18,28,34,42,46,54,61,62,63)
pl1<-c(14)

cs<-foreach(i=1:9,.combine='c') %dopar% {
xp1<-x[pl1,]
xp2<-x[pl2,]
xp3<-x[pl3,]
xp4<-x[pl4,]
yp1<-y[pl1]
yp2<-y[pl2]
yp3<-y[pl3]
yp4<-y[pl4]

(sum(yp1*xp1[i])/sum(yp1)+sum(0.75*yp2*xp2[,i])/sum(yp2)+sum(0.5*yp3*xp3[,i])/sum(yp3)+sum(0.25*yp4*xp4[,i])/sum(yp4))/4}

plot(c(0,7,12,19,26,33,49,63,77),cs,"line")


pl4<-c(2,4,38,43,53,57)
pl3<-c(3,16,17,19,21,23,24,26,30,32,33,36,44,45,49,51,55)
pl2<-c(6,20,25,46,47,54,59,60)
pl1<-c(11)

dl<-foreach(i=1:9,.combine='c') %dopar% {
xp1<-x[pl1,]
xp2<-x[pl2,]
xp3<-x[pl3,]
xp4<-x[pl4,]
yp1<-y[pl1]
yp2<-y[pl2]
yp3<-y[pl3]
yp4<-y[pl4]

(sum(yp1*xp1[i])/sum(yp1)+sum(0.75*yp2*xp2[,i])/sum(yp2)+sum(0.5*yp3*xp3[,i])/sum(yp3)+sum(0.25*yp4*xp4[,i])/sum(yp4))/4}
plot(c(0,7,12,19,26,33,49,63,77),dl,"line")


pl4<-c(2,4,38,43,53,57)
pl3<-c(16,17,19,23,24,30,40,45,56)
pl2<-c(1,7,8,13,18,28,29,35,42,46,61,62)
pl1<-c(39,58)

ul<-foreach(i=1:9,.combine='c') %dopar% {
xp1<-x[pl1,]
xp2<-x[pl2,]
xp3<-x[pl3,]
xp4<-x[pl4,]
yp1<-y[pl1]
yp2<-y[pl2]
yp3<-y[pl3]
yp4<-y[pl4]

(sum(yp1*xp1[,i])/sum(yp1)+sum(0.75*yp2*xp2[,i])/sum(yp2)+sum(0.5*yp3*xp3[,i])/sum(yp3)+sum(0.25*yp4*xp4[,i])/sum(yp4))/4}
plot(c(0,7,12,19,26,33,49,63,77),ul,"line")

w.centers<-rbind(pluri,nd,cs,dl,ul)
write.csv(w.centers,"weighted centers.csv")
Stage.names<-c("Pluirpotency","Neural Diff", "Cortical Specification","Deep layers","Upper Layers")

pdf("Ave Centers.pdf",onefile=T,width=11)
for(i in 1:5) {
z<-Stage.names[i]
plot(c(0,7,12,19,26,33,49,63,77),w.centers[i,], type="line",col="darkblue",xlab="Days post Induction",ylab="Relative Change in Expression",main=z)}
dev.off()


################set up stages list################
Stages<-list(unique(c(cluster.first[[5]],cluster.first[[6]],cluster.first[[7]],cluster.first[[9]],cluster.first[[10]],cluster.first[[15]],cluster.first[[20]],cluster.first[[22]],cluster.first[[26]],cluster.first[[27]],cluster.first[[31]],cluster.first[[35]],cluster.first[[42]],cluster.first[[47]],cluster.first[[50]],cluster.first[[51]],cluster.first[[53]],cluster.first[[57]],cluster.first[[59]],cluster.first[[60]],cluster.first[[63]])),

unique(c(cluster.first[[2]],cluster.first[[3]],cluster.first[[4]],cluster.first[[12]],cluster.first[[13]],cluster.first[[16]],cluster.first[[18]],cluster.first[[19]],cluster.first[[21]],cluster.first[[23]],cluster.first[[25]],cluster.first[[29]],cluster.first[[32]],cluster.first[[33]],cluster.first[[34]],cluster.first[[36]],cluster.first[[37]],cluster.first[[38]],cluster.first[[40]],cluster.first[[41]],cluster.first[[43]],cluster.first[[44]],cluster.first[[48]],cluster.first[[49]],cluster.first[[52]],cluster.first[[55]],cluster.first[[56]],cluster.first[[64]])),

unique(c(cluster.first[[1]],cluster.first[[2]],cluster.first[[3]],cluster.first[[4]],cluster.first[[8]],cluster.first[[9]],cluster.first[[10]],cluster.first[[12]],cluster.first[[14]],cluster.first[[17]],cluster.first[[18]],cluster.first[[21]],cluster.first[[24]],cluster.first[[26]],cluster.first[[28]],cluster.first[[30]],cluster.first[[32]],cluster.first[[33]],cluster.first[[34]],cluster.first[[36]],cluster.first[[38]],cluster.first[[40]],cluster.first[[42]],cluster.first[[43]],cluster.first[[44]],cluster.first[[45]],cluster.first[[46]],cluster.first[[49]],cluster.first[[51]],cluster.first[[53]],cluster.first[[54]],cluster.first[[55]],cluster.first[[56]],cluster.first[[57]],cluster.first[[61]],cluster.first[[62]],cluster.first[[63]])),

unique(c(cluster.first[[2]],cluster.first[[3]],cluster.first[[4]],cluster.first[[6]],cluster.first[[11]],cluster.first[[16]],cluster.first[[17]],cluster.first[[19]],cluster.first[[20]],cluster.first[[21]],cluster.first[[23]],cluster.first[[24]],cluster.first[[25]],cluster.first[[26]],cluster.first[[30]],cluster.first[[32]],cluster.first[[33]],cluster.first[[36]],cluster.first[[38]],cluster.first[[43]],cluster.first[[44]],cluster.first[[45]],cluster.first[[46]],cluster.first[[47]],cluster.first[[49]],cluster.first[[51]],cluster.first[[53]],cluster.first[[54]],cluster.first[[55]],cluster.first[[57]],cluster.first[[59]],cluster.first[[60]])),

unique(c(cluster.first[[1]],cluster.first[[2]],cluster.first[[4]],cluster.first[[7]],cluster.first[[8]],cluster.first[[13]],cluster.first[[16]],cluster.first[[17]],cluster.first[[19]],cluster.first[[23]],cluster.first[[24]],cluster.first[[28]],cluster.first[[29]],cluster.first[[30]],cluster.first[[35]],cluster.first[[38]],cluster.first[[39]],cluster.first[[40]],cluster.first[[43]],cluster.first[[45]],cluster.first[[53]],cluster.first[[56]],cluster.first[[57]],cluster.first[[58]],cluster.first[[61]],cluster.first[[62]])))

names(Stages)<-Stage.names

#############stages with exon changing genes###########
Stages.ex<-foreach(i=1:5) %dopar% {
intersect(rownames(sig.diff.exon),Stages[[i]])}

######clean up R Session####
rm(b)
rm(x)
rm(z)
rm(y)
rm(y1)
rm(y2)
rm(y3)
rm(blue)
rm(xp1)
rm(xp2)
rm(xp3)
rm(xp4)
rm(yp1)
rm(yp2)
rm(yp3)
rm(yp4)
rm(ul)
rm(dl)
rm(cs)
rm(nd)
rm(pluri)
rm(pl1)
rm(pl2)
rm(pl3)
rm(pl4)

#################Enrichment analysis with stages for Disease, GO, and Kegg databases###########

stage.disease<-dis.cluster.enrich(Stages,disease)
names(stage.disease)<-Stage.names

stage.disease.ex<-dis.cluster.enrich(Stages.ex,disease)
names(stage.disease.ex)<-Stage.names

stage.go<-go.cluster.enrich(Stages,c("GO:CC","GO:BP","GO:MF"))
names(stage.go)<-Stage.names

stage.go.BP<-go.cluster.enrich(Stages,"GO:BP")
names(stage.go.BP)<-Stage.names

stage.go.ex<-go.cluster.enrich(Stages.ex,c("GO:CC","GO:BP","GO:MF"))
names(stage.go)<-Stage.names

stage.go.BP.ex<-go.cluster.enrich(Stages.ex,"GO:BP")
names(stage.go.BP)<-Stage.names

kegg.stages<-SPIAcluster(Stages)
names(kegg.stages)<-Stage.names

############calculating z score from p values for KEGG.stages###################
write.csv(cbind(all.Kegg.first,qnorm(1-(all.Kegg.first[,10]/2))),"Kegg table Zscore.csv")

###########write tables for stage enrichments##############
write.dis.cl(stage.disease,"Stage Diseases")
write.dis.cl(stage.disease.ex,"Stage Diseases exon")
write.go.stage(stage.go.BP,Stages,"GO BP stages")
write.go.stage(stage.go.BP.ex,Stages,"GO BP stages exon")
write.kegg.stag(kegg.stages,"Kegg stages")


########Analyzing stages utilizing co-citation network for central genes###########
net.stages<-vital.nodes.cl(connect.gr,Stages,1)
names(net.stages)<-Stage.names


###################################################
###############making figures#####################
##################################################


###########Kegg cluster panel############

blue<-read.delim("Kegg clusters for fig.csv",row.names=1)
blue<-data.frame(blue)
p<-ggplot(blue,aes(Cluster,Name,colour=Color,shape=Status)) 
p<-p + scale_colour_manual(name="Color",values=c("Other"="black","Immune and Infectious Disease"="red3","Cancer"="purple","Canonical Signaling"="green2","Cell Processes"="orange","Nervous System"="blue3"))
p<-p +layer(geom="point",size=3)
p<-p + ylab("")
p<-p +scale_x_continuous(breaks=c(0,5,10,15,20,25,30,35,40,45,50,55,60,64),limits=c(0,64))
p<-p + theme(panel.grid.minor=element_line(color="white",size=1))
p<-p + theme(axis.text=element_text(size="10", color="black"))
p<-p + theme(panel.grid.minor=element_line(color="grey90",size=0.5))
p<-p + theme(panel.grid.major=element_line(color="grey90",size=1))
p<-p + theme(axis.title.x=element_text(size="12", color="black"))
p<-p + theme(legend.text=element_text(size="12", color="black"))
p<-p + theme(legend.title=element_text(size="12", color="black"))
p<-p + theme(panel.background=element_rect(fill="ivory"))
p<-p + theme(panel.background=element_rect(size=0.5))
p<-p + theme(panel.background=element_rect(colour="grey36"))

postscript("KEGG clusters.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
plot(p)
dev.off()

########Disease cluster panel##########
blue<-read.delim("Disease cluster for fig.csv",row.names=1)
blue<-data.frame(blue)
p<-ggplot(blue,aes(Cluster,Disease,colour=Color)) 
p<-p + scale_colour_manual(name="Color",values=c("Other "="black","Cancer"="red3","Eye Diseases"="purple","Nervous System Diseases"="blue"))
p<-p +layer(geom="point",size=3)
p<-p + ylab("")
p<-p +scale_x_continuous(breaks=c(0,5,10,15,20,25,30,35,40,45,50,55,60,64),limits=c(0,64))
p<-p + theme(panel.grid.minor=element_line(color="white",size=1))
p<-p + theme(axis.text=element_text(size="10", color="black"))
p<-p + theme(panel.grid.minor=element_line(color="grey90",size=0.5))
p<-p + theme(panel.grid.major=element_line(color="grey90",size=1))
p<-p + theme(axis.title.x=element_text(size="12", color="black"))
p<-p + theme(legend.text=element_text(size="12", color="black"))
p<-p + theme(legend.title=element_text(size="12", color="black"))
p<-p + theme(panel.background=element_rect(fill="ivory"))
p<-p + theme(panel.background=element_rect(size=0.5))
p<-p + theme(panel.background=element_rect(colour="grey36"))

postscript("Disease clusters.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
plot(p)
dev.off()

#####GO:BP cluster panel#######
x<-as.matrix(read.csv("Goslim for fig.csv"))


blue<-foreach(i=1:length(GO.BP.cluster), .combine='rbind') %dopar% {
x<-GO.BP.cluster[[i]]
if (dim(x)[1]==0){}else{
y<-GOnames[x[,1]]
names(y)<-rownames(x)
cluster<-rep(i,dim(x)[1])
cbind(y,x,cluster)}}

y<-intersect(x[,1],blue[,2])
z<-x[,2]
names(z)<-x[,1]
blue<-foreach(i=1:length(y),.combine='rbind') %dopar% {
x<-blue[which(blue[,2]==y[i]),]
Group<-z[y[i]]
cbind(x,Group)
}


blue<-data.frame(blue)
p<-ggplot(blue,aes(cluster,category,colour=Group)) 
p<-p + scale_colour_manual(name="Color",values=c("Other Developmental Processes"="black","Immune system development"="purple","Circulatory system development"="red3","Embryo development"="green2","Urogenital system development"="gold","Nervous system development"="blue3", "CNS development"="lightblue1"))
p<-p +layer(geom="point",size=3)
p<-p + ylab("")
p<-p +scale_x_continuous(breaks=c(0,5,10,15,20,25,30,35,40,45,50,55,60,64),limits=c(0,64))
p<-p + theme(panel.grid.minor=element_line(color="white",size=1))
p<-p + theme(axis.text=element_text(size="10", color="black"))
p<-p + theme(panel.grid.minor=element_line(color="grey90",size=0.5))
p<-p + theme(panel.grid.major=element_line(color="grey90",size=1))
p<-p + theme(axis.title.x=element_text(size="12", color="black"))
p<-p + theme(legend.text=element_text(size="12", color="black"))
p<-p + theme(legend.title=element_text(size="12", color="black"))
p<-p + theme(panel.background=element_rect(fill="ivory"))
p<-p + theme(panel.background=element_rect(size=0.5))
p<-p + theme(panel.background=element_rect(colour="grey36"))

postscript("GO clusters.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
plot(p)
dev.off()


#######Exon Disease clusters panel###########
blue<-read.delim("Exon Diseases.csv",row.names=1)
blue<-data.frame(blue)
p<-ggplot(blue,aes(Cluster,Disease,colour=Color)) 
p<-p + scale_colour_manual(name="Color",values=c("Other"="black","Cancer"="red3","Eye Diseases"="purple","Nervous System Diseases"="blue"))
p<-p +layer(geom="point",size=3)
p<-p + ylab("")
p<-p +scale_x_continuous(breaks=c(0,5,10,15,20,25,30,35,40,45,50,55,60,64),limits=c(0,64))
p<-p + theme(panel.grid.minor=element_line(color="white",size=1))
p<-p + theme(axis.text=element_text(size="10", color="black"))
p<-p + theme(panel.grid.minor=element_line(color="grey90",size=0.5))
p<-p + theme(panel.grid.major=element_line(color="grey90",size=1))
p<-p + theme(axis.title.x=element_text(size="12", color="black"))
p<-p + theme(legend.text=element_text(size="12", color="black"))
p<-p + theme(legend.title=element_text(size="12", color="black"))
p<-p + theme(panel.background=element_rect(fill="ivory"))
p<-p + theme(panel.background=element_rect(size=0.5))
p<-p + theme(panel.background=element_rect(colour="grey36"))

postscript("Disease clusters exon.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
plot(p)
dev.off()

############GO:BP exon cluster panel################
x<-foreach(i=1:64,.combine='rbind') %dopar% {
x<-BP.go.cluster.exon[[i]]
if(dim(x)[1]>=1) {
z<-(rep(i,dim(x)[1]))
cbind(x,z)} else{}
}
names(x)<-c(names(BP.go.cluster.exon),"cluster")
blue<-data.frame(x)
blue<-cbind(blue,GOnames[blue[,1]])
colnames(blue)<-c("GO","p value","fdr","Cluster","Name")

blue<-data.frame(blue)
p<-ggplot(blue,aes(Cluster,Name)) 

p<-p +layer(geom="point",size=3)
p<-p + ylab("")
p<-p +scale_x_continuous(breaks=c(0,5,10,15,20,25,30,35,40,45,50,55,60,64),limits=c(0,64))
p<-p + theme(panel.grid.minor=element_line(color="white",size=1))
p<-p + theme(axis.text=element_text(size="10", color="black"))
p<-p + theme(panel.grid.minor=element_line(color="grey90",size=0.5))
p<-p + theme(panel.grid.major=element_line(color="grey90",size=1))
p<-p + theme(axis.title.x=element_text(size="12", color="black"))
p<-p + theme(legend.text=element_text(size="12", color="black"))
p<-p + theme(legend.title=element_text(size="12", color="black"))
p<-p + theme(panel.background=element_rect(fill="ivory"))
p<-p + theme(panel.background=element_rect(size=0.5))
p<-p + theme(panel.background=element_rect(colour="grey36"))

postscript("GO clusters exon.eps", horizontal = FALSE, onefile = TRUE, paper = "special", height=11,width=8.5)
plot(p)
dev.off()

#############take autism genes and make a graph file (.gml)######
b<-driven("Autism",rownames(sig.genes))
b<-intersect(rownames(connect),b)
x<-induced.subgraph(connect.gr,b)

V(x)$label<-SYMBOLS[V(x)$name]
m<-V(x)$name
z<-intersect(m,Diffstages.st[[5]])
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"UL")
z<-intersect(m,Diffstages.st[[4]])
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"DL")
z<-intersect(m,Diffstages.st[[3]])
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"CS")
z<-intersect(m,Diffstages.st[[2]])
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"ND")
z<-intersect(m,Diffstages.st[[1]])
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"Pl")
z<-V(x)$name
z<-intersect(m,z)
b<-foreach(i=1:length(z), .combine=c) %dopar% {which(m==z[i])}
m<-replace(m,b,"Multi")

V(x)$color<-m


write.graph(x,"autism.GML","gml")