This report has goseq results for NGT versus T2D when:

  1. Genes are marked as differentially expressed with q value <= 0.05
  2. Only genes from 1 with a positive effect are marked as differentially expressed
  3. Only genes from 1 with a negative effect are marked as differentially expressed

This report was generated on June 15 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Input table of per-gene results; this chunk uses its `gene` and `p.value`
# columns. outFile is only assigned here (a file-name prefix for later use).
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt"
outFile <- "ngt_t2d2"

# Keep only the part of each identifier before the first "."
# (e.g. "ENSG00000000003.10" -> "ENSG00000000003").
# vapply() always returns a character vector, even for empty input, whereas
# sapply() would silently return a list in that case.
strip_version <- function(ids) {
  vapply(strsplit(ids, split = "[.]"), function(parts) parts[1], character(1))
}

data <- read.table(fName, as.is = TRUE, header = TRUE)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Both tables must use the same stripped identifiers so they can be joined.
gene_lengths$gene <- strip_version(gene_lengths$gene)
data$gene <- strip_version(data$gene)

# Left join: every gene in `data` is kept; genes without a match in
# gene_lengths get NA in the columns coming from that table.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by p-value, then attach q values and the resulting rank (1 = smallest p).
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() is safe for an empty table (seq(1, 0) would give c(1, 0)).
data$rank <- seq_len(nrow(data))

# Minimum number of rows to show in each printed table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the genes with q value <= 0.05 as differentially expressed. The second and third vectors mark the genes in this list with positive or negative effect, respectively, as differentially expressed.

# 0/1 indicator vectors, named by gene id:
#   genes    - q value <= 0.05
#   genesPos - q value <= 0.05 and effect > 0
#   genesNeg - q value <= 0.05 and effect < 0
is_de <- data$q.value <= 0.05

genes <- setNames(as.numeric(is_de), data$gene)
genesPos <- setNames(as.numeric(is_de & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(is_de & data$effect < 0), data$gene)

There are 15 DE genes with positive effect and 8 DE genes with negative effect.

Step 3: PWFs

# One nullp() fit per indicator vector (all DE genes, positive effect only,
# negative effect only). The merged gene length column is passed as the bias
# data and the diagnostic plot is switched off.
pwf <- nullp(genes, genome = "hg19", id = "ensGene",
             bias.data = data$length, plot.fit = FALSE)
pwfPos <- nullp(genesPos, genome = "hg19", id = "ensGene",
                bias.data = data$length, plot.fit = FALSE)
pwfNeg <- nullp(genesNeg, genome = "hg19", id = "ensGene",
                bias.data = data$length, plot.fit = FALSE)

Step 4: run goseq

# For each of the three PWFs: run goseq restricted to GO:BP, drop the row
# names, cap p-values at 1, then add q values for the over-represented
# (q.value) and under-represented (q.value2) p-values.

# --- all DE genes ---
go <- goseq(pwf, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
rownames(go) <- NULL
# Fix problem with some p-values being slightly more than 1
go$over_represented_pvalue <- pmin(go$over_represented_pvalue, 1)
go$under_represented_pvalue <- pmin(go$under_represented_pvalue, 1)
go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues

# --- DE genes with positive effect ---
goPos <- goseq(pwfPos, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
rownames(goPos) <- NULL
goPos$over_represented_pvalue <- pmin(goPos$over_represented_pvalue, 1)
goPos$under_represented_pvalue <- pmin(goPos$under_represented_pvalue, 1)
goPos$q.value <- qvalue(goPos$over_represented_pvalue)$qvalues
goPos$q.value2 <- qvalue(goPos$under_represented_pvalue)$qvalues

# --- DE genes with negative effect ---
goNeg <- goseq(pwfNeg, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
rownames(goNeg) <- NULL
goNeg$over_represented_pvalue <- pmin(goNeg$over_represented_pvalue, 1)
goNeg$under_represented_pvalue <- pmin(goNeg$under_represented_pvalue, 1)
goNeg$q.value <- qvalue(goNeg$over_represented_pvalue)$qvalues
goNeg$q.value2 <- qvalue(goNeg$under_represented_pvalue)$qvalues

FDR < 0.05

Over enriched categories (0)

# Number of rows to print: the count of categories with q.value <= 0.05, or
# minRow if that is larger. na.rm = TRUE keeps an NA q value from turning
# rowN into NA.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
# head() returns at most the rows that exist; go[1:rowN, ] would pad the
# table with all-NA rows if go had fewer than rowN rows.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0021572 rhombomere 6 development 1 1 1
GO:0045646 regulation of erythrocyte differentiation 37 2 1
GO:0036034 mediator complex assembly 1 1 1
GO:2001176 regulation of mediator complex assembly 1 1 1
GO:2001178 positive regulation of mediator complex assembly 1 1 1
Other NA 1443 7 1
GO:0021730 trigeminal sensory nucleus development 1 1 1
GO:0021740 principal sensory nucleus of trigeminal nerve development 1 1 1
GO:2000795 negative regulation of epithelial cell proliferation involved in lung morphogenesis 1 1 1
GO:0048857 neural nucleus development 54 2 1
GO:0061141 lung ciliated cell differentiation 2 1 1
GO:0035283 central nervous system segmentation 3 1 1
GO:0035284 brain segmentation 3 1 1
GO:0021571 rhombomere 5 development 3 1 1
GO:2000790 regulation of mesenchymal cell proliferation involved in lung development 2 1 1
GO:2000791 negative regulation of mesenchymal cell proliferation involved in lung development 2 1 1
GO:0035563 positive regulation of chromatin binding 3 1 1
GO:0060486 Clara cell differentiation 3 1 1
GO:1901029 negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway 5 1 1
GO:0032844 regulation of homeostatic process 279 3 1

Under enriched (0)

# Re-sort by under-representation p-value; go keeps this order from here on.
go <- go[order(go$under_represented_pvalue), ]
# Number of rows to print: the count of categories with q.value2 <= 0.05, or
# minRow if that is larger. na.rm = TRUE keeps an NA q value from turning
# rowN into NA.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
# head() returns at most the rows that exist; go[1:rowN, ] would pad the
# table with all-NA rows if go had fewer than rowN rows.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
697 GO:0008150 biological_process 13156 13 1
696 GO:0009987 cellular process 11765 12 1
695 GO:0044699 single-organism process 10473 10 1
5312 GO:0033036 macromolecule localization 2031 0 1
694 GO:0006950 response to stress 2911 1 1
2707 GO:0008104 protein localization 1760 0 1
693 GO:0051179 localization 4340 3 1
692 GO:0044763 single-organism cellular process 9512 10 1
744 GO:0045184 establishment of protein localization 1422 0 1
691 GO:0019538 protein metabolic process 4124 3 1
3568 GO:0015031 protein transport 1320 0 1
688 GO:0008152 metabolic process 9727 11 1
686 GO:0071704 organic substance metabolic process 8973 10 1
687 GO:0050896 response to stimulus 6127 6 1
2997 GO:0009719 response to endogenous stimulus 1136 0 1
718 GO:0070727 cellular macromolecule localization 1129 0 1
2399 GO:0006952 defense response 1181 0 1
729 GO:0034613 cellular protein localization 1124 0 1
2110 GO:0006508 proteolysis 1110 0 1
1612 GO:0003008 system process 1112 0 1

Positive Effect

Over enriched categories (0)

# Number of rows to print: the count of categories with q.value <= 0.05, or
# minRow if that is larger. na.rm = TRUE keeps an NA q value from turning
# rowN into NA.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
# head() returns at most the rows that exist; goPos[1:rowN, ] would pad the
# table with all-NA rows if goPos had fewer than rowN rows.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0045646 regulation of erythrocyte differentiation 37 2 1
GO:0036034 mediator complex assembly 1 1 1
GO:2001176 regulation of mediator complex assembly 1 1 1
GO:2001178 positive regulation of mediator complex assembly 1 1 1
GO:0021572 rhombomere 6 development 1 1 1
GO:0035563 positive regulation of chromatin binding 3 1 1
GO:0021571 rhombomere 5 development 3 1 1
GO:0035283 central nervous system segmentation 3 1 1
GO:0035284 brain segmentation 3 1 1
GO:0030218 erythrocyte differentiation 95 2 1
GO:0034101 erythrocyte homeostasis 101 2 1
GO:1901029 negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway 5 1 1
GO:0002262 myeloid cell homeostasis 117 2 1
GO:0006357 regulation of transcription from RNA polymerase II promoter 1306 5 1
GO:0021546 rhombomere development 6 1 1
GO:0043619 regulation of transcription from RNA polymerase II promoter in response to oxidative stress 8 1 1
GO:0046886 positive regulation of hormone biosynthetic process 8 1 1
GO:0042090 interleukin-12 biosynthetic process 8 1 1
GO:0045075 regulation of interleukin-12 biosynthetic process 8 1 1
GO:0035561 regulation of chromatin binding 8 1 1

Under enriched (0)

# Re-sort by under-representation p-value; goPos keeps this order from here on.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Number of rows to print: the count of categories with q.value2 <= 0.05, or
# minRow if that is larger. na.rm = TRUE keeps an NA q value from turning
# rowN into NA.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
# head() returns at most the rows that exist; goPos[1:rowN, ] would pad the
# table with all-NA rows if goPos had fewer than rowN rows.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
441 GO:0008150 biological_process 13156 9 1
440 GO:0044699 single-organism process 10473 6 1
439 GO:0050896 response to stimulus 6127 2 1
437 GO:0009987 cellular process 11765 8 1
438 GO:0051179 localization 4340 1 1
434 GO:0044763 single-organism cellular process 9512 6 1
433 GO:0051716 cellular response to stimulus 4976 2 1
11466 GO:1901564 organonitrogen compound metabolic process 2215 0 1
436 GO:0051234 establishment of localization 3588 1 1
432 GO:0044710 single-organism metabolic process 4810 2 1
435 GO:0006810 transport 3504 1 1
12713 GO:0033036 macromolecule localization 2031 0 1
10235 GO:0071702 organic substance transport 1991 0 1
491 GO:0035556 intracellular signal transduction 1996 0 1
431 GO:0007154 cell communication 4561 2 1
11359 GO:1901135 carbohydrate derivative metabolic process 1916 0 1
429 GO:0023052 signaling 4498 2 1
430 GO:0044700 single organism signaling 4498 2 1
424 GO:0008152 metabolic process 9727 7 1
12722 GO:0019637 organophosphate metabolic process 1768 0 1

Negative Effect

Over enriched categories (0)

category term numInCat numDEInCat q.value
GO:0021730 trigeminal sensory nucleus development 1 1 1
GO:0021740 principal sensory nucleus of trigeminal nerve development 1 1 1
GO:2000795 negative regulation of epithelial cell proliferation involved in lung morphogenesis 1 1 1
GO:0061141 lung ciliated cell differentiation 2 1 1
GO:2000790 regulation of mesenchymal cell proliferation involved in lung development 2 1 1
GO:2000791 negative regulation of mesenchymal cell proliferation involved in lung development 2 1 1
GO:0060486 Clara cell differentiation 3 1 1
GO:0060916 mesenchymal cell proliferation involved in lung development 4 1 1
GO:0007196 adenylate cyclase-inhibiting G-protein coupled glutamate receptor signaling pathway 4 1 1
GO:0061179 negative regulation of insulin secretion involved in cellular response to glucose stimulus 6 1 1
GO:0021960 anterior commissure morphogenesis 5 1 1
GO:0072201 negative regulation of mesenchymal cell proliferation 6 1 1
GO:0060510 Type II pneumocyte differentiation 6 1 1
GO:0060509 Type I pneumocyte differentiation 6 1 1
GO:0071679 commissural neuron axon guidance 6 1 1
GO:2000794 regulation of epithelial cell proliferation involved in lung morphogenesis 7 1 1
GO:0007216 G-protein coupled glutamate receptor signaling pathway 7 1 1
GO:0060502 epithelial cell proliferation involved in lung morphogenesis 8 1 1
GO:0061140 lung secretory cell differentiation 10 1 1
GO:2000647 negative regulation of stem cell proliferation 13 1 1

Under enriched (0)

category term numInCat numDEInCat q.value2
415 GO:0008150 biological_process 13156 4 1
414 GO:0043170 macromolecule metabolic process 7144 1 1
3863 GO:0019538 protein metabolic process 4124 0 1
413 GO:0044260 cellular macromolecule metabolic process 6487 1 1
448 GO:0044267 cellular protein metabolic process 3491 0 1
2151 GO:0006950 response to stress 2911 0 1
447 GO:0043412 macromolecule modification 2854 0 1
1827 GO:0006464 cellular protein modification process 2739 0 1
6022 GO:0036211 protein modification process 2739 0 1
2179 GO:0006996 organelle organization 2485 0 1
412 GO:0009987 cellular process 11765 4 1
5156 GO:0033036 macromolecule localization 2031 0 1
411 GO:0060255 regulation of macromolecule metabolic process 4403 1 1
406 GO:0071704 organic substance metabolic process 8973 3 1
410 GO:0010467 gene expression 4325 1 1
1031 GO:0002376 immune system process 1939 0 1
6871 GO:0044085 cellular component biogenesis 1825 0 1
409 GO:0090304 nucleic acid metabolic process 4168 1 1
403 GO:0044238 primary metabolic process 8723 3 1
2477 GO:0008104 protein localization 1760 0 1

Final Step: csv output

# Write the three result tables under csv/, using outFile as the file-name
# prefix and leaving out the row names.
write.csv(go, file = paste0("csv/", outFile, "_main.csv"), row.names = FALSE)
write.csv(goPos, file = paste0("csv/", outFile, "Pos.csv"), row.names = FALSE)
write.csv(goNeg, file = paste0("csv/", outFile, "Neg.csv"), row.names = FALSE)