This report presents goseq results for 60-minute glucose under three definitions of differentially expressed genes:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

The goseq results are also saved as csv files on snowwhite, in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Per-gene association results for the trait analysed in this report.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k01_GL60_all_genes.txt"
# Prefix of the csv files written in the final step.
outFile <- "GL60"

# Return the part of each ID that precedes the first "." (presumably this
# removes a version suffix so the IDs match the "ensGene" identifiers used
# later -- confirm against the input files).
# vapply() guarantees a character vector regardless of input length.
stripIdSuffix <- function(ids) {
  vapply(
    strsplit(ids, split = "[.]"),
    function(parts) parts[1],
    character(1),
    USE.NAMES = FALSE
  )
}

data <- read.table(fName, as.is = TRUE, header = TRUE)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)
gene_lengths$gene <- stripIdSuffix(gene_lengths$gene)

data$gene <- stripIdSuffix(data$gene)

# all.x = TRUE keeps every gene from the results table; genes without a match
# in the length table get NA for the columns coming from gene_lengths.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by p-value so that rank 1 is the most significant gene.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table below.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors, one entry per row of `data`, named by gene ID.
# A gene is flagged (1) when it is among the 1000 best-ranked genes; the
# Pos/Neg variants additionally require a positive/negative effect.
genes <- with(data, setNames(as.numeric(rank <= 1000), gene))
genesPos <- with(data, setNames(as.numeric(rank <= 1000 & effect > 0), gene))
genesNeg <- with(data, setNames(as.numeric(rank <= 1000 & effect < 0), gene))

There are 535 DE genes with positive effect and 465 DE genes with negative effect.

Step 3: PWFs

# Probability weighting functions for the three indicator vectors. The gene
# lengths merged into `data` are supplied as the bias data, and the
# diagnostic fit plot is switched off.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Post-process one goseq() result table:
#   1. reset the row names to 1..n (the numbers printed in the tables below),
#   2. cap both p-value columns at 1,
#   3. add q-values for over- (q.value) and under-representation (q.value2),
#   4. keep only categories with fewer than `maxCatSize` genes.
# The q-values are computed on the full table, i.e. before the size filter.
#
# res:        data frame returned by goseq().
# maxCatSize: categories with numInCat >= maxCatSize are dropped.
# Returns the filtered data frame with the two q-value columns appended.
postProcessGoseq <- function(res, maxCatSize = 1000) {
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # which() drops rows whose numInCat is NA instead of returning NA rows.
  res[which(res$numInCat < maxCatSize), ]
}

# Test the GO biological process categories only.
go <- goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP"))
goPos <- goseq(pwfPos, "hg19", "ensGene", test.cats = c("GO:BP"))
goNeg <- goseq(pwfNeg, "hg19", "ensGene", test.cats = c("GO:BP"))

go <- postProcessGoseq(go)
goPos <- postProcessGoseq(goPos)
goNeg <- postProcessGoseq(goNeg)

Top 1000 Results

Over enriched categories (8)

# Print every category with q.value <= 0.05, but never fewer than minRow rows.
# Relies on `go` still being in the row order returned by goseq().
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a table with fewer than rowN
# rows is printed as-is instead of being padded with all-NA rows.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0055114 oxidation-reduction process 896 87 0.0008603
2 GO:0045333 cellular respiration 158 25 0.0029023
3 GO:0015980 energy derivation by oxidation of organic compounds 319 40 0.0029023
4 GO:0006091 generation of precursor metabolites and energy 394 45 0.0064656
5 GO:0022904 respiratory electron transport chain 105 18 0.0085634
6 GO:0022900 electron transport chain 106 18 0.0085634
7 GO:0071482 cellular response to light stimulus 74 15 0.0240740
8 GO:0071478 cellular response to radiation 116 19 0.0383190
10 GO:0034644 cellular response to UV 47 11 0.0512798
11 GO:0042773 ATP synthesis coupled electron transport 54 11 0.0512798
12 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 11 0.0512798
14 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 9 0.1252777
15 GO:0006119 oxidative phosphorylation 70 12 0.1277451
16 GO:0033762 response to glucagon 38 9 0.1727691
17 GO:0050878 regulation of body fluid levels 579 54 0.1727691
18 GO:0031960 response to corticosteroid 109 16 0.1727691
20 GO:0035924 cellular response to vascular endothelial growth factor stimulus 31 8 0.1861038
21 GO:0071384 cellular response to corticosteroid stimulus 25 7 0.1861038
23 GO:0051384 response to glucocorticoid 101 15 0.1861038
26 GO:0042060 wound healing 595 54 0.2277454

Under enriched (0)

# Re-sort by the under-representation p-value. `go` keeps this order from
# here on, including when it is written to csv in the final step.
go <- go[order(go$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, but never fewer than minRow rows.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a table with fewer than rowN
# rows is printed as-is instead of being padded with all-NA rows.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
5681 GO:0051090 regulation of sequence-specific DNA binding transcription factor activity 304 7 1
5680 GO:0006643 membrane lipid metabolic process 148 2 1
7806 GO:0006368 transcription elongation from RNA polymerase II promoter 82 0 1
5672 GO:0034660 ncRNA metabolic process 325 9 1
5677 GO:0016197 endosomal transport 188 4 1
8373 GO:0042158 lipoprotein biosynthetic process 70 0 1
7816 GO:0006497 protein lipidation 67 0 1
5678 GO:0072659 protein localization to plasma membrane 129 2 1
5679 GO:0002831 regulation of response to biotic stimulus 97 1 1
8129 GO:0030316 osteoclast differentiation 63 0 1
5676 GO:0006664 glycolipid metabolic process 90 1 1
5654 GO:0006396 RNA processing 676 27 1
5663 GO:0043900 regulation of multi-organism process 273 8 1
5669 GO:0070588 calcium ion transmembrane transport 140 3 1
7846 GO:0006672 ceramide metabolic process 57 0 1
5675 GO:0046467 membrane lipid biosynthetic process 89 1 1
7724 GO:0002456 T cell mediated immunity 59 0 1
5674 GO:0043901 negative regulation of multi-organism process 93 1 1
5671 GO:0045185 maintenance of protein location 116 2 1
5673 GO:0060491 regulation of cell projection assembly 88 1 1

Positive Effect

Over enriched categories (13)

# Print every category with q.value <= 0.05, but never fewer than minRow rows.
# Relies on `goPos` still being in the row order returned by goseq().
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a table with fewer than rowN
# rows is printed as-is instead of being padded with all-NA rows.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
3 GO:0090068 positive regulation of cell cycle process 196 20 0.0113000
6 GO:0034644 cellular response to UV 47 9 0.0170484
7 GO:0009411 response to UV 112 14 0.0170484
8 GO:0010498 proteasomal protein catabolic process 284 24 0.0170484
9 GO:0071478 cellular response to radiation 116 14 0.0181423
11 GO:0071482 cellular response to light stimulus 74 11 0.0181423
12 GO:0051276 chromosome organization 738 45 0.0268223
13 GO:0043161 proteasome-mediated ubiquitin-dependent protein catabolic process 265 22 0.0331790
14 GO:0000077 DNA damage checkpoint 148 15 0.0401151
17 GO:0031571 mitotic G1 DNA damage checkpoint 75 10 0.0424681
19 GO:0044819 mitotic G1/S transition checkpoint 76 10 0.0424681
21 GO:0044783 G1 DNA damage checkpoint 76 10 0.0424681
22 GO:0031570 DNA integrity checkpoint 154 15 0.0424681
24 GO:0071214 cellular response to abiotic stimulus 208 18 0.0516221
25 GO:0006977 DNA damage response, signal transduction by p53 class mediator resulting in cell cycle arrest 66 9 0.0532391
26 GO:0072431 signal transduction involved in mitotic G1 DNA damage checkpoint 67 9 0.0532391
27 GO:1902400 intracellular signal transduction involved in G1 DNA damage checkpoint 67 9 0.0532391
28 GO:0051726 regulation of cell cycle 757 43 0.0532391
29 GO:0045787 positive regulation of cell cycle 107 12 0.0532391
30 GO:0072401 signal transduction involved in DNA integrity checkpoint 68 9 0.0532391

Under enriched (0)

# Re-sort by the under-representation p-value. `goPos` keeps this order from
# here on, including when it is written to csv in the final step.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, but never fewer than minRow rows.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a table with fewer than rowN
# rows is printed as-is instead of being padded with all-NA rows.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
4282 GO:0007600 sensory perception 334 2 1
4279 GO:0000904 cell morphogenesis involved in differentiation 701 11 1
4281 GO:0051090 regulation of sequence-specific DNA binding transcription factor activity 304 2 1
5407 GO:0007009 plasma membrane organization 169 0 1
4278 GO:0050877 neurological system process 626 9 1
4276 GO:0010256 endomembrane system organization 351 4 1
8696 GO:0045333 cellular respiration 158 0 1
4274 GO:0048667 cell morphogenesis involved in neuron differentiation 535 9 1
11115 GO:0072659 protein localization to plasma membrane 129 0 1
4277 GO:0051091 positive regulation of sequence-specific DNA binding transcription factor activity 189 1 1
8370 GO:0043112 receptor metabolic process 120 0 1
4271 GO:0007409 axonogenesis 469 8 1
4262 GO:0000902 cell morphogenesis 993 22 1
4259 GO:0032990 cell part morphogenesis 718 15 1
5168 GO:0006414 translational elongation 121 0 1
8135 GO:0042157 lipoprotein metabolic process 105 0 1
4261 GO:0061564 axon development 487 9 1
4275 GO:0098609 cell-cell adhesion 148 1 1
5603 GO:0009063 cellular amino acid catabolic process 103 0 1
4251 GO:0048858 cell projection morphogenesis 699 15 1

Negative Effect

Over enriched categories (46)

category term numInCat numDEInCat q.value
1 GO:0045333 cellular respiration 158 25 0.0000000
2 GO:0015980 energy derivation by oxidation of organic compounds 319 33 0.0000000
4 GO:0055114 oxidation-reduction process 896 58 0.0000000
5 GO:0022904 respiratory electron transport chain 105 18 0.0000001
6 GO:0022900 electron transport chain 106 18 0.0000001
7 GO:0006091 generation of precursor metabolites and energy 394 35 0.0000001
8 GO:0042773 ATP synthesis coupled electron transport 54 11 0.0000417
9 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 11 0.0000417
10 GO:0006119 oxidative phosphorylation 70 12 0.0000758
11 GO:0052652 cyclic purine nucleotide metabolic process 98 14 0.0001723
12 GO:0009190 cyclic nucleotide biosynthetic process 99 14 0.0001767
13 GO:0009152 purine ribonucleotide biosynthetic process 185 19 0.0001767
17 GO:0006164 purine nucleotide biosynthetic process 191 19 0.0002212
18 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 9 0.0002212
19 GO:0009260 ribonucleotide biosynthetic process 198 19 0.0003628
20 GO:0046390 ribose phosphate biosynthetic process 201 19 0.0004326
21 GO:0072522 purine-containing compound biosynthetic process 204 19 0.0005125
24 GO:0009187 cyclic nucleotide metabolic process 133 15 0.0007561
27 GO:0030334 regulation of cell migration 468 30 0.0019338
30 GO:0009165 nucleotide biosynthetic process 259 20 0.0036713
31 GO:1901293 nucleoside phosphate biosynthetic process 260 20 0.0037564
32 GO:0045765 regulation of angiogenesis 158 15 0.0043306
33 GO:2000145 regulation of cell motility 492 30 0.0043306
34 GO:0035924 cellular response to vascular endothelial growth factor stimulus 31 7 0.0043969
35 GO:0001525 angiogenesis 350 24 0.0044788
40 GO:1901342 regulation of vasculature development 172 15 0.0097643
41 GO:0006171 cAMP biosynthetic process 80 10 0.0097643
42 GO:0048514 blood vessel morphogenesis 421 26 0.0107077
45 GO:0007188 adenylate cyclase-modulating G-protein coupled receptor signaling pathway 82 10 0.0124704
46 GO:0051270 regulation of cellular component movement 555 31 0.0124943
47 GO:0040012 regulation of locomotion 533 30 0.0132796
48 GO:0015711 organic anion transport 293 20 0.0187427
49 GO:0007267 cell-cell signaling 915 43 0.0204764
50 GO:0046058 cAMP metabolic process 107 11 0.0221478
52 GO:0038084 vascular endothelial growth factor signaling pathway 19 5 0.0280316
54 GO:0001568 blood vessel development 480 27 0.0303176
55 GO:0030799 regulation of cyclic nucleotide metabolic process 96 10 0.0313048
56 GO:0001944 vasculature development 509 28 0.0313048
57 GO:0090407 organophosphate biosynthetic process 491 27 0.0334307
58 GO:0044283 small molecule biosynthetic process 388 23 0.0334307
60 GO:0007268 synaptic transmission 566 30 0.0401281
61 GO:0046034 ATP metabolic process 442 25 0.0408152
62 GO:1901566 organonitrogen compound biosynthetic process 586 30 0.0408152
63 GO:0030802 regulation of cyclic nucleotide biosynthetic process 82 9 0.0408152
65 GO:0007187 G-protein coupled receptor signaling pathway, coupled to cyclic nucleotide second messenger 100 10 0.0456433
66 GO:0007189 adenylate cyclase-activating G-protein coupled receptor signaling pathway 35 6 0.0469579

Under enriched (3)

category term numInCat numDEInCat q.value2
3997 GO:0006396 RNA processing 676 2 0.0055980
3993 GO:0006974 cellular response to DNA damage stimulus 668 3 0.0250466
3990 GO:0044265 cellular macromolecule catabolic process 798 5 0.0271548
3983 GO:0044257 cellular protein catabolic process 485 2 0.1556804
3986 GO:0006397 mRNA processing 404 1 0.1733067
3982 GO:0051603 proteolysis involved in cellular protein catabolic process 470 2 0.1990009
3984 GO:0006281 DNA repair 393 1 0.2088992
3978 GO:0044764 multi-organism cellular process 682 5 0.2088992
3979 GO:0016071 mRNA metabolic process 615 4 0.2088992
3975 GO:0016032 viral process 674 5 0.2354857
5655 GO:0010498 proteasomal protein catabolic process 284 0 0.2811789
3977 GO:0043632 modification-dependent macromolecule catabolic process 437 2 0.3255418
3976 GO:0019941 modification-dependent protein catabolic process 433 2 0.3336579
3971 GO:1903047 mitotic cell cycle process 746 7 0.3660643
3974 GO:0006511 ubiquitin-dependent protein catabolic process 424 2 0.3660643
8234 GO:0043161 proteasome-mediated ubiquitin-dependent protein catabolic process 265 0 0.3660643
3963 GO:0006259 DNA metabolic process 838 9 0.5244412
3973 GO:0008380 RNA splicing 332 1 0.5244412
3968 GO:0033365 protein localization to organelle 603 5 0.5244412
3964 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 7 0.5244412

Final Step: csv output

# Build "csv/<outFile><suffix>", a path relative to the working directory.
csvPath <- function(suffix) {
  paste0("csv/", outFile, suffix)
}

# One csv per analysis, written without the row-name column.
write.csv(go, file = csvPath("_main.csv"), row.names = FALSE)
write.csv(goPos, file = csvPath("Pos.csv"), row.names = FALSE)
write.csv(goNeg, file = csvPath("Neg.csv"), row.names = FALSE)