This report has goseq results for 30 minute insulin when:
- Top 1000 genes are marked as differentially expressed
- Genes with a positive effect among the top 1000 genes overall are marked as differentially expressed
- Genes with a negative effect among the top 1000 genes overall are marked as differentially expressed
This report was generated on June 21 2015
The goseq results are also saved as CSV files on snowwhite, in the directory:
/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv
Step 1: Load in all the necessary data/libraries
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)
# Per-gene results for the trait; the columns used later in this report are
# gene, p.value and effect.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_S_Insu_30_all_genes.txt"
# Prefix of the CSV files written in the final step.
outFile <- "S_Insu_30"
data <- read.table(fName, as.is = TRUE, header = TRUE)
# Gene length table; must contain a `gene` column to merge on.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)
# Keep only the part of each gene ID before the first "." (presumably the
# GENCODE version suffix -- confirm) so both tables share the same key.
# vapply() guarantees a character vector whatever the input size.
gene_lengths$gene <- vapply(
  strsplit(gene_lengths$gene, split = "[.]"), `[`, character(1), 1L
)
data$gene <- vapply(
  strsplit(data$gene, split = "[.]"), `[`, character(1), 1L
)
# Left join: genes without an entry in the length table are kept (NA length).
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)
# Sort by p-value so that `rank` 1 is the most significant gene.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() stays valid (empty) when the table has no rows.
data$rank <- seq_len(nrow(data))
# Minimum number of rows shown in each results table.
minRow <- 20
Step 2: Create genes vectors
The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes
with positive or negative effect in the top 1000 as differentially expressed.
# 0/1 differential-expression indicators, one entry per gene and named by
# gene ID: the top 1000 genes by rank, then the same set split by the sign
# of the effect.
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)
genesPos <- setNames(as.numeric(data$rank <= 1000 & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(data$rank <= 1000 & data$effect < 0), data$gene)
There are 503 DE genes with positive effect and 497 DE genes with negative effect.
Step 3: PWFs
# One nullp() fit per DE indicator vector, with the merged `length` column
# supplied as the bias data; the diagnostic plot is suppressed.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
Step 4: run goseq
# Run goseq on one PWF table for GO biological-process categories and
# post-process the result:
#   1. reset row names to 1..n (goseq's output order),
#   2. cap over/under-representation p-values at 1,
#   3. add q-values for over- (q.value) and under-representation (q.value2),
#   4. drop categories with 1000 or more genes.
# NOTE(review): q-values are computed over all categories, before the
# category-size filter is applied; row names therefore keep their
# pre-filter positions.
run_goseq_bp <- function(pwf_tbl) {
  res <- goseq(pwf_tbl, "hg19", "ensGene", test.cats = c("GO:BP"))
  rownames(res) <- NULL
  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue[res$over_represented_pvalue > 1] <- 1
  res$under_represented_pvalue[res$under_represented_pvalue > 1] <- 1
  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues
  res[which(res$numInCat < 1000), ]
}

go <- run_goseq_bp(pwf)
goPos <- run_goseq_bp(pwfPos)
goNeg <- run_goseq_bp(pwfNeg)
Top 1000 Results
Over enriched categories (45)
# Show every category with q <= 0.05, or the first `minRow` rows if fewer are
# significant -- but never more rows than the table has, so the output is
# not padded with all-NA rows. na.rm guards against NA q-values.
rowN <- min(nrow(go), max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value |
| 1 |
GO:0006413 |
translational initiation |
167 |
37 |
0.0000000 |
| 2 |
GO:0006414 |
translational elongation |
121 |
30 |
0.0000000 |
| 3 |
GO:0006415 |
translational termination |
95 |
26 |
0.0000000 |
| 4 |
GO:0070972 |
protein localization to endoplasmic reticulum |
128 |
30 |
0.0000000 |
| 5 |
GO:0006614 |
SRP-dependent cotranslational protein targeting to membrane |
107 |
27 |
0.0000000 |
| 6 |
GO:0006613 |
cotranslational protein targeting to membrane |
109 |
27 |
0.0000000 |
| 7 |
GO:0045047 |
protein targeting to ER |
110 |
27 |
0.0000000 |
| 8 |
GO:0072599 |
establishment of protein localization to endoplasmic reticulum |
111 |
27 |
0.0000000 |
| 9 |
GO:0000184 |
nuclear-transcribed mRNA catabolic process, nonsense-mediated decay |
118 |
27 |
0.0000002 |
| 10 |
GO:0006091 |
generation of precursor metabolites and energy |
394 |
54 |
0.0000010 |
| 11 |
GO:0019083 |
viral transcription |
158 |
30 |
0.0000020 |
| 12 |
GO:0043241 |
protein complex disassembly |
195 |
34 |
0.0000036 |
| 13 |
GO:0055114 |
oxidation-reduction process |
896 |
93 |
0.0000051 |
| 14 |
GO:0019080 |
viral gene expression |
168 |
30 |
0.0000072 |
| 15 |
GO:0006412 |
translation |
496 |
60 |
0.0000092 |
| 16 |
GO:0032984 |
macromolecular complex disassembly |
205 |
34 |
0.0000095 |
| 17 |
GO:0044033 |
multi-organism metabolic process |
178 |
30 |
0.0000249 |
| 18 |
GO:0043624 |
cellular protein complex disassembly |
174 |
30 |
0.0000249 |
| 19 |
GO:0015980 |
energy derivation by oxidation of organic compounds |
319 |
43 |
0.0000529 |
| 20 |
GO:0019752 |
carboxylic acid metabolic process |
829 |
84 |
0.0001140 |
| 23 |
GO:0006612 |
protein targeting to membrane |
171 |
28 |
0.0001406 |
| 25 |
GO:0006090 |
pyruvate metabolic process |
81 |
18 |
0.0004532 |
| 26 |
GO:0000956 |
nuclear-transcribed mRNA catabolic process |
184 |
28 |
0.0007075 |
| 27 |
GO:0043436 |
oxoacid metabolic process |
934 |
89 |
0.0008013 |
| 28 |
GO:0006082 |
organic acid metabolic process |
944 |
90 |
0.0008013 |
| 29 |
GO:0006402 |
mRNA catabolic process |
196 |
29 |
0.0008278 |
| 30 |
GO:0044724 |
single-organism carbohydrate catabolic process |
124 |
22 |
0.0012384 |
| 31 |
GO:0032787 |
monocarboxylic acid metabolic process |
427 |
49 |
0.0018611 |
| 35 |
GO:0016052 |
carbohydrate catabolic process |
131 |
22 |
0.0027407 |
| 37 |
GO:0006401 |
RNA catabolic process |
222 |
30 |
0.0029237 |
| 38 |
GO:0006096 |
glycolytic process |
57 |
13 |
0.0073882 |
| 39 |
GO:0045333 |
cellular respiration |
158 |
22 |
0.0104354 |
| 40 |
GO:0022411 |
cellular component disassembly |
412 |
45 |
0.0122468 |
| 41 |
GO:0051186 |
cofactor metabolic process |
249 |
31 |
0.0126724 |
| 43 |
GO:0022904 |
respiratory electron transport chain |
105 |
16 |
0.0248748 |
| 44 |
GO:0022900 |
electron transport chain |
106 |
16 |
0.0278997 |
| 45 |
GO:0009135 |
purine nucleoside diphosphate metabolic process |
15 |
6 |
0.0279785 |
| 46 |
GO:0009179 |
purine ribonucleoside diphosphate metabolic process |
15 |
6 |
0.0279785 |
| 47 |
GO:0006732 |
coenzyme metabolic process |
194 |
25 |
0.0372085 |
| 49 |
GO:0022613 |
ribonucleoprotein complex biogenesis |
280 |
32 |
0.0407275 |
| 50 |
GO:0033539 |
fatty acid beta-oxidation using acyl-CoA dehydrogenase |
6 |
4 |
0.0408517 |
| 51 |
GO:0072657 |
protein localization to membrane |
371 |
39 |
0.0442825 |
| 52 |
GO:0042254 |
ribosome biogenesis |
167 |
22 |
0.0449407 |
| 53 |
GO:0006120 |
mitochondrial electron transport, NADH to ubiquinone |
42 |
9 |
0.0449407 |
| 54 |
GO:0019058 |
viral life cycle |
303 |
33 |
0.0483598 |
Under enriched (0)
# Re-sort so the smallest under-representation p-values come first.
go <- go[order(go$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, or the first `minRow` rows if
# fewer are significant -- capped at the table size so no all-NA rows are
# printed. na.rm guards against NA q-values.
rowN <- min(nrow(go), max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 5228 |
GO:0022603 |
regulation of anatomical structure morphogenesis |
647 |
19 |
1 |
| 5227 |
GO:0007186 |
G-protein coupled receptor signaling pathway |
549 |
14 |
1 |
| 5226 |
GO:0045596 |
negative regulation of cell differentiation |
440 |
12 |
1 |
| 5224 |
GO:0051093 |
negative regulation of developmental process |
546 |
17 |
1 |
| 5220 |
GO:0001501 |
skeletal system development |
391 |
11 |
1 |
| 5218 |
GO:0000902 |
cell morphogenesis |
993 |
41 |
1 |
| 5212 |
GO:0001944 |
vasculature development |
509 |
17 |
1 |
| 5215 |
GO:0048514 |
blood vessel morphogenesis |
421 |
13 |
1 |
| 5222 |
GO:0048705 |
skeletal system morphogenesis |
183 |
3 |
1 |
| 5221 |
GO:0010720 |
positive regulation of cell development |
178 |
3 |
1 |
| 5210 |
GO:0001568 |
blood vessel development |
480 |
16 |
1 |
| 5211 |
GO:0001525 |
angiogenesis |
350 |
10 |
1 |
| 5217 |
GO:0009615 |
response to virus |
261 |
6 |
1 |
| 5213 |
GO:0090305 |
nucleic acid phosphodiester bond hydrolysis |
200 |
4 |
1 |
| 5202 |
GO:0046903 |
secretion |
769 |
30 |
1 |
| 5216 |
GO:0051607 |
defense response to virus |
173 |
3 |
1 |
| 8899 |
GO:0060349 |
bone morphogenesis |
77 |
0 |
1 |
| 5200 |
GO:0032940 |
secretion by cell |
682 |
26 |
1 |
| 5219 |
GO:0007224 |
smoothened signaling pathway |
105 |
1 |
1 |
| 5204 |
GO:0042493 |
response to drug |
312 |
9 |
1 |
Positive Effect
Over enriched categories (18)
# Show every category with q <= 0.05, or the first `minRow` rows if fewer are
# significant -- but never more rows than the table has, so the output is
# not padded with all-NA rows. na.rm guards against NA q-values.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value |
| 1 |
GO:0006091 |
generation of precursor metabolites and energy |
394 |
38 |
0.0000010 |
| 2 |
GO:0055114 |
oxidation-reduction process |
896 |
58 |
0.0000559 |
| 3 |
GO:0015980 |
energy derivation by oxidation of organic compounds |
319 |
30 |
0.0000733 |
| 4 |
GO:0045333 |
cellular respiration |
158 |
19 |
0.0002804 |
| 5 |
GO:0022904 |
respiratory electron transport chain |
105 |
15 |
0.0003436 |
| 6 |
GO:0022900 |
electron transport chain |
106 |
15 |
0.0003436 |
| 7 |
GO:0006120 |
mitochondrial electron transport, NADH to ubiquinone |
42 |
9 |
0.0020776 |
| 8 |
GO:0019752 |
carboxylic acid metabolic process |
829 |
48 |
0.0143048 |
| 9 |
GO:0042773 |
ATP synthesis coupled electron transport |
54 |
9 |
0.0143048 |
| 10 |
GO:0042775 |
mitochondrial ATP synthesis coupled electron transport |
54 |
9 |
0.0143048 |
| 11 |
GO:0033539 |
fatty acid beta-oxidation using acyl-CoA dehydrogenase |
6 |
4 |
0.0143048 |
| 12 |
GO:0032787 |
monocarboxylic acid metabolic process |
427 |
30 |
0.0181175 |
| 13 |
GO:0044724 |
single-organism carbohydrate catabolic process |
124 |
14 |
0.0219200 |
| 15 |
GO:0043436 |
oxoacid metabolic process |
934 |
51 |
0.0241844 |
| 16 |
GO:0006082 |
organic acid metabolic process |
944 |
51 |
0.0301276 |
| 17 |
GO:0016052 |
carbohydrate catabolic process |
131 |
14 |
0.0318524 |
| 18 |
GO:0009135 |
purine nucleoside diphosphate metabolic process |
15 |
5 |
0.0318524 |
| 19 |
GO:0009179 |
purine ribonucleoside diphosphate metabolic process |
15 |
5 |
0.0318524 |
| 20 |
GO:0006119 |
oxidative phosphorylation |
70 |
9 |
0.0681726 |
| 21 |
GO:0046031 |
ADP metabolic process |
10 |
4 |
0.0737939 |
Under enriched (1)
# Re-sort so the smallest under-representation p-values come first.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, or the first `minRow` rows if
# fewer are significant -- capped at the table size so no all-NA rows are
# printed. na.rm guards against NA q-values.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 4045 |
GO:0016071 |
mRNA metabolic process |
615 |
3 |
0.0317998 |
| 5393 |
GO:0008380 |
RNA splicing |
332 |
0 |
0.1105265 |
| 4042 |
GO:0006396 |
RNA processing |
676 |
6 |
0.3368223 |
| 4097 |
GO:0000375 |
RNA splicing, via transesterification reactions |
225 |
0 |
1.0000000 |
| 4098 |
GO:0000377 |
RNA splicing, via transesterification reactions with bulged adenosine as nucleophile |
220 |
0 |
1.0000000 |
| 4107 |
GO:0000398 |
mRNA splicing, via spliceosome |
220 |
0 |
1.0000000 |
| 4999 |
GO:0006401 |
RNA catabolic process |
222 |
0 |
1.0000000 |
| 4040 |
GO:0006397 |
mRNA processing |
404 |
3 |
1.0000000 |
| 4041 |
GO:0022613 |
ribonucleoprotein complex biogenesis |
280 |
1 |
1.0000000 |
| 11297 |
GO:0090305 |
nucleic acid phosphodiester bond hydrolysis |
200 |
0 |
1.0000000 |
| 5000 |
GO:0006402 |
mRNA catabolic process |
196 |
0 |
1.0000000 |
| 4032 |
GO:0045944 |
positive regulation of transcription from RNA polymerase II promoter |
719 |
10 |
1.0000000 |
| 4029 |
GO:0044403 |
symbiosis, encompassing mutualism through parasitism |
727 |
10 |
1.0000000 |
| 4030 |
GO:0044419 |
interspecies interaction between organisms |
727 |
10 |
1.0000000 |
| 4026 |
GO:0044265 |
cellular macromolecule catabolic process |
798 |
12 |
1.0000000 |
| 4157 |
GO:0000956 |
nuclear-transcribed mRNA catabolic process |
184 |
0 |
1.0000000 |
| 4033 |
GO:0007346 |
regulation of mitotic cell cycle |
358 |
3 |
1.0000000 |
| 4038 |
GO:0044843 |
cell cycle G1/S phase transition |
238 |
1 |
1.0000000 |
| 4037 |
GO:0000082 |
G1/S transition of mitotic cell cycle |
236 |
1 |
1.0000000 |
| 4031 |
GO:0019058 |
viral life cycle |
303 |
2 |
1.0000000 |
Negative Effect
Over enriched categories (52)
| |
category |
term |
numInCat |
numDEInCat |
q.value |
| 1 |
GO:0006413 |
translational initiation |
167 |
35 |
0.0000000 |
| 2 |
GO:0006415 |
translational termination |
95 |
26 |
0.0000000 |
| 3 |
GO:0006414 |
translational elongation |
121 |
28 |
0.0000000 |
| 4 |
GO:0006614 |
SRP-dependent cotranslational protein targeting to membrane |
107 |
26 |
0.0000000 |
| 5 |
GO:0006613 |
cotranslational protein targeting to membrane |
109 |
26 |
0.0000000 |
| 6 |
GO:0045047 |
protein targeting to ER |
110 |
26 |
0.0000000 |
| 7 |
GO:0072599 |
establishment of protein localization to endoplasmic reticulum |
111 |
26 |
0.0000000 |
| 8 |
GO:0000184 |
nuclear-transcribed mRNA catabolic process, nonsense-mediated decay |
118 |
27 |
0.0000000 |
| 9 |
GO:0019083 |
viral transcription |
158 |
29 |
0.0000000 |
| 10 |
GO:0070972 |
protein localization to endoplasmic reticulum |
128 |
26 |
0.0000000 |
| 11 |
GO:0019080 |
viral gene expression |
168 |
29 |
0.0000000 |
| 12 |
GO:0044033 |
multi-organism metabolic process |
178 |
29 |
0.0000000 |
| 13 |
GO:0043241 |
protein complex disassembly |
195 |
30 |
0.0000000 |
| 14 |
GO:0032984 |
macromolecular complex disassembly |
205 |
30 |
0.0000000 |
| 15 |
GO:0006412 |
translation |
496 |
47 |
0.0000000 |
| 16 |
GO:0006402 |
mRNA catabolic process |
196 |
29 |
0.0000000 |
| 17 |
GO:0000956 |
nuclear-transcribed mRNA catabolic process |
184 |
28 |
0.0000000 |
| 18 |
GO:0043624 |
cellular protein complex disassembly |
174 |
27 |
0.0000000 |
| 19 |
GO:0006401 |
RNA catabolic process |
222 |
30 |
0.0000000 |
| 20 |
GO:0006612 |
protein targeting to membrane |
171 |
26 |
0.0000000 |
| 27 |
GO:0022613 |
ribonucleoprotein complex biogenesis |
280 |
31 |
0.0000000 |
| 34 |
GO:0019058 |
viral life cycle |
303 |
31 |
0.0000001 |
| 42 |
GO:0042254 |
ribosome biogenesis |
167 |
21 |
0.0000015 |
| 44 |
GO:0022411 |
cellular component disassembly |
412 |
35 |
0.0000019 |
| 47 |
GO:0090150 |
establishment of protein localization to membrane |
296 |
28 |
0.0000024 |
| 49 |
GO:0016071 |
mRNA metabolic process |
615 |
43 |
0.0000051 |
| 51 |
GO:0016032 |
viral process |
674 |
45 |
0.0000083 |
| 52 |
GO:0044764 |
multi-organism cellular process |
682 |
45 |
0.0000114 |
| 53 |
GO:0072594 |
establishment of protein localization to organelle |
456 |
35 |
0.0000114 |
| 56 |
GO:0072657 |
protein localization to membrane |
371 |
30 |
0.0000303 |
| 57 |
GO:0044403 |
symbiosis, encompassing mutualism through parasitism |
727 |
46 |
0.0000303 |
| 58 |
GO:0044419 |
interspecies interaction between organisms |
727 |
46 |
0.0000303 |
| 60 |
GO:0006605 |
protein targeting |
497 |
36 |
0.0000366 |
| 67 |
GO:1902578 |
single-organism localization |
426 |
32 |
0.0000645 |
| 68 |
GO:1902580 |
single-organism cellular localization |
426 |
32 |
0.0000645 |
| 69 |
GO:0006364 |
rRNA processing |
117 |
15 |
0.0001024 |
| 70 |
GO:0016072 |
rRNA metabolic process |
122 |
15 |
0.0001810 |
| 71 |
GO:0044802 |
single-organism membrane organization |
638 |
39 |
0.0007707 |
| 72 |
GO:0001731 |
formation of translation preinitiation complex |
18 |
6 |
0.0011387 |
| 73 |
GO:0033365 |
protein localization to organelle |
603 |
37 |
0.0011628 |
| 74 |
GO:0016482 |
cytoplasmic transport |
825 |
46 |
0.0012939 |
| 75 |
GO:0042274 |
ribosomal small subunit biogenesis |
21 |
6 |
0.0016820 |
| 76 |
GO:0022618 |
ribonucleoprotein complex assembly |
135 |
14 |
0.0043042 |
| 80 |
GO:0042273 |
ribosomal large subunit biogenesis |
16 |
5 |
0.0054764 |
| 81 |
GO:0006417 |
regulation of translation |
249 |
20 |
0.0057914 |
| 82 |
GO:0061024 |
membrane organization |
778 |
42 |
0.0062815 |
| 83 |
GO:0071826 |
ribonucleoprotein complex subunit organization |
141 |
14 |
0.0064693 |
| 85 |
GO:0034470 |
ncRNA processing |
235 |
18 |
0.0114765 |
| 88 |
GO:0044265 |
cellular macromolecule catabolic process |
798 |
42 |
0.0130529 |
| 89 |
GO:0006446 |
regulation of translational initiation |
67 |
9 |
0.0134427 |
| 90 |
GO:0006886 |
intracellular protein transport |
758 |
39 |
0.0324000 |
| 92 |
GO:0009057 |
macromolecule catabolic process |
983 |
47 |
0.0490219 |
Under enriched (0)
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 3646 |
GO:0000902 |
cell morphogenesis |
993 |
12 |
0.2828393 |
| 3643 |
GO:0022610 |
biological adhesion |
916 |
11 |
0.4628117 |
| 3642 |
GO:0007155 |
cell adhesion |
912 |
11 |
0.4628117 |
| 3641 |
GO:0007186 |
G-protein coupled receptor signaling pathway |
549 |
4 |
0.6489588 |
| 3645 |
GO:0098602 |
single organism cell adhesion |
327 |
1 |
0.6489588 |
| 3640 |
GO:0001944 |
vasculature development |
509 |
4 |
0.7840956 |
| 3635 |
GO:0001568 |
blood vessel development |
480 |
4 |
1.0000000 |
| 3633 |
GO:0007610 |
behavior |
486 |
4 |
1.0000000 |
| 3636 |
GO:0048514 |
blood vessel morphogenesis |
421 |
3 |
1.0000000 |
| 3639 |
GO:0016337 |
single organismal cell-cell adhesion |
283 |
1 |
1.0000000 |
| 3638 |
GO:0006887 |
exocytosis |
286 |
1 |
1.0000000 |
| 3634 |
GO:0001525 |
angiogenesis |
350 |
2 |
1.0000000 |
| 3628 |
GO:0032940 |
secretion by cell |
682 |
8 |
1.0000000 |
| 3626 |
GO:0007267 |
cell-cell signaling |
915 |
13 |
1.0000000 |
| 3630 |
GO:0043062 |
extracellular structure organization |
325 |
2 |
1.0000000 |
| 3629 |
GO:0030198 |
extracellular matrix organization |
324 |
2 |
1.0000000 |
| 3625 |
GO:0022603 |
regulation of anatomical structure morphogenesis |
647 |
8 |
1.0000000 |
| 3618 |
GO:0002682 |
regulation of immune system process |
997 |
15 |
1.0000000 |
| 5350 |
GO:0010720 |
positive regulation of cell development |
178 |
0 |
1.0000000 |
| 3617 |
GO:0000904 |
cell morphogenesis involved in differentiation |
701 |
10 |
1.0000000 |
Final Step: csv output
# Write each result table to csv/<outFile><suffix>.csv without row names.
# The tables are written in whatever row order they currently have.
invisible(Map(
  function(result, suffix) {
    write.csv(
      result,
      file = paste0("csv/", outFile, suffix, ".csv"),
      row.names = FALSE
    )
  },
  list(go, goPos, goNeg),
  c("main", "Pos", "Neg")
))