This report has goseq results for NGT versus T2D when:
- Top 1000 genes are marked as differentially expressed
- Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
- Top genes with negative effect in top 1000 overall genes are marked as differentially expressed
This report was generated on June 21 2015
Goseq results are also saved in CSV files located on snowwhite in the directory:
/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv
Step 1: Load in all the necessary data/libraries
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)
# Table of per-gene results; the code below reads its gene, p.value and
# effect columns.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt"
# Prefix for the names of the csv files written in the final step.
outFile <- "ngt_t2d"
data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene length table; the merged length column is later passed to nullp()
# as bias.data.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so the IDs in the
# two tables can be matched (presumably this strips an Ensembl version
# suffix -- confirm against the input files).
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# all.x = TRUE keeps every row of data; genes with no entry in gene_lengths
# get NA in the merged columns.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by ascending p-value, then attach q-values and a 1-based rank.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each result table.
minRow <- 20
Step 2: Create genes vectors
The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes
with positive or negative effect in the top 1000 as differentially expressed.
# Indicator vectors (1 = marked as differentially expressed, 0 = not),
# named by gene ID.
is_top <- data$rank <= 1000

# All genes ranked in the top 1000.
genes <- setNames(as.numeric(is_top), data$gene)
# Top-1000 genes whose effect is positive.
genesPos <- setNames(as.numeric(is_top & data$effect > 0), data$gene)
# Top-1000 genes whose effect is negative.
genesNeg <- setNames(as.numeric(is_top & data$effect < 0), data$gene)
There are 511 DE genes with positive effect and 489 DE genes with negative effect.
Step 3: PWFs
# Fit one probability weighting function per indicator vector, using the
# merged gene lengths as the bias data and suppressing the diagnostic plot.
pwf <- nullp(
  genes,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  genesPos,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  genesNeg,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
Step 4: run goseq
# Run goseq on one PWF table for the GO:BP categories and post-process the
# result. The same steps are applied to all three gene sets, so they live in
# a single helper instead of three copy-pasted sequences.
#
# pwf_table: output of nullp() for one indicator vector.
# Returns the goseq result with added q.value (over-representation) and
# q.value2 (under-representation) columns, restricted to categories with
# numInCat < 1000.
run_goseq_bp <- function(pwf_table) {
  res <- goseq(pwf_table, "hg19", "ensGene", test.cats = c("GO:BP"))
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # The size filter is applied after the q-values are computed, so the
  # q-values are based on all tested categories, not only the retained ones.
  res[which(res$numInCat < 1000), ]
}

go <- run_goseq_bp(pwf)
goPos <- run_goseq_bp(pwfPos)
goNeg <- run_goseq_bp(pwfNeg)
Top 1000 Results
Over enriched categories (16)
# Print the first rowN rows of go (in its current order): every category with
# q.value <= 0.05, but at least minRow rows. rowN is capped at nrow(go) so
# that indexing never runs past the end and emits all-NA rows; NA q-values
# are not counted as significant.
rowN <- min(nrow(go), max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0022904 |
respiratory electron transport chain |
105 |
32 |
0.0000000 |
| GO:0022900 |
electron transport chain |
106 |
32 |
0.0000000 |
| GO:0045333 |
cellular respiration |
158 |
35 |
0.0000000 |
| GO:1902600 |
hydrogen ion transmembrane transport |
84 |
21 |
0.0000020 |
| GO:0055114 |
oxidation-reduction process |
895 |
90 |
0.0000148 |
| GO:0006091 |
generation of precursor metabolites and energy |
395 |
50 |
0.0000281 |
| GO:0015980 |
energy derivation by oxidation of organic compounds |
319 |
43 |
0.0000325 |
| GO:0042773 |
ATP synthesis coupled electron transport |
54 |
15 |
0.0000325 |
| GO:0042775 |
mitochondrial ATP synthesis coupled electron transport |
54 |
15 |
0.0000325 |
| GO:0015992 |
proton transport |
112 |
21 |
0.0002193 |
| GO:0006818 |
hydrogen transport |
114 |
21 |
0.0002873 |
| GO:0006120 |
mitochondrial electron transport, NADH to ubiquinone |
42 |
12 |
0.0003775 |
| GO:0006119 |
oxidative phosphorylation |
70 |
15 |
0.0011039 |
| GO:0015985 |
energy coupled proton transport, down electrochemical gradient |
19 |
7 |
0.0124564 |
| GO:0015986 |
ATP synthesis coupled proton transport |
19 |
7 |
0.0124564 |
| GO:0042776 |
mitochondrial ATP synthesis coupled proton transport |
14 |
6 |
0.0184776 |
| GO:0021539 |
subthalamus development |
42 |
10 |
0.0607247 |
| GO:0048857 |
neural nucleus development |
54 |
11 |
0.1448926 |
| GO:0021762 |
substantia nigra development |
40 |
9 |
0.2036624 |
| GO:0006754 |
ATP biosynthetic process |
36 |
8 |
0.2582218 |
Under enriched (0)
# Re-sort go by under-representation p-value; go keeps this order from here on.
go <- go[order(go$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, but at least minRow rows.
# rowN is capped at nrow(go) so that indexing never runs past the end and
# emits all-NA rows; NA q-values are not counted as significant.
rowN <- min(nrow(go), max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 4932 |
GO:0022610 |
biological adhesion |
920 |
32 |
1 |
| 4929 |
GO:0007155 |
cell adhesion |
916 |
32 |
1 |
| 4936 |
GO:0060271 |
cilium morphogenesis |
154 |
1 |
1 |
| 4935 |
GO:0070661 |
leukocyte proliferation |
200 |
2 |
1 |
| 4924 |
GO:0030031 |
cell projection assembly |
267 |
5 |
1 |
| 4931 |
GO:0032943 |
mononuclear cell proliferation |
191 |
2 |
1 |
| 4927 |
GO:0046651 |
lymphocyte proliferation |
189 |
2 |
1 |
| 4915 |
GO:0046649 |
lymphocyte activation |
480 |
13 |
1 |
| 4912 |
GO:0002520 |
immune system development |
648 |
21 |
1 |
| 4923 |
GO:0051302 |
regulation of cell division |
209 |
3 |
1 |
| 4934 |
GO:0044782 |
cilium organization |
137 |
1 |
1 |
| 4933 |
GO:0070663 |
regulation of leukocyte proliferation |
151 |
1 |
1 |
| 4914 |
GO:0042110 |
T cell activation |
347 |
8 |
1 |
| 4930 |
GO:0032102 |
negative regulation of response to external stimulus |
143 |
1 |
1 |
| 4920 |
GO:0050727 |
regulation of inflammatory response |
212 |
3 |
1 |
| 4928 |
GO:0032944 |
regulation of mononuclear cell proliferation |
147 |
1 |
1 |
| 4910 |
GO:0002684 |
positive regulation of immune system process |
611 |
19 |
1 |
| 4926 |
GO:0050670 |
regulation of lymphocyte proliferation |
146 |
1 |
1 |
| 4909 |
GO:0045321 |
leukocyte activation |
561 |
17 |
1 |
| 4916 |
GO:0050867 |
positive regulation of cell activation |
239 |
4 |
1 |
Positive Effect
Over enriched categories (0)
# Print the first rowN rows of goPos (in its current order): every category
# with q.value <= 0.05, but at least minRow rows. rowN is capped at
# nrow(goPos) so that indexing never runs past the end and emits all-NA rows;
# NA q-values are not counted as significant.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value |
| 18 |
GO:0070647 |
protein modification by small protein conjugation or removal |
731 |
41 |
0.5265730 |
| 22 |
GO:0035414 |
negative regulation of catenin import into nucleus |
8 |
3 |
0.6617710 |
| 24 |
GO:0006333 |
chromatin assembly or disassembly |
110 |
10 |
0.9016002 |
| 25 |
GO:0034728 |
nucleosome organization |
95 |
9 |
0.9016002 |
| 26 |
GO:0016567 |
protein ubiquitination |
612 |
34 |
1.0000000 |
| 27 |
GO:0006325 |
chromatin organization |
563 |
32 |
1.0000000 |
| 28 |
GO:0032446 |
protein modification by small protein conjugation |
654 |
35 |
1.0000000 |
| 29 |
GO:0070537 |
histone H2A K63-linked deubiquitination |
3 |
2 |
1.0000000 |
| 31 |
GO:0031497 |
chromatin assembly |
87 |
8 |
1.0000000 |
| 32 |
GO:0070602 |
regulation of centromeric sister chromatid cohesion |
3 |
2 |
1.0000000 |
| 34 |
GO:0016568 |
chromatin modification |
502 |
29 |
1.0000000 |
| 35 |
GO:0006334 |
nucleosome assembly |
74 |
7 |
1.0000000 |
| 36 |
GO:0051276 |
chromosome organization |
738 |
38 |
1.0000000 |
| 37 |
GO:0010558 |
negative regulation of macromolecule biosynthetic process |
959 |
46 |
1.0000000 |
| 38 |
GO:0071824 |
protein-DNA complex subunit organization |
128 |
10 |
1.0000000 |
| 39 |
GO:0016584 |
nucleosome positioning |
4 |
2 |
1.0000000 |
| 42 |
GO:0015846 |
polyamine transport |
4 |
2 |
1.0000000 |
| 43 |
GO:0060244 |
negative regulation of cell proliferation involved in contact inhibition |
4 |
2 |
1.0000000 |
| 45 |
GO:0006282 |
regulation of DNA repair |
54 |
6 |
1.0000000 |
| 46 |
GO:0010499 |
proteasomal ubiquitin-independent protein catabolic process |
4 |
2 |
1.0000000 |
Under enriched (0)
# Re-sort goPos by under-representation p-value; goPos keeps this order from
# here on.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, but at least minRow rows.
# rowN is capped at nrow(goPos) so that indexing never runs past the end and
# emits all-NA rows; NA q-values are not counted as significant.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 3655 |
GO:0006520 |
cellular amino acid metabolic process |
417 |
2 |
1 |
| 3654 |
GO:0030001 |
metal ion transport |
584 |
7 |
1 |
| 3651 |
GO:0006812 |
cation transport |
768 |
11 |
1 |
| 3649 |
GO:0022610 |
biological adhesion |
920 |
16 |
1 |
| 3647 |
GO:0007155 |
cell adhesion |
916 |
16 |
1 |
| 11737 |
GO:1901605 |
alpha-amino acid metabolic process |
188 |
0 |
1 |
| 3652 |
GO:0090066 |
regulation of anatomical structure size |
311 |
2 |
1 |
| 3644 |
GO:0098662 |
inorganic cation transmembrane transport |
397 |
4 |
1 |
| 3645 |
GO:0015672 |
monovalent inorganic cation transport |
351 |
3 |
1 |
| 7773 |
GO:0042113 |
B cell activation |
169 |
0 |
1 |
| 3635 |
GO:0001775 |
cell activation |
767 |
12 |
1 |
| 3638 |
GO:0098655 |
cation transmembrane transport |
467 |
6 |
1 |
| 6075 |
GO:0019932 |
second-messenger-mediated signaling |
163 |
0 |
1 |
| 3630 |
GO:0006082 |
organic acid metabolic process |
945 |
17 |
1 |
| 3650 |
GO:0050867 |
positive regulation of cell activation |
239 |
1 |
1 |
| 3634 |
GO:0098660 |
inorganic ion transmembrane transport |
456 |
6 |
1 |
| 3631 |
GO:0046903 |
secretion |
769 |
13 |
1 |
| 3641 |
GO:0019058 |
viral life cycle |
303 |
2 |
1 |
| 3623 |
GO:0043436 |
oxoacid metabolic process |
935 |
17 |
1 |
| 3632 |
GO:0046649 |
lymphocyte activation |
480 |
6 |
1 |
Negative Effect
Over enriched categories (35)
| |
category |
term |
numInCat |
numDEInCat |
q.value |
| 1 |
GO:0022904 |
respiratory electron transport chain |
105 |
32 |
0.0000000 |
| 2 |
GO:0022900 |
electron transport chain |
106 |
32 |
0.0000000 |
| 3 |
GO:0045333 |
cellular respiration |
158 |
35 |
0.0000000 |
| 4 |
GO:0006091 |
generation of precursor metabolites and energy |
395 |
45 |
0.0000000 |
| 5 |
GO:0015980 |
energy derivation by oxidation of organic compounds |
319 |
40 |
0.0000000 |
| 6 |
GO:0055114 |
oxidation-reduction process |
895 |
68 |
0.0000000 |
| 7 |
GO:1902600 |
hydrogen ion transmembrane transport |
84 |
21 |
0.0000000 |
| 8 |
GO:0015992 |
proton transport |
112 |
21 |
0.0000000 |
| 9 |
GO:0006818 |
hydrogen transport |
114 |
21 |
0.0000000 |
| 10 |
GO:0042773 |
ATP synthesis coupled electron transport |
54 |
15 |
0.0000000 |
| 11 |
GO:0042775 |
mitochondrial ATP synthesis coupled electron transport |
54 |
15 |
0.0000000 |
| 12 |
GO:0006119 |
oxidative phosphorylation |
70 |
15 |
0.0000001 |
| 13 |
GO:0006120 |
mitochondrial electron transport, NADH to ubiquinone |
42 |
12 |
0.0000002 |
| 15 |
GO:0006812 |
cation transport |
768 |
46 |
0.0000268 |
| 16 |
GO:0009123 |
nucleoside monophosphate metabolic process |
487 |
35 |
0.0000281 |
| 18 |
GO:0015672 |
monovalent inorganic cation transport |
351 |
28 |
0.0000564 |
| 19 |
GO:0015985 |
energy coupled proton transport, down electrochemical gradient |
19 |
7 |
0.0000879 |
| 20 |
GO:0015986 |
ATP synthesis coupled proton transport |
19 |
7 |
0.0000879 |
| 21 |
GO:0009161 |
ribonucleoside monophosphate metabolic process |
477 |
33 |
0.0001433 |
| 22 |
GO:0034220 |
ion transmembrane transport |
657 |
40 |
0.0001918 |
| 23 |
GO:0042776 |
mitochondrial ATP synthesis coupled proton transport |
14 |
6 |
0.0002190 |
| 24 |
GO:0009167 |
purine ribonucleoside monophosphate metabolic process |
465 |
32 |
0.0002201 |
| 25 |
GO:0009126 |
purine nucleoside monophosphate metabolic process |
466 |
32 |
0.0002201 |
| 26 |
GO:0046034 |
ATP metabolic process |
441 |
31 |
0.0002201 |
| 27 |
GO:0098655 |
cation transmembrane transport |
467 |
30 |
0.0020848 |
| 28 |
GO:0098662 |
inorganic cation transmembrane transport |
397 |
27 |
0.0020991 |
| 29 |
GO:0098660 |
inorganic ion transmembrane transport |
456 |
29 |
0.0030351 |
| 30 |
GO:0006754 |
ATP biosynthetic process |
36 |
7 |
0.0084068 |
| 32 |
GO:0009156 |
ribonucleoside monophosphate biosynthetic process |
73 |
9 |
0.0234307 |
| 33 |
GO:0009127 |
purine nucleoside monophosphate biosynthetic process |
61 |
8 |
0.0362184 |
| 34 |
GO:0009168 |
purine ribonucleoside monophosphate biosynthetic process |
61 |
8 |
0.0362184 |
| 35 |
GO:0009206 |
purine ribonucleoside triphosphate biosynthetic process |
46 |
7 |
0.0362184 |
| 36 |
GO:0009124 |
nucleoside monophosphate biosynthetic process |
79 |
9 |
0.0385134 |
| 37 |
GO:0009145 |
purine nucleoside triphosphate biosynthetic process |
47 |
7 |
0.0393327 |
| 38 |
GO:0009142 |
nucleoside triphosphate biosynthetic process |
63 |
8 |
0.0395647 |
Under enriched (0)
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 3563 |
GO:0006397 |
mRNA processing |
404 |
2 |
0.7662154 |
| 3562 |
GO:0016568 |
chromatin modification |
502 |
4 |
0.8450203 |
| 3556 |
GO:0051276 |
chromosome organization |
738 |
8 |
0.8450203 |
| 3559 |
GO:0006325 |
chromatin organization |
563 |
5 |
0.8450203 |
| 3552 |
GO:0043549 |
regulation of kinase activity |
675 |
7 |
1.0000000 |
| 3545 |
GO:0045893 |
positive regulation of transcription, DNA-templated |
989 |
13 |
1.0000000 |
| 3544 |
GO:0051338 |
regulation of transferase activity |
782 |
9 |
1.0000000 |
| 3555 |
GO:0007389 |
pattern specification process |
346 |
2 |
1.0000000 |
| 3560 |
GO:0030031 |
cell projection assembly |
267 |
1 |
1.0000000 |
| 3538 |
GO:0045859 |
regulation of protein kinase activity |
637 |
7 |
1.0000000 |
| 3548 |
GO:0019221 |
cytokine-mediated signaling pathway |
345 |
2 |
1.0000000 |
| 8974 |
GO:0048705 |
skeletal system morphogenesis |
184 |
0 |
1.0000000 |
| 3528 |
GO:0051247 |
positive regulation of protein metabolic process |
951 |
13 |
1.0000000 |
| 3543 |
GO:0008380 |
RNA splicing |
332 |
2 |
1.0000000 |
| 3523 |
GO:0006259 |
DNA metabolic process |
838 |
11 |
1.0000000 |
| 3517 |
GO:0000902 |
cell morphogenesis |
993 |
15 |
1.0000000 |
| 3522 |
GO:0051301 |
cell division |
666 |
8 |
1.0000000 |
| 10481 |
GO:0071103 |
DNA conformation change |
173 |
0 |
1.0000000 |
| 3518 |
GO:0032101 |
regulation of response to external stimulus |
502 |
5 |
1.0000000 |
| 3506 |
GO:0009790 |
embryo development |
829 |
12 |
1.0000000 |
Final Step: csv output
# Write the three result tables, in their current row order, to the csv/
# directory relative to the working directory. The directory is created
# first if it is missing, since write.csv() cannot open a file in a
# non-existent directory.
if (!dir.exists("csv")) {
  dir.create("csv", recursive = TRUE)
}
write.csv(go, file = file.path("csv", paste0(outFile, "_main.csv")), row.names = FALSE)
write.csv(goPos, file = file.path("csv", paste0(outFile, "Pos.csv")), row.names = FALSE)
write.csv(goNeg, file = file.path("csv", paste0(outFile, "Neg.csv")), row.names = FALSE)