This report has goseq results for BMI when:

  1. The top 1000 genes (ranked by p-value) are marked as differentially expressed
  2. Only the genes with a positive effect among the top 1000 genes are marked as differentially expressed
  3. Only the genes with a negative effect among the top 1000 genes are marked as differentially expressed

Categories with 1000 or more members are not displayed, even when they are significant.

This report was generated on June 21 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Step 1 inputs: per-gene association results for the trait and a table of
# gene lengths. Both tables must carry a `gene` column; the results table
# must also carry `p.value` (used for ranking below).
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_bmi_all_genes.txt"
# Prefix of the csv files written in the final step.
outFile <- "bmi"

data <- read.table(fName, as.is = TRUE, header = TRUE)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Drop the version suffix (everything from the first "." onwards) from the
# gene identifiers in both tables so that they can be joined on `gene`.
# sub() is vectorised, so no per-element sapply()/strsplit() is needed.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every gene in the results is kept; genes without an entry in the
# length table get NA in the columns coming from `gene_lengths`.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by p-value (rank 1 = smallest p-value) and attach q-values.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() stays correct (empty) for a zero-row table, unlike seq(1, n).
data$rank <- seq_len(nrow(data))

# Minimum number of rows shown in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors, named by gene, flagging the genes treated as
# differentially expressed: every gene ranked in the top 1000, and the subsets
# of those genes with a positive / negative effect.
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)
genesPos <- setNames(as.numeric(data$effect > 0 & data$rank <= 1000), data$gene)
genesNeg <- setNames(as.numeric(data$effect < 0 & data$rank <= 1000), data$gene)

There are 602 DE genes with positive effect and 398 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per DE indicator vector, passing the
# `length` column of `data` as the bias data. The diagnostic fit plot is
# switched off.
pwf <- nullp(
  genes,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  genesPos,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  genesNeg,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Run goseq for the GO biological-process categories on one PWF table and
# post-process the result. The same steps are applied to all three gene sets,
# so they live in one helper instead of three copy-pasted pipelines.
#
# pwf:          output of nullp() for one DE indicator vector.
# max_cat_size: categories with this many members or more are dropped from
#               the returned table (they still take part in the q-value
#               computation, which happens before the filter).
# Returns the goseq result table with two added columns: `q.value`
# (over-representation) and `q.value2` (under-representation).
run_goseq_bp <- function(pwf, max_cat_size = 1000) {
  res <- goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP"))
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # which() also discards rows whose numInCat is NA.
  res[which(res$numInCat < max_cat_size), ]
}

go <- run_goseq_bp(pwf)
goPos <- run_goseq_bp(pwfPos)
goNeg <- run_goseq_bp(pwfNeg)

Top 1000 Results

Over enriched categories (64)

# Show every category with q.value <= 0.05, but never fewer than minRow rows.
# na.rm keeps rowN defined if a q-value is NA, and head() stops at the last
# available row instead of padding the table with all-NA rows when fewer than
# rowN categories are left after filtering.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
cat(kable(head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN), format = "html"))
category term numInCat numDEInCat q.value
1 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 47 0.0000000
2 GO:0006414 translational elongation 121 50 0.0000000
3 GO:0006613 cotranslational protein targeting to membrane 109 47 0.0000000
4 GO:0006415 translational termination 95 44 0.0000000
5 GO:0045047 protein targeting to ER 110 47 0.0000000
6 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 47 0.0000000
7 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 48 0.0000000
8 GO:0070972 protein localization to endoplasmic reticulum 128 49 0.0000000
9 GO:0006413 translational initiation 167 54 0.0000000
10 GO:0043624 cellular protein complex disassembly 174 53 0.0000000
11 GO:0043241 protein complex disassembly 195 55 0.0000000
12 GO:0006612 protein targeting to membrane 171 51 0.0000000
13 GO:0032984 macromolecular complex disassembly 205 56 0.0000000
14 GO:0000956 nuclear-transcribed mRNA catabolic process 184 53 0.0000000
15 GO:0006402 mRNA catabolic process 196 53 0.0000000
16 GO:0006412 translation 496 88 0.0000000
17 GO:0019083 viral transcription 158 46 0.0000000
18 GO:0019080 viral gene expression 168 46 0.0000000
19 GO:0006401 RNA catabolic process 222 54 0.0000000
20 GO:0044033 multi-organism metabolic process 178 46 0.0000000
21 GO:0022411 cellular component disassembly 412 75 0.0000000
22 GO:0090150 establishment of protein localization to membrane 296 60 0.0000000
23 GO:0072594 establishment of protein localization to organelle 456 77 0.0000000
24 GO:0006605 protein targeting 497 79 0.0000000
25 GO:0044802 single-organism membrane organization 638 92 0.0000000
26 GO:0072657 protein localization to membrane 371 64 0.0000000
28 GO:0033365 protein localization to organelle 603 87 0.0000000
29 GO:0061024 membrane organization 778 102 0.0000000
30 GO:0006886 intracellular protein transport 758 100 0.0000000
31 GO:1902578 single-organism localization 426 67 0.0000000
32 GO:1902580 single-organism cellular localization 426 67 0.0000000
35 GO:0019058 viral life cycle 303 49 0.0000000
39 GO:0016482 cytoplasmic transport 825 97 0.0000000
41 GO:0044265 cellular macromolecule catabolic process 798 93 0.0000001
42 GO:0016032 viral process 674 81 0.0000001
43 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 85 0.0000001
44 GO:0044419 interspecies interaction between organisms 727 85 0.0000001
45 GO:0009057 macromolecule catabolic process 983 107 0.0000002
47 GO:0044764 multi-organism cellular process 682 81 0.0000002
50 GO:0016071 mRNA metabolic process 615 73 0.0000013
57 GO:0042274 ribosomal small subunit biogenesis 21 9 0.0001727
60 GO:0000028 ribosomal small subunit assembly 7 5 0.0021687
61 GO:0019395 fatty acid oxidation 77 16 0.0025808
64 GO:0034440 lipid oxidation 79 16 0.0033173
72 GO:0006635 fatty acid beta-oxidation 57 13 0.0056824
74 GO:0072329 monocarboxylic acid catabolic process 84 16 0.0073290
75 GO:0019752 carboxylic acid metabolic process 829 78 0.0073290
76 GO:0042255 ribosome assembly 19 7 0.0078576
80 GO:0006631 fatty acid metabolic process 281 34 0.0128903
81 GO:0055114 oxidation-reduction process 896 80 0.0144011
82 GO:0044255 cellular lipid metabolic process 772 73 0.0156281
84 GO:0009062 fatty acid catabolic process 72 14 0.0156281
85 GO:0042401 cellular biogenic amine biosynthetic process 15 6 0.0158654
86 GO:0006417 regulation of translation 249 31 0.0175542
87 GO:0009309 amine biosynthetic process 16 6 0.0225900
88 GO:0032787 monocarboxylic acid metabolic process 427 45 0.0267191
89 GO:0006596 polyamine biosynthetic process 11 5 0.0287114
91 GO:0048199 vesicle targeting, to, from or within Golgi 28 8 0.0332952
93 GO:0006082 organic acid metabolic process 944 83 0.0359978
94 GO:0009753 response to jasmonic acid 3 3 0.0359978
95 GO:0071395 cellular response to jasmonic acid stimulus 3 3 0.0359978
96 GO:0042254 ribosome biogenesis 167 22 0.0359978
98 GO:0043436 oxoacid metabolic process 934 82 0.0393812
99 GO:1902001 fatty acid transmembrane transport 16 6 0.0411107

Under enriched (0)

# Re-sort by the under-representation p-value. Note that `go` keeps this order
# from here on, including when it is written to csv in the final step.
go <- go[order(go$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, but never fewer than minRow rows.
# na.rm keeps rowN defined if a q-value is NA, and head() stops at the last
# available row instead of padding the table with all-NA rows.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN), format = "html"))
category term numInCat numDEInCat q.value2
5392 GO:0045596 negative regulation of cell differentiation 440 10 1
5390 GO:0022610 biological adhesion 916 34 1
5389 GO:0007155 cell adhesion 912 34 1
5391 GO:0006816 calcium ion transport 276 5 1
5388 GO:0007186 G-protein coupled receptor signaling pathway 549 16 1
5387 GO:0006954 inflammatory response 473 13 1
9623 GO:0050770 regulation of axonogenesis 99 0 1
5382 GO:0022603 regulation of anatomical structure morphogenesis 647 23 1
5385 GO:0072511 divalent inorganic cation transport 294 7 1
5384 GO:0070838 divalent metal ion transport 292 7 1
8416 GO:0002526 acute inflammatory response 96 0 1
5383 GO:0006874 cellular calcium ion homeostasis 244 5 1
5386 GO:0098609 cell-cell adhesion 148 2 1
5378 GO:0050878 regulation of body fluid levels 579 21 1
5375 GO:0051093 negative regulation of developmental process 546 19 1
9622 GO:0050768 negative regulation of neurogenesis 81 0 1
5372 GO:0050877 neurological system process 626 23 1
5379 GO:0072503 cellular divalent inorganic cation homeostasis 259 6 1
5376 GO:0040017 positive regulation of locomotion 278 7 1
5380 GO:0002443 leukocyte mediated immunity 211 4 1

Positive Effect

Over enriched categories (19)

# Show every category with q.value <= 0.05, but never fewer than minRow rows.
# na.rm keeps rowN defined if a q-value is NA, and head() stops at the last
# available row instead of padding the table with all-NA rows when fewer than
# rowN categories are left after filtering.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
cat(kable(head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN), format = "html"))
category term numInCat numDEInCat q.value
1 GO:0019752 carboxylic acid metabolic process 829 61 0.0011949
2 GO:0006631 fatty acid metabolic process 281 30 0.0011949
4 GO:0019395 fatty acid oxidation 77 14 0.0028347
5 GO:0006635 fatty acid beta-oxidation 57 12 0.0028347
6 GO:0034440 lipid oxidation 79 14 0.0028347
7 GO:0055114 oxidation-reduction process 896 61 0.0028564
8 GO:0009062 fatty acid catabolic process 72 13 0.0046312
9 GO:0072329 monocarboxylic acid catabolic process 84 14 0.0046312
11 GO:0044255 cellular lipid metabolic process 772 55 0.0048297
12 GO:0006082 organic acid metabolic process 944 63 0.0052682
13 GO:0032787 monocarboxylic acid metabolic process 427 36 0.0052682
14 GO:0043436 oxoacid metabolic process 934 62 0.0069845
15 GO:0007005 mitochondrion organization 298 26 0.0258148
16 GO:0010565 regulation of cellular ketone metabolic process 162 18 0.0258148
19 GO:0042180 cellular ketone metabolic process 211 21 0.0258148
20 GO:0044242 cellular lipid catabolic process 132 16 0.0277316
24 GO:0030258 lipid modification 149 17 0.0301505
25 GO:0009753 response to jasmonic acid 3 3 0.0317198
26 GO:0071395 cellular response to jasmonic acid stimulus 3 3 0.0317198
29 GO:0006457 protein folding 197 19 0.0554189

Under enriched (0)

# Re-sort by the under-representation p-value. Note that `goPos` keeps this
# order from here on, including when it is written to csv in the final step.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, but never fewer than minRow rows.
# na.rm keeps rowN defined if a q-value is NA, and head() stops at the last
# available row instead of padding the table with all-NA rows.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN), format = "html"))
category term numInCat numDEInCat q.value2
4419 GO:0006954 inflammatory response 473 6 1
4417 GO:0009887 organ morphogenesis 691 13 1
4416 GO:0043547 positive regulation of GTPase activity 440 7 1
4415 GO:0033124 regulation of GTP catabolic process 476 8 1
4414 GO:0043087 regulation of GTPase activity 474 8 1
4418 GO:0019058 viral life cycle 303 3 1
10198 GO:2000027 regulation of organ morphogenesis 135 0 1
4411 GO:0045596 negative regulation of cell differentiation 440 7 1
4406 GO:0001501 skeletal system development 391 6 1
4413 GO:0002697 regulation of immune effector process 231 2 1
4401 GO:0009792 embryo development ending in birth or egg hatching 530 10 1
4403 GO:0051046 regulation of secretion 431 7 1
4397 GO:0043009 chordate embryonic development 524 10 1
4398 GO:0031347 regulation of defense response 462 8 1
4393 GO:0045321 leukocyte activation 561 11 1
4391 GO:0022603 regulation of anatomical structure morphogenesis 647 14 1
4404 GO:0031349 positive regulation of defense response 259 3 1
4412 GO:0044033 multi-organism metabolic process 178 1 1
4405 GO:0051047 positive regulation of secretion 215 2 1
4410 GO:0032103 positive regulation of response to external stimulus 169 1 1

Negative Effect

Over enriched categories (52)

category term numInCat numDEInCat q.value
1 GO:0006415 translational termination 95 43 0.0000000
2 GO:0006414 translational elongation 121 46 0.0000000
3 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 43 0.0000000
4 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 45 0.0000000
5 GO:0006613 cotranslational protein targeting to membrane 109 43 0.0000000
6 GO:0045047 protein targeting to ER 110 43 0.0000000
7 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 43 0.0000000
8 GO:0006413 translational initiation 167 48 0.0000000
9 GO:0070972 protein localization to endoplasmic reticulum 128 43 0.0000000
10 GO:0019083 viral transcription 158 45 0.0000000
11 GO:0019080 viral gene expression 168 45 0.0000000
12 GO:0043624 cellular protein complex disassembly 174 46 0.0000000
13 GO:0044033 multi-organism metabolic process 178 45 0.0000000
14 GO:0000956 nuclear-transcribed mRNA catabolic process 184 46 0.0000000
15 GO:0043241 protein complex disassembly 195 47 0.0000000
16 GO:0006612 protein targeting to membrane 171 44 0.0000000
17 GO:0032984 macromolecular complex disassembly 205 47 0.0000000
18 GO:0006402 mRNA catabolic process 196 46 0.0000000
19 GO:0006401 RNA catabolic process 222 46 0.0000000
20 GO:0006412 translation 496 59 0.0000000
21 GO:0090150 establishment of protein localization to membrane 296 47 0.0000000
22 GO:0019058 viral life cycle 303 46 0.0000000
23 GO:0022411 cellular component disassembly 412 54 0.0000000
24 GO:0072657 protein localization to membrane 371 47 0.0000000
25 GO:0072594 establishment of protein localization to organelle 456 50 0.0000000
26 GO:0006605 protein targeting 497 51 0.0000000
27 GO:1902578 single-organism localization 426 47 0.0000000
28 GO:1902580 single-organism cellular localization 426 47 0.0000000
29 GO:0044802 single-organism membrane organization 638 54 0.0000000
30 GO:0033365 protein localization to organelle 603 50 0.0000000
31 GO:0016071 mRNA metabolic process 615 49 0.0000000
32 GO:0016032 viral process 674 51 0.0000000
34 GO:0044764 multi-organism cellular process 682 51 0.0000000
36 GO:0044265 cellular macromolecule catabolic process 798 56 0.0000000
37 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 52 0.0000000
38 GO:0044419 interspecies interaction between organisms 727 52 0.0000000
39 GO:0061024 membrane organization 778 55 0.0000000
40 GO:0009057 macromolecule catabolic process 983 61 0.0000000
41 GO:0006886 intracellular protein transport 758 52 0.0000000
42 GO:0016482 cytoplasmic transport 825 51 0.0000000
43 GO:0042274 ribosomal small subunit biogenesis 21 9 0.0000000
55 GO:0000028 ribosomal small subunit assembly 7 5 0.0000110
58 GO:0022613 ribonucleoprotein complex biogenesis 280 22 0.0000421
63 GO:0042254 ribosome biogenesis 167 16 0.0000870
67 GO:0006364 rRNA processing 117 12 0.0010521
71 GO:0016072 rRNA metabolic process 122 12 0.0016311
80 GO:0042255 ribosome assembly 19 5 0.0041541
82 GO:0030490 maturation of SSU-rRNA 11 4 0.0076545
85 GO:0022618 ribonucleoprotein complex assembly 135 11 0.0320026
87 GO:0042273 ribosomal large subunit biogenesis 16 4 0.0320026
88 GO:0006417 regulation of translation 249 16 0.0332596
91 GO:0071826 ribonucleoprotein complex subunit organization 141 11 0.0437873

Under enriched (0)

category term numInCat numDEInCat q.value2
10960 GO:0072507 divalent inorganic cation homeostasis 275 0 1
3516 GO:0022610 biological adhesion 916 10 1
3521 GO:0006873 cellular ion homeostasis 365 1 1
3514 GO:0007155 cell adhesion 912 10 1
10958 GO:0072503 cellular divalent inorganic cation homeostasis 259 0 1
3519 GO:0048514 blood vessel morphogenesis 421 2 1
3520 GO:0030003 cellular cation homeostasis 358 1 1
9512 GO:0055074 calcium ion homeostasis 255 0 1
3515 GO:0042060 wound healing 595 5 1
4693 GO:0006874 cellular calcium ion homeostasis 244 0 1
3518 GO:0006875 cellular metal ion homeostasis 333 1 1
3512 GO:0007017 microtubule-based process 479 4 1
3517 GO:0006066 alcohol metabolic process 315 1 1
3511 GO:0045596 negative regulation of cell differentiation 440 3 1
3508 GO:0044092 negative regulation of molecular function 778 8 1
3506 GO:0007267 cell-cell signaling 915 11 1
3507 GO:0007610 behavior 486 4 1
3513 GO:0006816 calcium ion transport 276 1 1
3509 GO:0001525 angiogenesis 350 2 1
3503 GO:0050878 regulation of body fluid levels 579 6 1

Final Step: csv output

# Write the three result tables to the csv/ directory, relative to the current
# working directory. The directory is created first if it does not exist yet;
# write.csv() would otherwise fail on a fresh checkout. File names are
# "<outFile>_main.csv", "<outFile>Pos.csv" and "<outFile>Neg.csv".
dir.create("csv", showWarnings = FALSE)
write.csv(go, file = file.path("csv", paste0(outFile, "_main.csv")), row.names = FALSE)
write.csv(goPos, file = file.path("csv", paste0(outFile, "Pos.csv")), row.names = FALSE)
write.csv(goNeg, file = file.path("csv", paste0(outFile, "Neg.csv")), row.names = FALSE)