This report has goseq results for fasting peptide when:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Inputs: per-gene association results for the trait, and the gene-length
# table whose `length` column is later passed to nullp() as bias data.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_fS_C_pept_all_genes.txt"
outFile <- "pept"  # prefix of the csv files written in the final step

data <- read.table(fName, as.is = TRUE, header = TRUE)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the text before the first "." in each gene ID so that both tables
# share the same key. sub() is vectorized and always returns a character
# vector, unlike sapply(), whose return type depends on its input.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every row of `data` is kept; genes with no match in
# `gene_lengths` get NA in the merged columns.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by p-value, attach q-values, and record the resulting position as the
# rank (rank 1 = smallest p-value).
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table.
minRow <- 20

Step 2: Create gene vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors named by gene ID (1 = marked as differentially
# expressed). `genes` flags every gene with rank <= 1000; `genesPos` and
# `genesNeg` additionally require effect > 0 or effect < 0 respectively.
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)
genesPos <- setNames(as.numeric(data$rank <= 1000 & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(data$rank <= 1000 & data$effect < 0), data$gene)

There are 553 DE genes with positive effect and 447 DE genes with negative effect.

Step 3: PWFs

# Fit one PWF per DE indicator vector (all genes, positive effect, negative
# effect). data$length is supplied as the bias data and the diagnostic fit
# plot is switched off.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Post-process one goseq result table. The same steps are applied to all
# three analyses, so they live in a single helper instead of three copies:
#   1. reset the row names to 1..n,
#   2. cap p-values that come out slightly above 1 at exactly 1,
#   3. add q-values for the over-represented p-values (q.value) and the
#      under-represented p-values (q.value2),
#   4. keep only categories with fewer than 1000 genes (numInCat < 1000).
# Note that the q-values are computed on the full table, before step 4.
#
# res: data frame returned by goseq().
# Returns the filtered data frame with the two q-value columns added.
finalize_goseq <- function(res) {
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  res[which(res$numInCat < 1000), ]
}

# Test the GO biological-process categories ("GO:BP") for each PWF.
go <- finalize_goseq(goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP")))
goPos <- finalize_goseq(goseq(pwfPos, "hg19", "ensGene", test.cats = c("GO:BP")))
goNeg <- finalize_goseq(goseq(pwfNeg, "hg19", "ensGene", test.cats = c("GO:BP")))

Top 1000 Results

Over enriched categories (64)

# Print every category with q.value <= 0.05, but never fewer than minRow rows.
# The count is capped at nrow(go): indexing past the end of the table would
# append all-NA rows to the printed output.
rowN <- min(nrow(go), max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE)))
cat(kable(go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")], format = "html"))
category term numInCat numDEInCat q.value
1 GO:0006414 translational elongation 121 53 0.0000000
2 GO:0006415 translational termination 95 47 0.0000000
3 GO:0006613 cotranslational protein targeting to membrane 109 49 0.0000000
4 GO:0045047 protein targeting to ER 110 49 0.0000000
5 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 49 0.0000000
6 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 48 0.0000000
7 GO:0070972 protein localization to endoplasmic reticulum 128 51 0.0000000
8 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 49 0.0000000
9 GO:0006413 translational initiation 167 57 0.0000000
10 GO:0043624 cellular protein complex disassembly 174 57 0.0000000
11 GO:0043241 protein complex disassembly 195 58 0.0000000
12 GO:0019083 viral transcription 158 51 0.0000000
13 GO:0006612 protein targeting to membrane 171 53 0.0000000
14 GO:0032984 macromolecular complex disassembly 205 58 0.0000000
15 GO:0019080 viral gene expression 168 51 0.0000000
16 GO:0044033 multi-organism metabolic process 178 51 0.0000000
17 GO:0006412 translation 496 86 0.0000000
18 GO:0000956 nuclear-transcribed mRNA catabolic process 184 49 0.0000000
19 GO:0006402 mRNA catabolic process 196 50 0.0000000
20 GO:0006401 RNA catabolic process 222 51 0.0000000
21 GO:0090150 establishment of protein localization to membrane 296 58 0.0000000
22 GO:0072657 protein localization to membrane 371 66 0.0000000
23 GO:0019058 viral life cycle 303 57 0.0000000
24 GO:1902578 single-organism localization 426 68 0.0000000
25 GO:1902580 single-organism cellular localization 426 68 0.0000000
26 GO:0022411 cellular component disassembly 412 67 0.0000000
28 GO:0044802 single-organism membrane organization 638 87 0.0000000
30 GO:0072594 establishment of protein localization to organelle 456 66 0.0000000
31 GO:0006605 protein targeting 497 70 0.0000000
32 GO:0016032 viral process 674 85 0.0000000
33 GO:0061024 membrane organization 778 94 0.0000000
34 GO:0044764 multi-organism cellular process 682 85 0.0000000
35 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 87 0.0000000
36 GO:0044419 interspecies interaction between organisms 727 87 0.0000000
37 GO:0033365 protein localization to organelle 603 76 0.0000000
39 GO:0006886 intracellular protein transport 758 83 0.0000028
40 GO:0016482 cytoplasmic transport 825 87 0.0000063
42 GO:0042274 ribosomal small subunit biogenesis 21 10 0.0000116
43 GO:0044265 cellular macromolecule catabolic process 798 84 0.0000132
45 GO:0016071 mRNA metabolic process 615 67 0.0000480
48 GO:0006417 regulation of translation 249 35 0.0004144
49 GO:0009057 macromolecule catabolic process 983 93 0.0004551
50 GO:0000028 ribosomal small subunit assembly 7 5 0.0021299
52 GO:0042254 ribosome biogenesis 167 24 0.0051623
53 GO:0051188 cofactor biosynthetic process 132 21 0.0058332
57 GO:0042255 ribosome assembly 19 7 0.0083930
59 GO:0006091 generation of precursor metabolites and energy 394 42 0.0099675
62 GO:0022613 ribonucleoprotein complex biogenesis 280 33 0.0126085
63 GO:0022904 respiratory electron transport chain 105 16 0.0132998
66 GO:0022900 electron transport chain 106 16 0.0146580
67 GO:0015980 energy derivation by oxidation of organic compounds 319 35 0.0199645
70 GO:0042273 ribosomal large subunit biogenesis 16 6 0.0217530
72 GO:0048199 vesicle targeting, to, from or within Golgi 28 8 0.0339006
73 GO:0036476 neuron death in response to hydrogen peroxide 3 3 0.0339006
74 GO:1903207 regulation of hydrogen peroxide-induced neuron death 3 3 0.0339006
75 GO:0051186 cofactor metabolic process 249 29 0.0383539
76 GO:0006461 protein complex assembly 932 81 0.0384390
78 GO:0070271 protein complex biogenesis 934 81 0.0391646
80 GO:0055114 oxidation-reduction process 896 76 0.0399955
81 GO:0051262 protein tetramerization 84 14 0.0399955
82 GO:0006364 rRNA processing 117 17 0.0399955
83 GO:0006090 pyruvate metabolic process 81 14 0.0399955
84 GO:0009108 coenzyme biosynthetic process 103 16 0.0482681
85 GO:0045821 positive regulation of glycolytic process 11 5 0.0482888

Under enriched (0)

# Re-sort by the under-represented p-value so the smallest values come first.
# This reorders `go` itself, so later uses of `go` see this order.
go <- go[order(go$under_represented_pvalue), ]

# Print every category with q.value2 <= 0.05, but never fewer than minRow
# rows. The count is capped at nrow(go): indexing past the end of the table
# would append all-NA rows to the printed output.
rowN <- min(nrow(go), max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")], format = "html"))
category term numInCat numDEInCat q.value2
5460 GO:0045596 negative regulation of cell differentiation 440 10 1
5459 GO:0001501 skeletal system development 391 9 1
7539 GO:0010721 negative regulation of cell development 115 0 1
5457 GO:0051093 negative regulation of developmental process 546 16 1
5456 GO:0022603 regulation of anatomical structure morphogenesis 647 21 1
5458 GO:0048705 skeletal system morphogenesis 183 2 1
5455 GO:0007186 G-protein coupled receptor signaling pathway 549 16 1
7312 GO:0006023 aminoglycan biosynthetic process 96 0 1
7313 GO:0006024 glycosaminoglycan biosynthetic process 95 0 1
5449 GO:0001944 vasculature development 509 17 1
5450 GO:0022604 regulation of cell morphogenesis 341 10 1
8426 GO:0050768 negative regulation of neurogenesis 81 0 1
5444 GO:0001568 blood vessel development 480 16 1
5437 GO:0048646 anatomical structure formation involved in morphogenesis 855 35 1
5435 GO:0009887 organ morphogenesis 691 27 1
9048 GO:2000736 regulation of stem cell differentiation 78 0 1
5446 GO:0010769 regulation of cell morphogenesis involved in differentiation 214 5 1
8597 GO:0060349 bone morphogenesis 77 0 1
5432 GO:0048514 blood vessel morphogenesis 421 14 1
5454 GO:0008360 regulation of cell shape 105 1 1

Positive Effect

Over enriched categories (8)

# Print every category with q.value <= 0.05, but never fewer than minRow rows.
# The count is capped at nrow(goPos): indexing past the end of the table would
# append all-NA rows to the printed output.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE)))
cat(kable(goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")], format = "html"))
category term numInCat numDEInCat q.value
1 GO:0022904 respiratory electron transport chain 105 15 0.0020265
2 GO:0022900 electron transport chain 106 15 0.0020265
3 GO:0045333 cellular respiration 158 18 0.0045032
4 GO:0006091 generation of precursor metabolites and energy 394 32 0.0045032
5 GO:0015980 energy derivation by oxidation of organic compounds 319 27 0.0106757
6 GO:0055114 oxidation-reduction process 896 54 0.0173210
7 GO:0042102 positive regulation of T cell proliferation 70 11 0.0206300
8 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 8 0.0244866
9 GO:0050863 regulation of T cell activation 226 20 0.0673906
10 GO:0022898 regulation of transmembrane transporter activity 121 14 0.0707754
11 GO:0042110 T cell activation 347 26 0.0951374
12 GO:0050870 positive regulation of T cell activation 165 16 0.0951374
14 GO:0042773 ATP synthesis coupled electron transport 54 8 0.1008410
15 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 8 0.1008410
16 GO:0032409 regulation of transporter activity 130 14 0.1008410
17 GO:0042129 regulation of T cell proliferation 108 12 0.1157876
18 GO:0032412 regulation of ion transmembrane transporter activity 118 13 0.1221797
20 GO:0050671 positive regulation of lymphocyte proliferation 97 11 0.1638534
21 GO:0032946 positive regulation of mononuclear cell proliferation 98 11 0.1661351
22 GO:0070296 sarcoplasmic reticulum calcium ion transport 30 6 0.1661351

Under enriched (0)

# Re-sort by the under-represented p-value so the smallest values come first.
# This reorders `goPos` itself, so later uses of `goPos` see this order.
goPos <- goPos[order(goPos$under_represented_pvalue), ]

# Print every category with q.value2 <= 0.05, but never fewer than minRow
# rows. The count is capped at nrow(goPos): indexing past the end of the
# table would append all-NA rows to the printed output.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")], format = "html"))
category term numInCat numDEInCat q.value2
4293 GO:0006396 RNA processing 676 7 0.4756751
4292 GO:0016071 mRNA metabolic process 615 6 0.4756751
4294 GO:0001501 skeletal system development 391 2 0.4756751
4289 GO:0043547 positive regulation of GTPase activity 440 5 1.0000000
4284 GO:0033124 regulation of GTP catabolic process 476 6 1.0000000
4287 GO:0006397 mRNA processing 404 4 1.0000000
4283 GO:0043087 regulation of GTPase activity 474 6 1.0000000
4282 GO:0045596 negative regulation of cell differentiation 440 5 1.0000000
4280 GO:0022603 regulation of anatomical structure morphogenesis 647 10 1.0000000
4281 GO:0008380 RNA splicing 332 3 1.0000000
4288 GO:0000375 RNA splicing, via transesterification reactions 225 1 1.0000000
4275 GO:0051093 negative regulation of developmental process 546 8 1.0000000
4285 GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 220 1 1.0000000
4286 GO:0000398 mRNA splicing, via spliceosome 220 1 1.0000000
4267 GO:1901069 guanosine-containing compound catabolic process 681 12 1.0000000
4266 GO:0006184 GTP catabolic process 679 12 1.0000000
8098 GO:0040029 regulation of gene expression, epigenetic 143 0 1.0000000
6999 GO:0032102 negative regulation of response to external stimulus 143 0 1.0000000
4262 GO:1901068 guanosine-containing compound metabolic process 700 13 1.0000000
4271 GO:0051241 negative regulation of multicellular organismal process 306 3 1.0000000

Negative Effect

Over enriched categories (60)

category term numInCat numDEInCat q.value
1 GO:0006415 translational termination 95 46 0.0000000
2 GO:0006414 translational elongation 121 49 0.0000000
3 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 46 0.0000000
4 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 48 0.0000000
5 GO:0006613 cotranslational protein targeting to membrane 109 46 0.0000000
6 GO:0045047 protein targeting to ER 110 46 0.0000000
7 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 46 0.0000000
8 GO:0006413 translational initiation 167 53 0.0000000
9 GO:0070972 protein localization to endoplasmic reticulum 128 46 0.0000000
10 GO:0019083 viral transcription 158 48 0.0000000
11 GO:0019080 viral gene expression 168 48 0.0000000
12 GO:0006612 protein targeting to membrane 171 48 0.0000000
13 GO:0044033 multi-organism metabolic process 178 48 0.0000000
14 GO:0043624 cellular protein complex disassembly 174 48 0.0000000
15 GO:0000956 nuclear-transcribed mRNA catabolic process 184 48 0.0000000
16 GO:0043241 protein complex disassembly 195 49 0.0000000
17 GO:0006402 mRNA catabolic process 196 49 0.0000000
18 GO:0032984 macromolecular complex disassembly 205 49 0.0000000
19 GO:0006401 RNA catabolic process 222 49 0.0000000
20 GO:0006412 translation 496 64 0.0000000
21 GO:0090150 establishment of protein localization to membrane 296 51 0.0000000
22 GO:0019058 viral life cycle 303 50 0.0000000
23 GO:0072657 protein localization to membrane 371 52 0.0000000
24 GO:0006605 protein targeting 497 59 0.0000000
25 GO:0072594 establishment of protein localization to organelle 456 56 0.0000000
26 GO:0022411 cellular component disassembly 412 54 0.0000000
27 GO:1902578 single-organism localization 426 54 0.0000000
28 GO:1902580 single-organism cellular localization 426 54 0.0000000
29 GO:0016071 mRNA metabolic process 615 61 0.0000000
30 GO:0044802 single-organism membrane organization 638 60 0.0000000
31 GO:0033365 protein localization to organelle 603 58 0.0000000
34 GO:0016032 viral process 674 58 0.0000000
35 GO:0044764 multi-organism cellular process 682 58 0.0000000
36 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 59 0.0000000
37 GO:0044419 interspecies interaction between organisms 727 59 0.0000000
38 GO:0061024 membrane organization 778 61 0.0000000
39 GO:0006886 intracellular protein transport 758 59 0.0000000
40 GO:0016482 cytoplasmic transport 825 61 0.0000000
41 GO:0044265 cellular macromolecule catabolic process 798 60 0.0000000
42 GO:0009057 macromolecule catabolic process 983 64 0.0000000
43 GO:0022613 ribonucleoprotein complex biogenesis 280 30 0.0000000
44 GO:0042274 ribosomal small subunit biogenesis 21 10 0.0000000
45 GO:0042254 ribosome biogenesis 167 22 0.0000000
71 GO:0006364 rRNA processing 117 16 0.0000021
73 GO:0016072 rRNA metabolic process 122 16 0.0000040
76 GO:0000028 ribosomal small subunit assembly 7 5 0.0000120
88 GO:0042273 ribosomal large subunit biogenesis 16 6 0.0000878
90 GO:0006417 regulation of translation 249 21 0.0002159
91 GO:0042255 ribosome assembly 19 6 0.0002984
93 GO:0001731 formation of translation preinitiation complex 18 6 0.0003773
94 GO:0034470 ncRNA processing 235 19 0.0003801
97 GO:0022618 ribonucleoprotein complex assembly 135 14 0.0007262
100 GO:0071826 ribonucleoprotein complex subunit organization 141 14 0.0011664
106 GO:0051259 protein oligomerization 341 22 0.0059363
107 GO:0030490 maturation of SSU-rRNA 11 4 0.0084529
111 GO:0006396 RNA processing 676 33 0.0156462
112 GO:0045821 positive regulation of glycolytic process 11 4 0.0175798
113 GO:0051262 protein tetramerization 84 9 0.0242953
114 GO:0034660 ncRNA metabolic process 325 19 0.0276720
115 GO:0000027 ribosomal large subunit assembly 2 2 0.0480523

Under enriched (0)

category term numInCat numDEInCat q.value2
3741 GO:0022610 biological adhesion 916 11 1
3740 GO:0007155 cell adhesion 912 11 1
3742 GO:0007186 G-protein coupled receptor signaling pathway 549 4 1
3739 GO:0007268 synaptic transmission 566 5 1
3736 GO:0007267 cell-cell signaling 915 11 1
3738 GO:0072507 divalent inorganic cation homeostasis 275 1 1
3734 GO:0098602 single organism cell adhesion 327 2 1
3730 GO:0001944 vasculature development 509 5 1
3737 GO:0071396 cellular response to lipid 275 1 1
6752 GO:0032496 response to lipopolysaccharide 200 0 1
3731 GO:0006875 cellular metal ion homeostasis 333 2 1
3735 GO:0072503 cellular divalent inorganic cation homeostasis 259 1 1
3733 GO:0055074 calcium ion homeostasis 255 1 1
3726 GO:0001568 blood vessel development 480 5 1
3716 GO:0002682 regulation of immune system process 997 14 1
3724 GO:0007610 behavior 486 5 1
3715 GO:0048646 anatomical structure formation involved in morphogenesis 855 13 1
3728 GO:0006066 alcohol metabolic process 315 2 1
3725 GO:0048514 blood vessel morphogenesis 421 4 1
3719 GO:0033993 response to lipid 568 6 1

Final Step: csv output

# Write the three result tables, without row names, into the csv/ directory
# (a path relative to the working directory). File names are built from the
# outFile prefix.
write.csv(go, file = file.path("csv", paste0(outFile, "_main.csv")), row.names = FALSE)
write.csv(goPos, file = file.path("csv", paste0(outFile, "Pos.csv")), row.names = FALSE)
write.csv(goNeg, file = file.path("csv", paste0(outFile, "Neg.csv")), row.names = FALSE)