This report presents goseq results for the 30 minute peptide trait under three definitions of differentially expressed (DE) genes:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

The goseq results are also saved as csv files on snowwhite in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Per-gene results table for this trait; the code below uses its gene and
# p.value columns. outFile is the stem of the csv file names written at the
# end of the report.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_fS_C_pept_30_all_genes.txt"
outFile <- "pept_30"

# Drop everything from the first "." onwards so that gene IDs from the two
# input files can be matched on the part before the dot.
# NOTE(review): presumably these are versioned Ensembl IDs ("<id>.<version>")
# -- confirm against the input files.
stripVersion <- function(ids) {
  sub("[.].*$", "", ids)
}

data <- read.table(fName, as.is = TRUE, header = TRUE)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)
gene_lengths$gene <- stripVersion(gene_lengths$gene)

data$gene <- stripVersion(data$gene)

# Left join: every row of data is kept; genes absent from the length table
# get NA in the columns that come from gene_lengths.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by ascending p-value (rank 1 = smallest p-value).
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() stays correct (empty) for a zero-row table, unlike seq(1, 0).
data$rank <- seq_len(nrow(data))

# Minimum number of rows shown in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# Mark the 1000 best-ranked genes once, then split that set by the sign of the
# effect. Each result is a 0/1 numeric vector (1 = differentially expressed)
# named by gene.
inTop <- data$rank <= 1000

genes <- setNames(as.numeric(inTop), data$gene)
genesPos <- setNames(as.numeric(inTop & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(inTop & data$effect < 0), data$gene)

There are 507 DE genes with positive effect and 493 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per DE vector, supplying data$length
# as the bias data; the diagnostic fit plot is switched off.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Run the GO:BP test for one PWF table and post-process the result:
#   1. reset the row names,
#   2. cap both p-value columns at 1,
#   3. add q-values for over-representation (q.value) and
#      under-representation (q.value2),
#   4. keep only categories with fewer than 1000 genes (numInCat < 1000).
# The q-values are computed before step 4, i.e. over all tested categories.
runGoseq <- function(pwfTable) {
  res <- goseq(pwfTable, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  res[which(res$numInCat < 1000), ]
}

go <- runGoseq(pwf)
goPos <- runGoseq(pwfPos)
goNeg <- runGoseq(pwfNeg)

Top 1000 Results

Over enriched categories (45)

# Show every category with q.value <= 0.05, but never fewer than minRow rows.
# NOTE(review): this takes the first rowN rows, so it relies on go still being
# in the order goseq returned it (presumably most over-represented first).
# na.rm keeps a stray NA q-value from turning rowN into NA, and head() stops
# at the last available row instead of padding a short table with NA rows.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0006415 translational termination 95 34 0.0000000
2 GO:0006413 translational initiation 167 45 0.0000000
3 GO:0006414 translational elongation 121 38 0.0000000
4 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 35 0.0000000
5 GO:0006613 cotranslational protein targeting to membrane 109 35 0.0000000
6 GO:0070972 protein localization to endoplasmic reticulum 128 38 0.0000000
7 GO:0045047 protein targeting to ER 110 35 0.0000000
8 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 35 0.0000000
9 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 36 0.0000000
10 GO:0019083 viral transcription 158 39 0.0000000
11 GO:0019080 viral gene expression 168 39 0.0000000
12 GO:0044033 multi-organism metabolic process 178 39 0.0000000
13 GO:0043241 protein complex disassembly 195 41 0.0000000
14 GO:0043624 cellular protein complex disassembly 174 38 0.0000000
15 GO:0006612 protein targeting to membrane 171 37 0.0000000
16 GO:0032984 macromolecular complex disassembly 205 41 0.0000000
17 GO:0000956 nuclear-transcribed mRNA catabolic process 184 37 0.0000000
18 GO:0006402 mRNA catabolic process 196 38 0.0000000
19 GO:0006412 translation 496 65 0.0000001
20 GO:0006401 RNA catabolic process 222 39 0.0000001
21 GO:0072657 protein localization to membrane 371 51 0.0000022
22 GO:0019058 viral life cycle 303 44 0.0000025
25 GO:0090150 establishment of protein localization to membrane 296 42 0.0000121
26 GO:0006091 generation of precursor metabolites and energy 394 50 0.0000222
27 GO:1902578 single-organism localization 426 53 0.0000294
28 GO:1902580 single-organism cellular localization 426 53 0.0000294
29 GO:0022411 cellular component disassembly 412 52 0.0000369
30 GO:0072594 establishment of protein localization to organelle 456 54 0.0000889
31 GO:0044802 single-organism membrane organization 638 69 0.0000998
32 GO:0015980 energy derivation by oxidation of organic compounds 319 42 0.0001068
33 GO:0055114 oxidation-reduction process 896 86 0.0003971
34 GO:0006605 protein targeting 497 55 0.0010953
35 GO:0061024 membrane organization 778 77 0.0011234
36 GO:0016482 cytoplasmic transport 825 80 0.0017958
37 GO:0033365 protein localization to organelle 603 62 0.0030018
39 GO:0006090 pyruvate metabolic process 81 16 0.0066933
42 GO:0022613 ribonucleoprotein complex biogenesis 280 34 0.0098661
43 GO:0019752 carboxylic acid metabolic process 829 77 0.0108823
44 GO:0001731 formation of translation preinitiation complex 18 7 0.0119945
45 GO:0042254 ribosome biogenesis 167 23 0.0208033
47 GO:0009135 purine nucleoside diphosphate metabolic process 15 6 0.0262367
48 GO:0009179 purine ribonucleoside diphosphate metabolic process 15 6 0.0262367
49 GO:0016071 mRNA metabolic process 615 59 0.0269584
50 GO:1901605 alpha-amino acid metabolic process 187 25 0.0277981
51 GO:0006886 intracellular protein transport 758 69 0.0473734

Under enriched (0)

# Re-sort so the smallest under-representation p-values come first. go keeps
# this order for the rest of the script, including the final write.csv().
go <- go[order(go$under_represented_pvalue), ]

# Show every category with q.value2 <= 0.05, but never fewer than minRow rows.
# na.rm keeps a stray NA q-value from turning rowN into NA, and head() stops
# at the last available row instead of padding a short table with NA rows.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
5087 GO:0022603 regulation of anatomical structure morphogenesis 647 17 0.2155844
5083 GO:0007186 G-protein coupled receptor signaling pathway 549 15 1.0000000
5076 GO:0000902 cell morphogenesis 993 39 1.0000000
5085 GO:0010720 positive regulation of cell development 178 2 1.0000000
5080 GO:0045596 negative regulation of cell differentiation 440 12 1.0000000
5084 GO:0048705 skeletal system morphogenesis 183 2 1.0000000
5079 GO:0001501 skeletal system development 391 10 1.0000000
5073 GO:0022604 regulation of cell morphogenesis 341 9 1.0000000
5078 GO:0090305 nucleic acid phosphodiester bond hydrolysis 200 3 1.0000000
5067 GO:0016477 cell migration 887 34 1.0000000
5065 GO:0051093 negative regulation of developmental process 546 18 1.0000000
5064 GO:0060284 regulation of cell development 552 19 1.0000000
5060 GO:0032990 cell part morphogenesis 718 28 1.0000000
5081 GO:0050769 positive regulation of neurogenesis 121 1 1.0000000
5057 GO:0048870 cell motility 948 38 1.0000000
5058 GO:0051674 localization of cell 948 38 1.0000000
5063 GO:0006954 inflammatory response 473 14 1.0000000
5052 GO:0030030 cell projection organization 991 42 1.0000000
5056 GO:0051094 positive regulation of developmental process 705 26 1.0000000
5055 GO:0048646 anatomical structure formation involved in morphogenesis 855 34 1.0000000

Positive Effect

Over enriched categories (8)

# Show every category with q.value <= 0.05, but never fewer than minRow rows.
# NOTE(review): this takes the first rowN rows, so it relies on goPos still
# being in the order goseq returned it (presumably most over-represented first).
# na.rm keeps a stray NA q-value from turning rowN into NA, and head() stops
# at the last available row instead of padding a short table with NA rows.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0006091 generation of precursor metabolites and energy 394 36 0.0000161
GO:0015980 energy derivation by oxidation of organic compounds 319 31 0.0000314
GO:0022904 respiratory electron transport chain 105 15 0.0003065
GO:0022900 electron transport chain 106 15 0.0003065
GO:0045333 cellular respiration 158 18 0.0007839
GO:0055114 oxidation-reduction process 896 53 0.0025176
GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 8 0.0150136
GO:0022898 regulation of transmembrane transporter activity 121 14 0.0415072
GO:0009135 purine nucleoside diphosphate metabolic process 15 5 0.0613727
GO:0009179 purine ribonucleoside diphosphate metabolic process 15 5 0.0613727
GO:0032409 regulation of transporter activity 130 14 0.0630310
GO:0042773 ATP synthesis coupled electron transport 54 8 0.0630310
GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 8 0.0630310
GO:0032412 regulation of ion transmembrane transporter activity 118 13 0.0791484
GO:0046031 ADP metabolic process 10 4 0.1045462
GO:0009185 ribonucleoside diphosphate metabolic process 18 5 0.1126198
GO:0070296 sarcoplasmic reticulum calcium ion transport 30 6 0.1536909
GO:0051592 response to calcium ion 82 10 0.1718132
GO:0006119 oxidative phosphorylation 70 8 0.3113769
GO:0009132 nucleoside diphosphate metabolic process 37 6 0.3956959

Under enriched (0)

# Re-sort so the smallest under-representation p-values come first. goPos keeps
# this order for the rest of the script, including the final write.csv().
goPos <- goPos[order(goPos$under_represented_pvalue), ]

# Show every category with q.value2 <= 0.05, but never fewer than minRow rows.
# na.rm keeps a stray NA q-value from turning rowN into NA, and head() stops
# at the last available row instead of padding a short table with NA rows.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
5277 GO:0008380 RNA splicing 332 0 0.1032286
3907 GO:0016071 mRNA metabolic process 615 4 0.1032286
3905 GO:0006396 RNA processing 676 7 0.6047556
3900 GO:0045944 positive regulation of transcription from RNA polymerase II promoter 719 9 0.6047556
3904 GO:0007346 regulation of mitotic cell cycle 358 2 0.6047556
3960 GO:0000375 RNA splicing, via transesterification reactions 225 0 0.6047556
3961 GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 220 0 0.6047556
3970 GO:0000398 mRNA splicing, via spliceosome 220 0 0.6047556
3896 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 9 0.6047556
3897 GO:0044419 interspecies interaction between organisms 727 9 0.6047556
3901 GO:0006397 mRNA processing 404 3 0.6137702
3903 GO:0022613 ribonucleoprotein complex biogenesis 280 1 0.8224909
11283 GO:0090305 nucleic acid phosphodiester bond hydrolysis 200 0 0.8458512
3884 GO:0044764 multi-organism cellular process 682 9 1.0000000
6042 GO:0018205 peptidyl-lysine modification 181 0 1.0000000
3883 GO:0022603 regulation of anatomical structure morphogenesis 647 9 1.0000000
3882 GO:0016032 viral process 674 9 1.0000000
3891 GO:0044843 cell cycle G1/S phase transition 238 1 1.0000000
3889 GO:0000082 G1/S transition of mitotic cell cycle 236 1 1.0000000
3876 GO:0045596 negative regulation of cell differentiation 440 5 1.0000000

Negative Effect

Over enriched categories (55)

category term numInCat numDEInCat q.value
1 GO:0006413 translational initiation 167 43 0.0000000
2 GO:0006415 translational termination 95 34 0.0000000
3 GO:0006414 translational elongation 121 36 0.0000000
4 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 34 0.0000000
5 GO:0006613 cotranslational protein targeting to membrane 109 34 0.0000000
6 GO:0045047 protein targeting to ER 110 34 0.0000000
7 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 34 0.0000000
8 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 35 0.0000000
9 GO:0019083 viral transcription 158 38 0.0000000
10 GO:0070972 protein localization to endoplasmic reticulum 128 34 0.0000000
11 GO:0019080 viral gene expression 168 38 0.0000000
12 GO:0044033 multi-organism metabolic process 178 38 0.0000000
13 GO:0043241 protein complex disassembly 195 38 0.0000000
14 GO:0032984 macromolecular complex disassembly 205 38 0.0000000
15 GO:0006612 protein targeting to membrane 171 35 0.0000000
16 GO:0000956 nuclear-transcribed mRNA catabolic process 184 36 0.0000000
17 GO:0006402 mRNA catabolic process 196 37 0.0000000
18 GO:0043624 cellular protein complex disassembly 174 35 0.0000000
19 GO:0006401 RNA catabolic process 222 38 0.0000000
20 GO:0006412 translation 496 54 0.0000000
21 GO:0019058 viral life cycle 303 41 0.0000000
22 GO:0090150 establishment of protein localization to membrane 296 37 0.0000000
23 GO:0016071 mRNA metabolic process 615 55 0.0000000
25 GO:0022411 cellular component disassembly 412 43 0.0000000
26 GO:0072594 establishment of protein localization to organelle 456 45 0.0000000
32 GO:0072657 protein localization to membrane 371 39 0.0000000
33 GO:0006605 protein targeting 497 46 0.0000000
38 GO:0022613 ribonucleoprotein complex biogenesis 280 33 0.0000000
41 GO:1902578 single-organism localization 426 41 0.0000000
42 GO:1902580 single-organism cellular localization 426 41 0.0000000
46 GO:0016032 viral process 674 52 0.0000000
48 GO:0044764 multi-organism cellular process 682 52 0.0000000
51 GO:0033365 protein localization to organelle 603 47 0.0000000
53 GO:0016482 cytoplasmic transport 825 57 0.0000000
55 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 52 0.0000000
56 GO:0044419 interspecies interaction between organisms 727 52 0.0000000
57 GO:0044802 single-organism membrane organization 638 48 0.0000001
59 GO:0042254 ribosome biogenesis 167 22 0.0000002
61 GO:0061024 membrane organization 778 51 0.0000017
62 GO:0006886 intracellular protein transport 758 49 0.0000058
67 GO:0006364 rRNA processing 117 16 0.0000212
72 GO:0016072 rRNA metabolic process 122 16 0.0000364
75 GO:0001731 formation of translation preinitiation complex 18 7 0.0000549
76 GO:0044265 cellular macromolecule catabolic process 798 48 0.0000826
78 GO:0022618 ribonucleoprotein complex assembly 135 15 0.0009374
79 GO:0009057 macromolecule catabolic process 983 52 0.0014788
80 GO:0006417 regulation of translation 249 21 0.0015340
81 GO:0071826 ribonucleoprotein complex subunit organization 141 15 0.0015606
82 GO:0042274 ribosomal small subunit biogenesis 21 6 0.0019218
83 GO:0006446 regulation of translational initiation 67 10 0.0022584
85 GO:0034470 ncRNA processing 235 19 0.0040762
87 GO:0042273 ribosomal large subunit biogenesis 16 5 0.0062276
93 GO:0006396 RNA processing 676 37 0.0110453
97 GO:0034660 ncRNA metabolic process 325 22 0.0136102
102 GO:0051259 protein oligomerization 341 22 0.0362849

Under enriched (0)

category term numInCat numDEInCat q.value2
3518 GO:0000902 cell morphogenesis 993 12 0.2778901
3515 GO:0007267 cell-cell signaling 915 11 0.5737586
3509 GO:0002682 regulation of immune system process 997 13 0.7592892
3510 GO:0022610 biological adhesion 916 12 0.7592892
3508 GO:0007155 cell adhesion 912 12 0.7592892
3512 GO:0050877 neurological system process 626 6 0.7592892
3516 GO:0030900 forebrain development 289 1 0.9668036
3507 GO:0007186 G-protein coupled receptor signaling pathway 549 5 0.9668036
3505 GO:0032990 cell part morphogenesis 718 9 0.9668036
3504 GO:0007268 synaptic transmission 566 6 1.0000000
3501 GO:0032940 secretion by cell 682 8 1.0000000
3498 GO:0048858 cell projection morphogenesis 699 9 1.0000000
3494 GO:0000904 cell morphogenesis involved in differentiation 701 9 1.0000000
3488 GO:0050776 regulation of immune response 666 8 1.0000000
6607 GO:0032496 response to lipopolysaccharide 200 0 1.0000000
3489 GO:0033993 response to lipid 568 6 1.0000000
3484 GO:0022603 regulation of anatomical structure morphogenesis 647 8 1.0000000
3490 GO:0035295 tube development 490 5 1.0000000
3503 GO:0007600 sensory perception 334 2 1.0000000
3486 GO:0007017 microtubule-based process 479 5 1.0000000

Final Step: csv output

# Write each result table to csv/<outFile><suffix>, without row names.
# The list is keyed by file-name suffix and written in the order given.
csvOutputs <- list("_main.csv" = go, "Pos.csv" = goPos, "Neg.csv" = goNeg)
for (suffix in names(csvOutputs)) {
  write.csv(
    csvOutputs[[suffix]],
    file = paste0("csv/", outFile, suffix),
    row.names = FALSE
  )
}