This report presents goseq results for 30-minute insulin under three definitions of differentially expressed genes:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

The goseq results are also saved as CSV files on snowwhite in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Per-gene results table for this trait; the code below uses its `gene`,
# `p.value` and `effect` columns.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_S_Insu_30_all_genes.txt"
# Prefix for the CSV files written in the final step.
outFile <- "S_Insu_30"

data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene lengths; the `length` column is later passed to nullp() as bias data.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." (presumably the
# Ensembl version suffix -- confirm) so both tables share the same key.
# vapply() pins the result to one string per gene, unlike sapply().
gene_lengths$gene <- vapply(
  strsplit(gene_lengths$gene, split = "[.]"),
  function(parts) parts[1],
  character(1)
)
data$gene <- vapply(
  strsplit(data$gene, split = "[.]"),
  function(parts) parts[1],
  character(1)
)

# Left join: every gene in the results is kept; genes without a length entry
# get NA in the columns coming from gene_lengths.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by ascending p-value (rank 1 = smallest p-value).
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() gives an empty sequence for an empty table instead of c(1, 0).
data$rank <- seq_len(nrow(data))

# Minimum number of rows shown in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors named by gene ID, in the form goseq's nullp() takes.
# 1 marks a gene as differentially expressed.

# All genes ranked in the top 1000.
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)

# Top-1000 genes whose effect is positive.
genesPos <- setNames(
  as.numeric(data$rank <= 1000 & data$effect > 0),
  data$gene
)

# Top-1000 genes whose effect is negative.
genesNeg <- setNames(
  as.numeric(data$rank <= 1000 & data$effect < 0),
  data$gene
)

There are 503 DE genes with positive effect and 497 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per DE definition, using gene length
# (data$length) as the bias data. The diagnostic fit plot is suppressed.
pwf <- nullp(
  genes, "hg19", "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  genesPos, "hg19", "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  genesNeg, "hg19", "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Post-process one goseq() result table.
#
# res: data frame returned by goseq(), with columns over_represented_pvalue,
#      under_represented_pvalue and numInCat.
# Returns the table with default row names, p-values capped at 1, two added
# columns (q.value for over-representation, q.value2 for under-representation)
# and only the categories containing fewer than 1000 genes.
tidy_goseq <- function(res) {
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue[res$over_represented_pvalue > 1] <- 1
  res$under_represented_pvalue[res$under_represented_pvalue > 1] <- 1

  # q-values are computed over all tested categories, before the size filter.
  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # Drop categories with 1000 or more genes. Rows keep their pre-filter row
  # names, so printed row labels can have gaps. which() skips any NA.
  res[which(res$numInCat < 1000), ]
}

# Test GO biological-process categories for each DE definition.
go <- tidy_goseq(goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP")))
goPos <- tidy_goseq(goseq(pwfPos, "hg19", "ensGene", test.cats = c("GO:BP")))
goNeg <- tidy_goseq(goseq(pwfNeg, "hg19", "ensGene", test.cats = c("GO:BP")))

Top 1000 Results

Over enriched categories (45)

# Show every category with over-representation q-value <= 0.05, and at least
# minRow rows. Assumes `go` is still in the order goseq() returned it
# (presumably ascending over-represented p-value -- confirm).
# head() stops at the last row, so a table shorter than rowN is not padded
# with all-NA rows as go[1:rowN, ] would be.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0006413 translational initiation 167 37 0.0000000
2 GO:0006414 translational elongation 121 30 0.0000000
3 GO:0006415 translational termination 95 26 0.0000000
4 GO:0070972 protein localization to endoplasmic reticulum 128 30 0.0000000
5 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 27 0.0000000
6 GO:0006613 cotranslational protein targeting to membrane 109 27 0.0000000
7 GO:0045047 protein targeting to ER 110 27 0.0000000
8 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 27 0.0000000
9 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 27 0.0000002
10 GO:0006091 generation of precursor metabolites and energy 394 54 0.0000010
11 GO:0019083 viral transcription 158 30 0.0000020
12 GO:0043241 protein complex disassembly 195 34 0.0000036
13 GO:0055114 oxidation-reduction process 896 93 0.0000051
14 GO:0019080 viral gene expression 168 30 0.0000072
15 GO:0006412 translation 496 60 0.0000092
16 GO:0032984 macromolecular complex disassembly 205 34 0.0000095
17 GO:0044033 multi-organism metabolic process 178 30 0.0000249
18 GO:0043624 cellular protein complex disassembly 174 30 0.0000249
19 GO:0015980 energy derivation by oxidation of organic compounds 319 43 0.0000529
20 GO:0019752 carboxylic acid metabolic process 829 84 0.0001140
23 GO:0006612 protein targeting to membrane 171 28 0.0001406
25 GO:0006090 pyruvate metabolic process 81 18 0.0004532
26 GO:0000956 nuclear-transcribed mRNA catabolic process 184 28 0.0007075
27 GO:0043436 oxoacid metabolic process 934 89 0.0008013
28 GO:0006082 organic acid metabolic process 944 90 0.0008013
29 GO:0006402 mRNA catabolic process 196 29 0.0008278
30 GO:0044724 single-organism carbohydrate catabolic process 124 22 0.0012384
31 GO:0032787 monocarboxylic acid metabolic process 427 49 0.0018611
35 GO:0016052 carbohydrate catabolic process 131 22 0.0027407
37 GO:0006401 RNA catabolic process 222 30 0.0029237
38 GO:0006096 glycolytic process 57 13 0.0073882
39 GO:0045333 cellular respiration 158 22 0.0104354
40 GO:0022411 cellular component disassembly 412 45 0.0122468
41 GO:0051186 cofactor metabolic process 249 31 0.0126724
43 GO:0022904 respiratory electron transport chain 105 16 0.0248748
44 GO:0022900 electron transport chain 106 16 0.0278997
45 GO:0009135 purine nucleoside diphosphate metabolic process 15 6 0.0279785
46 GO:0009179 purine ribonucleoside diphosphate metabolic process 15 6 0.0279785
47 GO:0006732 coenzyme metabolic process 194 25 0.0372085
49 GO:0022613 ribonucleoprotein complex biogenesis 280 32 0.0407275
50 GO:0033539 fatty acid beta-oxidation using acyl-CoA dehydrogenase 6 4 0.0408517
51 GO:0072657 protein localization to membrane 371 39 0.0442825
52 GO:0042254 ribosome biogenesis 167 22 0.0449407
53 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 9 0.0449407
54 GO:0019058 viral life cycle 303 33 0.0483598

Under enriched (0)

# Re-sort by under-representation p-value. `go` keeps this order from here on,
# including when it is written to CSV.
go <- go[order(go$under_represented_pvalue), ]

# Show every category with under-representation q-value <= 0.05, and at least
# minRow rows. head() stops at the last row, so a table shorter than rowN is
# not padded with all-NA rows as go[1:rowN, ] would be.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
5228 GO:0022603 regulation of anatomical structure morphogenesis 647 19 1
5227 GO:0007186 G-protein coupled receptor signaling pathway 549 14 1
5226 GO:0045596 negative regulation of cell differentiation 440 12 1
5224 GO:0051093 negative regulation of developmental process 546 17 1
5220 GO:0001501 skeletal system development 391 11 1
5218 GO:0000902 cell morphogenesis 993 41 1
5212 GO:0001944 vasculature development 509 17 1
5215 GO:0048514 blood vessel morphogenesis 421 13 1
5222 GO:0048705 skeletal system morphogenesis 183 3 1
5221 GO:0010720 positive regulation of cell development 178 3 1
5210 GO:0001568 blood vessel development 480 16 1
5211 GO:0001525 angiogenesis 350 10 1
5217 GO:0009615 response to virus 261 6 1
5213 GO:0090305 nucleic acid phosphodiester bond hydrolysis 200 4 1
5202 GO:0046903 secretion 769 30 1
5216 GO:0051607 defense response to virus 173 3 1
8899 GO:0060349 bone morphogenesis 77 0 1
5200 GO:0032940 secretion by cell 682 26 1
5219 GO:0007224 smoothened signaling pathway 105 1 1
5204 GO:0042493 response to drug 312 9 1

Positive Effect

Over enriched categories (18)

# Show every category with over-representation q-value <= 0.05, and at least
# minRow rows. Assumes `goPos` is still in the order goseq() returned it
# (presumably ascending over-represented p-value -- confirm).
# head() stops at the last row, so a table shorter than rowN is not padded
# with all-NA rows as goPos[1:rowN, ] would be.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(
    goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")],
    rowN
  ),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0006091 generation of precursor metabolites and energy 394 38 0.0000010
2 GO:0055114 oxidation-reduction process 896 58 0.0000559
3 GO:0015980 energy derivation by oxidation of organic compounds 319 30 0.0000733
4 GO:0045333 cellular respiration 158 19 0.0002804
5 GO:0022904 respiratory electron transport chain 105 15 0.0003436
6 GO:0022900 electron transport chain 106 15 0.0003436
7 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 9 0.0020776
8 GO:0019752 carboxylic acid metabolic process 829 48 0.0143048
9 GO:0042773 ATP synthesis coupled electron transport 54 9 0.0143048
10 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 9 0.0143048
11 GO:0033539 fatty acid beta-oxidation using acyl-CoA dehydrogenase 6 4 0.0143048
12 GO:0032787 monocarboxylic acid metabolic process 427 30 0.0181175
13 GO:0044724 single-organism carbohydrate catabolic process 124 14 0.0219200
15 GO:0043436 oxoacid metabolic process 934 51 0.0241844
16 GO:0006082 organic acid metabolic process 944 51 0.0301276
17 GO:0016052 carbohydrate catabolic process 131 14 0.0318524
18 GO:0009135 purine nucleoside diphosphate metabolic process 15 5 0.0318524
19 GO:0009179 purine ribonucleoside diphosphate metabolic process 15 5 0.0318524
20 GO:0006119 oxidative phosphorylation 70 9 0.0681726
21 GO:0046031 ADP metabolic process 10 4 0.0737939

Under enriched (1)

# Re-sort by under-representation p-value. `goPos` keeps this order from here
# on, including when it is written to CSV.
goPos <- goPos[order(goPos$under_represented_pvalue), ]

# Show every category with under-representation q-value <= 0.05, and at least
# minRow rows. head() stops at the last row, so a table shorter than rowN is
# not padded with all-NA rows as goPos[1:rowN, ] would be.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(
    goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")],
    rowN
  ),
  format = "html"
))
category term numInCat numDEInCat q.value2
4045 GO:0016071 mRNA metabolic process 615 3 0.0317998
5393 GO:0008380 RNA splicing 332 0 0.1105265
4042 GO:0006396 RNA processing 676 6 0.3368223
4097 GO:0000375 RNA splicing, via transesterification reactions 225 0 1.0000000
4098 GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 220 0 1.0000000
4107 GO:0000398 mRNA splicing, via spliceosome 220 0 1.0000000
4999 GO:0006401 RNA catabolic process 222 0 1.0000000
4040 GO:0006397 mRNA processing 404 3 1.0000000
4041 GO:0022613 ribonucleoprotein complex biogenesis 280 1 1.0000000
11297 GO:0090305 nucleic acid phosphodiester bond hydrolysis 200 0 1.0000000
5000 GO:0006402 mRNA catabolic process 196 0 1.0000000
4032 GO:0045944 positive regulation of transcription from RNA polymerase II promoter 719 10 1.0000000
4029 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 10 1.0000000
4030 GO:0044419 interspecies interaction between organisms 727 10 1.0000000
4026 GO:0044265 cellular macromolecule catabolic process 798 12 1.0000000
4157 GO:0000956 nuclear-transcribed mRNA catabolic process 184 0 1.0000000
4033 GO:0007346 regulation of mitotic cell cycle 358 3 1.0000000
4038 GO:0044843 cell cycle G1/S phase transition 238 1 1.0000000
4037 GO:0000082 G1/S transition of mitotic cell cycle 236 1 1.0000000
4031 GO:0019058 viral life cycle 303 2 1.0000000

Negative Effect

Over enriched categories (52)

category term numInCat numDEInCat q.value
1 GO:0006413 translational initiation 167 35 0.0000000
2 GO:0006415 translational termination 95 26 0.0000000
3 GO:0006414 translational elongation 121 28 0.0000000
4 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 26 0.0000000
5 GO:0006613 cotranslational protein targeting to membrane 109 26 0.0000000
6 GO:0045047 protein targeting to ER 110 26 0.0000000
7 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 26 0.0000000
8 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 27 0.0000000
9 GO:0019083 viral transcription 158 29 0.0000000
10 GO:0070972 protein localization to endoplasmic reticulum 128 26 0.0000000
11 GO:0019080 viral gene expression 168 29 0.0000000
12 GO:0044033 multi-organism metabolic process 178 29 0.0000000
13 GO:0043241 protein complex disassembly 195 30 0.0000000
14 GO:0032984 macromolecular complex disassembly 205 30 0.0000000
15 GO:0006412 translation 496 47 0.0000000
16 GO:0006402 mRNA catabolic process 196 29 0.0000000
17 GO:0000956 nuclear-transcribed mRNA catabolic process 184 28 0.0000000
18 GO:0043624 cellular protein complex disassembly 174 27 0.0000000
19 GO:0006401 RNA catabolic process 222 30 0.0000000
20 GO:0006612 protein targeting to membrane 171 26 0.0000000
27 GO:0022613 ribonucleoprotein complex biogenesis 280 31 0.0000000
34 GO:0019058 viral life cycle 303 31 0.0000001
42 GO:0042254 ribosome biogenesis 167 21 0.0000015
44 GO:0022411 cellular component disassembly 412 35 0.0000019
47 GO:0090150 establishment of protein localization to membrane 296 28 0.0000024
49 GO:0016071 mRNA metabolic process 615 43 0.0000051
51 GO:0016032 viral process 674 45 0.0000083
52 GO:0044764 multi-organism cellular process 682 45 0.0000114
53 GO:0072594 establishment of protein localization to organelle 456 35 0.0000114
56 GO:0072657 protein localization to membrane 371 30 0.0000303
57 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 46 0.0000303
58 GO:0044419 interspecies interaction between organisms 727 46 0.0000303
60 GO:0006605 protein targeting 497 36 0.0000366
67 GO:1902578 single-organism localization 426 32 0.0000645
68 GO:1902580 single-organism cellular localization 426 32 0.0000645
69 GO:0006364 rRNA processing 117 15 0.0001024
70 GO:0016072 rRNA metabolic process 122 15 0.0001810
71 GO:0044802 single-organism membrane organization 638 39 0.0007707
72 GO:0001731 formation of translation preinitiation complex 18 6 0.0011387
73 GO:0033365 protein localization to organelle 603 37 0.0011628
74 GO:0016482 cytoplasmic transport 825 46 0.0012939
75 GO:0042274 ribosomal small subunit biogenesis 21 6 0.0016820
76 GO:0022618 ribonucleoprotein complex assembly 135 14 0.0043042
80 GO:0042273 ribosomal large subunit biogenesis 16 5 0.0054764
81 GO:0006417 regulation of translation 249 20 0.0057914
82 GO:0061024 membrane organization 778 42 0.0062815
83 GO:0071826 ribonucleoprotein complex subunit organization 141 14 0.0064693
85 GO:0034470 ncRNA processing 235 18 0.0114765
88 GO:0044265 cellular macromolecule catabolic process 798 42 0.0130529
89 GO:0006446 regulation of translational initiation 67 9 0.0134427
90 GO:0006886 intracellular protein transport 758 39 0.0324000
92 GO:0009057 macromolecule catabolic process 983 47 0.0490219

Under enriched (0)

category term numInCat numDEInCat q.value2
3646 GO:0000902 cell morphogenesis 993 12 0.2828393
3643 GO:0022610 biological adhesion 916 11 0.4628117
3642 GO:0007155 cell adhesion 912 11 0.4628117
3641 GO:0007186 G-protein coupled receptor signaling pathway 549 4 0.6489588
3645 GO:0098602 single organism cell adhesion 327 1 0.6489588
3640 GO:0001944 vasculature development 509 4 0.7840956
3635 GO:0001568 blood vessel development 480 4 1.0000000
3633 GO:0007610 behavior 486 4 1.0000000
3636 GO:0048514 blood vessel morphogenesis 421 3 1.0000000
3639 GO:0016337 single organismal cell-cell adhesion 283 1 1.0000000
3638 GO:0006887 exocytosis 286 1 1.0000000
3634 GO:0001525 angiogenesis 350 2 1.0000000
3628 GO:0032940 secretion by cell 682 8 1.0000000
3626 GO:0007267 cell-cell signaling 915 13 1.0000000
3630 GO:0043062 extracellular structure organization 325 2 1.0000000
3629 GO:0030198 extracellular matrix organization 324 2 1.0000000
3625 GO:0022603 regulation of anatomical structure morphogenesis 647 8 1.0000000
3618 GO:0002682 regulation of immune system process 997 15 1.0000000
5350 GO:0010720 positive regulation of cell development 178 0 1.0000000
3617 GO:0000904 cell morphogenesis involved in differentiation 701 10 1.0000000

Final Step: csv output

# Save each result table, without row names, under the relative csv/ directory
# as <outFile>main.csv, <outFile>Pos.csv and <outFile>Neg.csv.
write.csv(
  go,
  file = paste0("csv/", outFile, "main.csv"),
  row.names = FALSE
)
write.csv(
  goPos,
  file = paste0("csv/", outFile, "Pos.csv"),
  row.names = FALSE
)
write.csv(
  goNeg,
  file = paste0("csv/", outFile, "Neg.csv"),
  row.names = FALSE
)