This report has goseq results for fasting insulin when:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

The goseq results are also saved as CSV files on snowwhite, in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Per-gene association results for the trait, and the prefix used to name the
# CSV files written in the final step.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_S_Insu_all_genes.txt"
outFile <- "S_Insu"

data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene-length table that is merged onto the association results below.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." in both tables so
# they share a merge key (presumably this strips the GENCODE version suffix --
# confirm against the input files). sub() is vectorised and always returns a
# character vector, whereas sapply() over strsplit() changes its return type
# on empty input.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: genes with no entry in gene_lengths are kept, with NA in the
# columns that come from gene_lengths.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by ascending p-value so that rank 1 is the smallest p-value.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() is safe for an empty table; seq(1, 0) would yield c(1, 0).
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors, named by gene ID, marking which genes count as
# differentially expressed in each of the three analyses.
in_top <- data$rank <= 1000

# All genes ranked in the top 1000.
genes <- setNames(as.numeric(in_top), data$gene)
# Top-1000 genes whose effect is strictly positive.
genesPos <- setNames(as.numeric(in_top & data$effect > 0), data$gene)
# Top-1000 genes whose effect is strictly negative.
genesNeg <- setNames(as.numeric(in_top & data$effect < 0), data$gene)

There are 548 DE genes with positive effect and 452 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per indicator vector, using the
# merged `length` column as the bias data. The fit plot is suppressed.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Test the GO:BP categories for one PWF table and post-process the result:
# reset the row names, cap p-values at 1, add q-values for over-representation
# (q.value) and under-representation (q.value2), then drop categories that
# contain 1000 or more genes.
run_go_bp <- function(pwf_table) {
  res <- goseq(pwf_table, "hg19", "ensGene", test.cats = c("GO:BP"))
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  # q-values are computed over all tested categories, before the size filter,
  # so the surviving rows keep their original row numbers.
  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  res[which(res$numInCat < 1000), ]
}

go <- run_go_bp(pwf)
goPos <- run_go_bp(pwfPos)
goNeg <- run_go_bp(pwfNeg)

Top 1000 Results

Over enriched categories (54)

# rowN is the number of categories with q.value <= 0.05, but never fewer than
# minRow; the first rowN rows of `go` are printed as an HTML table.
# head() stops at the last available row, so a table with fewer than rowN
# rows is not padded with all-NA rows as go[1:rowN, ] would be.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0006415 translational termination 95 43 0.0000000
2 GO:0006613 cotranslational protein targeting to membrane 109 45 0.0000000
3 GO:0045047 protein targeting to ER 110 45 0.0000000
4 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 45 0.0000000
5 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 44 0.0000000
6 GO:0070972 protein localization to endoplasmic reticulum 128 48 0.0000000
7 GO:0006414 translational elongation 121 46 0.0000000
8 GO:0006413 translational initiation 167 53 0.0000000
9 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 45 0.0000000
10 GO:0043624 cellular protein complex disassembly 174 51 0.0000000
11 GO:0006612 protein targeting to membrane 171 50 0.0000000
12 GO:0019083 viral transcription 158 47 0.0000000
13 GO:0043241 protein complex disassembly 195 52 0.0000000
14 GO:0019080 viral gene expression 168 47 0.0000000
15 GO:0032984 macromolecular complex disassembly 205 52 0.0000000
16 GO:0044033 multi-organism metabolic process 178 47 0.0000000
17 GO:0006412 translation 496 80 0.0000000
18 GO:0000956 nuclear-transcribed mRNA catabolic process 184 45 0.0000000
19 GO:0006402 mRNA catabolic process 196 46 0.0000000
20 GO:0090150 establishment of protein localization to membrane 296 56 0.0000000
21 GO:0006401 RNA catabolic process 222 47 0.0000000
22 GO:0072657 protein localization to membrane 371 62 0.0000000
23 GO:0019058 viral life cycle 303 53 0.0000000
24 GO:1902578 single-organism localization 426 64 0.0000000
25 GO:1902580 single-organism cellular localization 426 64 0.0000000
26 GO:0072594 establishment of protein localization to organelle 456 66 0.0000000
27 GO:0006605 protein targeting 497 70 0.0000000
28 GO:0022411 cellular component disassembly 412 62 0.0000000
30 GO:0044802 single-organism membrane organization 638 80 0.0000000
33 GO:0033365 protein localization to organelle 603 75 0.0000001
34 GO:0061024 membrane organization 778 89 0.0000002
35 GO:0016032 viral process 674 78 0.0000008
36 GO:0044764 multi-organism cellular process 682 78 0.0000013
37 GO:0006886 intracellular protein transport 758 84 0.0000026
38 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 80 0.0000036
39 GO:0044419 interspecies interaction between organisms 727 80 0.0000036
40 GO:0016482 cytoplasmic transport 825 87 0.0000117
43 GO:0042274 ribosomal small subunit biogenesis 21 9 0.0001931
45 GO:0044265 cellular macromolecule catabolic process 798 80 0.0004756
46 GO:0006091 generation of precursor metabolites and energy 394 46 0.0007169
48 GO:0016071 mRNA metabolic process 615 63 0.0024054
49 GO:0015980 energy derivation by oxidation of organic compounds 319 38 0.0030665
50 GO:0009057 macromolecule catabolic process 983 91 0.0034596
51 GO:0045821 positive regulation of glycolytic process 11 6 0.0054052
52 GO:0042254 ribosome biogenesis 167 23 0.0180964
53 GO:0006090 pyruvate metabolic process 81 15 0.0188632
54 GO:0051188 cofactor biosynthetic process 132 20 0.0195348
55 GO:0051262 protein tetramerization 84 15 0.0195348
56 GO:0006417 regulation of translation 249 31 0.0195348
59 GO:0042273 ribosomal large subunit biogenesis 16 6 0.0287049
60 GO:0055114 oxidation-reduction process 896 78 0.0307820
61 GO:0036476 neuron death in response to hydrogen peroxide 3 3 0.0419078
62 GO:1903207 regulation of hydrogen peroxide-induced neuron death 3 3 0.0419078
63 GO:0070544 histone H3-K36 demethylation 6 4 0.0467587

Under enriched (0)

# Re-sort `go` by the under-representation p-value. The new order persists,
# so it is also the row order of the CSV written in the final step.
go <- go[order(go$under_represented_pvalue), ]
# rowN is the number of categories with q.value2 <= 0.05, but never fewer
# than minRow. head() stops at the last available row, so a short table is
# not padded with all-NA rows as go[1:rowN, ] would be.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
5492 GO:0098609 cell-cell adhesion 148 1 1
5490 GO:0045596 negative regulation of cell differentiation 440 12 1
5491 GO:0048705 skeletal system morphogenesis 183 2 1
8606 GO:0006023 aminoglycan biosynthetic process 96 0 1
5488 GO:0001501 skeletal system development 391 11 1
8607 GO:0006024 glycosaminoglycan biosynthetic process 95 0 1
5487 GO:0051093 negative regulation of developmental process 546 18 1
5483 GO:0022603 regulation of anatomical structure morphogenesis 647 24 1
5480 GO:0022610 biological adhesion 916 38 1
5478 GO:0007155 cell adhesion 912 38 1
5489 GO:0007156 homophilic cell adhesion 111 1 1
5468 GO:0007186 G-protein coupled receptor signaling pathway 549 19 1
5469 GO:0006897 endocytosis 464 16 1
5463 GO:0048870 cell motility 948 40 1
5464 GO:0051674 localization of cell 948 40 1
5482 GO:0000375 RNA splicing, via transesterification reactions 225 5 1
5484 GO:0051607 defense response to virus 173 3 1
9858 GO:0060349 bone morphogenesis 77 0 1
5472 GO:0045088 regulation of innate immune response 243 6 1
5481 GO:0007018 microtubule-based movement 182 4 1

Positive Effect

Over enriched categories (5)

# rowN is the number of categories with q.value <= 0.05, but never fewer than
# minRow; the first rowN rows of `goPos` are printed as an HTML table.
# head() stops at the last available row, so a table with fewer than rowN
# rows is not padded with all-NA rows as goPos[1:rowN, ] would be.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
1 GO:0006091 generation of precursor metabolites and energy 394 32 0.0091082
2 GO:0045333 cellular respiration 158 18 0.0091082
3 GO:0022904 respiratory electron transport chain 105 14 0.0091082
4 GO:0022900 electron transport chain 106 14 0.0091082
5 GO:0015980 energy derivation by oxidation of organic compounds 319 26 0.0343078
6 GO:0055114 oxidation-reduction process 896 51 0.1609763
7 GO:0046031 ADP metabolic process 10 4 0.2694423
8 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 7 0.2694423
9 GO:0070296 sarcoplasmic reticulum calcium ion transport 30 6 0.4129191
11 GO:0051384 response to glucocorticoid 101 11 0.4484915
12 GO:0051592 response to calcium ion 82 10 0.4484915
13 GO:0051188 cofactor biosynthetic process 132 13 0.4651445
14 GO:0006635 fatty acid beta-oxidation 57 8 0.5595700
16 GO:0022898 regulation of transmembrane transporter activity 121 12 0.5595700
17 GO:0033539 fatty acid beta-oxidation using acyl-CoA dehydrogenase 6 3 0.5595700
18 GO:0031960 response to corticosteroid 109 11 0.5595700
19 GO:0042773 ATP synthesis coupled electron transport 54 7 0.5635738
20 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 7 0.5635738
21 GO:0070098 chemokine-mediated signaling pathway 37 6 0.5635738
22 GO:0009135 purine nucleoside diphosphate metabolic process 15 4 0.5638632

Under enriched (0)

# Re-sort `goPos` by the under-representation p-value. The new order
# persists, so it is also the row order of the CSV written in the final step.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# rowN is the number of categories with q.value2 <= 0.05, but never fewer
# than minRow. head() stops at the last available row, so a short table is
# not padded with all-NA rows as goPos[1:rowN, ] would be.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
4346 GO:0016071 mRNA metabolic process 615 7 1
4391 GO:0000375 RNA splicing, via transesterification reactions 225 0 1
4392 GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 220 0 1
4401 GO:0000398 mRNA splicing, via spliceosome 220 0 1
4345 GO:0001501 skeletal system development 391 3 1
4344 GO:0006396 RNA processing 676 9 1
4343 GO:0006397 mRNA processing 404 4 1
4342 GO:0008380 RNA splicing 332 3 1
4334 GO:0043547 positive regulation of GTPase activity 440 6 1
4327 GO:0009887 organ morphogenesis 691 12 1
4331 GO:0033124 regulation of GTP catabolic process 476 7 1
4330 GO:0043087 regulation of GTPase activity 474 7 1
4329 GO:0045596 negative regulation of cell differentiation 440 6 1
4333 GO:0051241 negative regulation of multicellular organismal process 306 3 1
12217 GO:2000027 regulation of organ morphogenesis 135 0 1
4341 GO:0006402 mRNA catabolic process 196 1 1
4322 GO:0051093 negative regulation of developmental process 546 9 1
4339 GO:0061448 connective tissue development 186 1 1
4320 GO:0031347 regulation of defense response 462 7 1
4315 GO:0022603 regulation of anatomical structure morphogenesis 647 12 1

Negative Effect

Over enriched categories (57)

category term numInCat numDEInCat q.value
1 GO:0006415 translational termination 95 42 0.0000000
2 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 42 0.0000000
3 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 44 0.0000000
4 GO:0006613 cotranslational protein targeting to membrane 109 42 0.0000000
5 GO:0006414 translational elongation 121 44 0.0000000
6 GO:0045047 protein targeting to ER 110 42 0.0000000
7 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 42 0.0000000
8 GO:0006413 translational initiation 167 49 0.0000000
9 GO:0070972 protein localization to endoplasmic reticulum 128 42 0.0000000
10 GO:0019083 viral transcription 158 45 0.0000000
11 GO:0019080 viral gene expression 168 45 0.0000000
12 GO:0044033 multi-organism metabolic process 178 45 0.0000000
13 GO:0006612 protein targeting to membrane 171 44 0.0000000
14 GO:0043624 cellular protein complex disassembly 174 44 0.0000000
15 GO:0000956 nuclear-transcribed mRNA catabolic process 184 44 0.0000000
16 GO:0043241 protein complex disassembly 195 45 0.0000000
17 GO:0006402 mRNA catabolic process 196 45 0.0000000
18 GO:0032984 macromolecular complex disassembly 205 45 0.0000000
19 GO:0006401 RNA catabolic process 222 45 0.0000000
20 GO:0090150 establishment of protein localization to membrane 296 48 0.0000000
21 GO:0006412 translation 496 60 0.0000000
22 GO:0019058 viral life cycle 303 47 0.0000000
23 GO:0072657 protein localization to membrane 371 49 0.0000000
24 GO:0072594 establishment of protein localization to organelle 456 54 0.0000000
25 GO:0006605 protein targeting 497 56 0.0000000
26 GO:1902578 single-organism localization 426 51 0.0000000
27 GO:1902580 single-organism cellular localization 426 51 0.0000000
28 GO:0022411 cellular component disassembly 412 49 0.0000000
29 GO:0016071 mRNA metabolic process 615 56 0.0000000
30 GO:0033365 protein localization to organelle 603 56 0.0000000
31 GO:0044802 single-organism membrane organization 638 56 0.0000000
32 GO:0016032 viral process 674 57 0.0000000
33 GO:0044764 multi-organism cellular process 682 57 0.0000000
35 GO:0044403 symbiosis, encompassing mutualism through parasitism 727 58 0.0000000
36 GO:0044419 interspecies interaction between organisms 727 58 0.0000000
38 GO:0006886 intracellular protein transport 758 58 0.0000000
39 GO:0061024 membrane organization 778 58 0.0000000
40 GO:0016482 cytoplasmic transport 825 59 0.0000000
41 GO:0044265 cellular macromolecule catabolic process 798 57 0.0000000
42 GO:0009057 macromolecule catabolic process 983 62 0.0000000
43 GO:0042274 ribosomal small subunit biogenesis 21 9 0.0000001
44 GO:0022613 ribonucleoprotein complex biogenesis 280 28 0.0000001
47 GO:0042254 ribosome biogenesis 167 20 0.0000008
58 GO:0006364 rRNA processing 117 15 0.0000244
63 GO:0016072 rRNA metabolic process 122 15 0.0000425
72 GO:0042273 ribosomal large subunit biogenesis 16 6 0.0001203
76 GO:0001731 formation of translation preinitiation complex 18 6 0.0005528
80 GO:0000028 ribosomal small subunit assembly 7 4 0.0009739
83 GO:0034470 ncRNA processing 235 18 0.0024315
89 GO:0051259 protein oligomerization 341 23 0.0042529
90 GO:0006417 regulation of translation 249 19 0.0050387
91 GO:0022618 ribonucleoprotein complex assembly 135 13 0.0054557
92 GO:0042255 ribosome assembly 19 5 0.0063558
94 GO:0071826 ribonucleoprotein complex subunit organization 141 13 0.0082630
100 GO:0045821 positive regulation of glycolytic process 11 4 0.0236093
103 GO:0051262 protein tetramerization 84 9 0.0356088
104 GO:0034660 ncRNA metabolic process 325 19 0.0432283

Under enriched (0)

category term numInCat numDEInCat q.value2
11003 GO:0072507 divalent inorganic cation homeostasis 275 0 1
3797 GO:0030003 cellular cation homeostasis 358 1 1
11001 GO:0072503 cellular divalent inorganic cation homeostasis 259 0 1
9556 GO:0055074 calcium ion homeostasis 255 0 1
3796 GO:0006875 cellular metal ion homeostasis 333 1 1
4920 GO:0006874 cellular calcium ion homeostasis 244 0 1
3794 GO:0022610 biological adhesion 916 12 1
3793 GO:0007155 cell adhesion 912 12 1
3795 GO:0002253 activation of immune response 373 2 1
3787 GO:0007267 cell-cell signaling 915 12 1
3785 GO:0002682 regulation of immune system process 997 13 1
3789 GO:0007268 synaptic transmission 566 6 1
3792 GO:0006873 cellular ion homeostasis 365 2 1
3790 GO:0006897 endocytosis 464 4 1
3786 GO:0001944 vasculature development 509 5 1
3791 GO:0098602 single organism cell adhesion 327 2 1
3788 GO:0002757 immune response-activating signal transduction 333 2 1
3782 GO:0001568 blood vessel development 480 5 1
3781 GO:0007610 behavior 486 5 1
9331 GO:0051480 cytosolic calcium ion homeostasis 178 0 1

Final Step: csv output

# Write the three result tables to csv/<outFile>main.csv, csv/<outFile>Pos.csv
# and csv/<outFile>Neg.csv, relative to the working directory.
# Create the output directory first: write.csv() cannot open a file in a
# directory that does not exist, which would otherwise fail only after the
# whole analysis has run. dir.create() is a silent no-op if it already exists.
out_dir <- "csv"
dir.create(out_dir, showWarnings = FALSE)

write.csv(go, file = file.path(out_dir, paste0(outFile, "main.csv")), row.names = FALSE)
write.csv(goPos, file = file.path(out_dir, paste0(outFile, "Pos.csv")), row.names = FALSE)
write.csv(goNeg, file = file.path(out_dir, paste0(outFile, "Neg.csv")), row.names = FALSE)