This report has goseq results for 120 minute glucose when:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k01_GL120_all_genes.txt"
outFile <- "GL120"

data <- read.table(fName, as.is=T, header=T)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths = read.table(gene_length_file, header=T, as.is=T);
gene_lengths$gene = sapply(gene_lengths$gene, function(x){ unlist(strsplit(x, split="[.]"))[1] });

data$gene <- sapply(data$gene, function(x){ unlist(strsplit(x, split="[.]"))[1] });

data <- merge(data, gene_lengths, by="gene", all.x=T)
data <- data[order(data$p.value),]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq(1,length(data[,1]))

minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

genes <- as.numeric(data$rank <= 1000)
genesPos <- as.numeric(data$rank <= 1000 & data$effect > 0)
genesNeg <- as.numeric(data$rank <= 1000 & data$effect < 0)

names(genes) <- data$gene
names(genesPos) <- data$gene
names(genesNeg) <- data$gene

There are 556 DE genes with postive effect and 444 DE genes with negative effect.

Step 3: PWFs

pwf <- nullp(genes,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)
pwfPos=nullp(genesPos,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)
pwfNeg=nullp(genesNeg,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)

Step 4: run goseq

go <- goseq(pwf,"hg19","ensGene",test.cats=c("GO:BP"))
goPos <- goseq(pwfPos,"hg19","ensGene",test.cats=c("GO:BP"))
goNeg <- goseq(pwfNeg,"hg19","ensGene",test.cats=c("GO:BP"))

rownames(go) <- NULL
rownames(goPos) <- NULL
rownames(goNeg) <- NULL

# Fix problem with some p-values being slightly more than 1
go$over_represented_pvalue[go$over_represented_pvalue>1]=1;
go$under_represented_pvalue[go$under_represented_pvalue>1]=1;
goPos$over_represented_pvalue[goPos$over_represented_pvalue>1]=1;
goPos$under_represented_pvalue[goPos$under_represented_pvalue>1]=1;
goNeg$over_represented_pvalue[goNeg$over_represented_pvalue>1]=1;
goNeg$under_represented_pvalue[goNeg$under_represented_pvalue>1]=1;

go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
goPos$q.value=qvalue(goPos$over_represented_pvalue)$qvalues
goNeg$q.value=qvalue(goNeg$over_represented_pvalue)$qvalues

go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues
goPos$q.value2=qvalue(goPos$under_represented_pvalue)$qvalues
goNeg$q.value2=qvalue(goNeg$under_represented_pvalue)$qvalues
go <- go[which(go$numInCat < 1000),]
goPos <- goPos[which(goPos$numInCat < 1000),]
goNeg <- goNeg[which(goNeg$numInCat < 1000),]

Top 1000 Results

Over enriched categories (0)

rowN <- max(minRow, sum(go$q.value<=0.05))
cat(kable(go[1:rowN,c("category","term","numInCat","numDEInCat","q.value")],format="html"));
category term numInCat numDEInCat q.value
3 GO:0006412 translation 496 48 0.2760568
5 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 16 0.2760568
6 GO:0006613 cotranslational protein targeting to membrane 109 16 0.2760568
7 GO:0045047 protein targeting to ER 110 16 0.2760568
8 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 16 0.2760568
9 GO:0006414 translational elongation 121 17 0.2760568
10 GO:0006413 translational initiation 167 21 0.2760568
11 GO:1902578 single-organism localization 426 42 0.2760568
12 GO:1902580 single-organism cellular localization 426 42 0.2760568
15 GO:0044802 single-organism membrane organization 638 57 0.3346083
17 GO:0070972 protein localization to endoplasmic reticulum 128 17 0.3442966
23 GO:0070374 positive regulation of ERK1 and ERK2 cascade 95 14 0.4124581
25 GO:0030216 keratinocyte differentiation 63 11 0.4288717
26 GO:0072657 protein localization to membrane 371 36 0.4452403
27 GO:0048199 vesicle targeting, to, from or within Golgi 28 7 0.4525929
30 GO:0042116 macrophage activation 39 8 0.5311201
32 GO:0042228 interleukin-8 biosynthetic process 10 4 0.5311201
33 GO:0006415 translational termination 95 13 0.5311201
34 GO:0006901 vesicle coating 38 8 0.5528865
35 GO:0061024 membrane organization 778 64 0.6021103

Under enriched (0)

go <- go[order(go$under_represented_pvalue),]
rowN <- max(minRow, sum(go$q.value2<=0.05))
cat(kable(go[1:rowN,c("category","term","numInCat","numDEInCat","q.value2")],format="html"));
category term numInCat numDEInCat q.value2
5576 GO:1901990 regulation of mitotic cell cycle phase transition 247 6 1
5578 GO:0007224 smoothened signaling pathway 105 1 1
5577 GO:0035023 regulation of Rho protein signal transduction 185 4 1
5574 GO:0060271 cilium morphogenesis 154 3 1
5575 GO:0042384 cilium assembly 124 2 1
5571 GO:0010720 positive regulation of cell development 178 4 1
5569 GO:1901987 regulation of cell cycle phase transition 253 7 1
5557 GO:0006396 RNA processing 676 27 1
5567 GO:0032319 regulation of Rho GTPase activity 164 4 1
8993 GO:0002456 T cell mediated immunity 59 0 1
9176 GO:0008589 regulation of smoothened signaling pathway 55 0 1
5563 GO:1901991 negative regulation of mitotic cell cycle phase transition 193 5 1
10253 GO:0090263 positive regulation of canonical Wnt signaling pathway 55 0 1
5566 GO:0044782 cilium organization 137 3 1
10360 GO:2000648 positive regulation of stem cell proliferation 54 0 1
5560 GO:1901605 alpha-amino acid metabolic process 187 5 1
5564 GO:0021915 neural tube development 134 3 1
5541 GO:0006397 mRNA processing 404 15 1
5570 GO:0051099 positive regulation of binding 83 1 1
9268 GO:0014013 regulation of gliogenesis 52 0 1

Positive Effect

Over enriched categories (0)

rowN <- max(minRow, sum(goPos$q.value<=0.05))
cat(kable(goPos[1:rowN,c("category","term","numInCat","numDEInCat","q.value")],format="html"));
category term numInCat numDEInCat q.value
11 GO:0048199 vesicle targeting, to, from or within Golgi 28 7 0.0542321
14 GO:0006901 vesicle coating 38 8 0.0555984
15 GO:0006903 vesicle targeting 39 8 0.0555984
17 GO:0045087 innate immune response 724 43 0.0757343
18 GO:0042176 regulation of protein catabolic process 207 19 0.0757343
25 GO:1902591 single-organism membrane budding 34 7 0.0867957
29 GO:0032434 regulation of proteasomal ubiquitin-dependent protein catabolic process 75 10 0.1036360
30 GO:0048102 autophagic cell death 6 3 0.1036360
33 GO:1903050 regulation of proteolysis involved in cellular protein catabolic process 105 12 0.1051683
35 GO:0045416 positive regulation of interleukin-8 biosynthetic process 6 3 0.1216077
36 GO:0061136 regulation of proteasomal protein catabolic process 94 11 0.1413337
38 GO:1903362 regulation of cellular protein catabolic process 110 12 0.1413337
41 GO:0009896 positive regulation of catabolic process 194 17 0.1659491
42 GO:0006900 membrane budding 53 8 0.1659689
43 GO:0045321 leukocyte activation 561 33 0.1864359
44 GO:0042108 positive regulation of cytokine biosynthetic process 48 7 0.1873191
45 GO:0031331 positive regulation of cellular catabolic process 146 14 0.1981974
46 GO:0035964 COPI-coated vesicle budding 13 4 0.1981974
47 GO:0048200 Golgi transport vesicle coating 13 4 0.1981974
48 GO:0048205 COPI coating of Golgi vesicle 13 4 0.1981974

Under enriched (0)

goPos <- goPos[order(goPos$under_represented_pvalue),]
rowN <- max(minRow, sum(goPos$q.value2<=0.05))
cat(kable(goPos[1:rowN,c("category","term","numInCat","numDEInCat","q.value2")],format="html"));
category term numInCat numDEInCat q.value2
4456 GO:0009126 purine nucleoside monophosphate metabolic process 467 4 0.6051119
4455 GO:0009167 purine ribonucleoside monophosphate metabolic process 466 4 0.6051119
4454 GO:0046034 ATP metabolic process 442 4 0.7700409
4450 GO:0009123 nucleoside monophosphate metabolic process 489 5 0.8171874
4452 GO:0006520 cellular amino acid metabolic process 416 3 0.8171874
4449 GO:0009161 ribonucleoside monophosphate metabolic process 478 5 0.8171874
4440 GO:0006082 organic acid metabolic process 944 16 1.0000000
11767 GO:1901605 alpha-amino acid metabolic process 187 0 1.0000000
4435 GO:0043436 oxoacid metabolic process 934 16 1.0000000
5406 GO:0006575 cellular modified amino acid metabolic process 171 0 1.0000000
4433 GO:0009125 nucleoside monophosphate catabolic process 347 4 1.0000000
4432 GO:0009128 purine nucleoside monophosphate catabolic process 345 4 1.0000000
4430 GO:0009158 ribonucleoside monophosphate catabolic process 344 4 1.0000000
4431 GO:0009169 purine ribonucleoside monophosphate catabolic process 344 4 1.0000000
4428 GO:0006200 ATP catabolic process 340 4 1.0000000
4439 GO:0008202 steroid metabolic process 212 1 1.0000000
4410 GO:0019752 carboxylic acid metabolic process 829 15 1.0000000
4404 GO:0006140 regulation of nucleotide metabolic process 613 12 1.0000000
4402 GO:1900542 regulation of purine nucleotide metabolic process 610 12 1.0000000
4409 GO:0045017 glycerolipid biosynthetic process 215 2 1.0000000

Negative Effect

Over enriched categories (18)

category term numInCat numDEInCat q.value
1 GO:0006414 translational elongation 121 17 0.0000450
2 GO:0006415 translational termination 95 13 0.0020900
3 GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 13 0.0040171
4 GO:0006613 cotranslational protein targeting to membrane 109 13 0.0040171
5 GO:0045047 protein targeting to ER 110 13 0.0040171
6 GO:0072599 establishment of protein localization to endoplasmic reticulum 111 13 0.0040171
7 GO:0006413 translational initiation 167 16 0.0040171
8 GO:0006612 protein targeting to membrane 171 16 0.0049309
9 GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 13 0.0060540
10 GO:0022904 respiratory electron transport chain 105 12 0.0072708
11 GO:0022900 electron transport chain 106 12 0.0073275
12 GO:0006412 translation 496 29 0.0080880
14 GO:0070972 protein localization to endoplasmic reticulum 128 13 0.0094213
15 GO:0045333 cellular respiration 158 14 0.0178212
16 GO:0019083 viral transcription 158 14 0.0180904
17 GO:0055114 oxidation-reduction process 896 41 0.0314471
18 GO:0019080 viral gene expression 168 14 0.0320059
19 GO:0006091 generation of precursor metabolites and energy 394 23 0.0449056
20 GO:0044033 multi-organism metabolic process 178 14 0.0528157
21 GO:0090150 establishment of protein localization to membrane 296 19 0.0528157

Under enriched (0)

category term numInCat numDEInCat q.value2
3692 GO:0006397 mRNA processing 404 1 0.3084258
4677 GO:0006260 DNA replication 281 0 0.5219822
3691 GO:0007067 mitotic nuclear division 346 1 0.9114961
3685 GO:0006974 cellular response to DNA damage stimulus 668 6 1.0000000
3686 GO:0000280 nuclear division 460 3 1.0000000
3765 GO:0000375 RNA splicing, via transesterification reactions 225 0 1.0000000
3766 GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 220 0 1.0000000
3774 GO:0000398 mRNA splicing, via spliceosome 220 0 1.0000000
3680 GO:0045893 positive regulation of transcription, DNA-templated 988 12 1.0000000
3679 GO:1903047 mitotic cell cycle process 746 8 1.0000000
3672 GO:0000278 mitotic cell cycle 856 10 1.0000000
3683 GO:0002252 immune effector process 504 4 1.0000000
3670 GO:0006396 RNA processing 676 7 1.0000000
3678 GO:0048285 organelle fission 488 4 1.0000000
3669 GO:0051301 cell division 666 7 1.0000000
3667 GO:0006259 DNA metabolic process 838 10 1.0000000
5882 GO:0016197 endosomal transport 188 0 1.0000000
3671 GO:0008380 RNA splicing 332 2 1.0000000
3666 GO:0006281 DNA repair 393 3 1.0000000
10535 GO:0071103 DNA conformation change 173 0 1.0000000

Final Step: csv output

write.csv(go,file=paste("csv/", outFile,"_main.csv",sep=''), row.names=FALSE)
write.csv(goPos,file=paste("csv/", outFile,"Pos.csv",sep=''), row.names=FALSE)
write.csv(goNeg,file=paste("csv/", outFile,"Neg.csv",sep=''), row.names=FALSE)