This report has goseq results for fasting glucose when:
- Genes with FDR < 0.1 marked as DE
- Genes from above list with positive effect marked as differentially expressed
- Genes from above list with negative effect marked as differentially expressed
This report was generated on June 18 2015
Goseq results also saved in csv files located on snowwhite in directory:
/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv
Step 1: Load in all the necessary data/libraries
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)
# Per-gene association results for the trait analysed in this report
# (fasting glucose, label "GL0"); outFile is the prefix for the csv outputs.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_GL0_all_genes.txt"
outFile <- "GL0"
data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene lengths, later passed to nullp() as bias.data.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so that both tables
# share the same key (presumably this strips the GENCODE version suffix --
# confirm against the input files). sub() is vectorised and always returns a
# character vector, unlike sapply() over strsplit().
gene_lengths$gene <- sub("[.].*", "", gene_lengths$gene)
data$gene <- sub("[.].*", "", data$gene)

# Left join: every tested gene is kept; genes without a length get NA.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by p-value and attach q-values for the FDR cut-off used below.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows shown in each result table.
minRow <- 20
Step 2: Create genes vectors
The first vector marks every gene with q-value <= 0.1 (FDR < 0.1) as differentially expressed. The second and third vectors mark
the subset of those genes with positive or negative effect, respectively, as differentially expressed.
# Named 0/1 indicator vectors (names = gene IDs):
#   genes    - q-value <= 0.1
#   genesPos - q-value <= 0.1 and effect > 0
#   genesNeg - q-value <= 0.1 and effect < 0
genes <- setNames(
  as.numeric(data$q.value <= 0.1),
  data$gene
)
genesPos <- setNames(
  as.numeric(data$q.value <= 0.1 & data$effect > 0),
  data$gene
)
genesNeg <- setNames(
  as.numeric(data$q.value <= 0.1 & data$effect < 0),
  data$gene
)
There are 164 DE genes with positive effect and 91 DE genes with negative effect.
Step 3: PWFs
# One PWF fit per DE indicator vector. The merged gene lengths are supplied as
# the bias data, and the diagnostic fit plot is switched off.
pwf <- nullp(
  genes, "hg19", "ensGene",
  bias.data = data$length,
  plot.fit = FALSE
)
pwfPos <- nullp(
  genesPos, "hg19", "ensGene",
  bias.data = data$length,
  plot.fit = FALSE
)
pwfNeg <- nullp(
  genesNeg, "hg19", "ensGene",
  bias.data = data$length,
  plot.fit = FALSE
)
Step 4: run goseq
# Run goseq on one PWF fit, restricted to GO biological-process categories,
# and post-process the result table:
#   * reset the row names,
#   * cap over-/under-represented p-values at 1,
#   * add q.value (over-represented) and q.value2 (under-represented).
run_go_bp <- function(pwf_fit) {
  res <- goseq(pwf_fit, "hg19", "ensGene", test.cats = c("GO:BP"))
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue[res$over_represented_pvalue > 1] <- 1
  res$under_represented_pvalue[res$under_represented_pvalue > 1] <- 1

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues
  res
}

go <- run_go_bp(pwf)
goPos <- run_go_bp(pwfPos)
goNeg <- run_go_bp(pwfNeg)
Results: all DE genes (FDR < 0.1)
Over enriched categories (1)
# Number of rows to print: every category with over-represented q-value
# <= 0.05, but at least minRow rows and never more rows than go contains
# (1:rowN would otherwise pad the table with all-NA rows). NA q-values are
# not counted as significant.
# NOTE(review): taking the first rowN rows assumes go is still ordered by
# over-represented p-value, as returned by goseq -- confirm.
rowN <- min(nrow(go), max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| category | term | numInCat | numDEInCat | q.value |
|---|---|---|---|---|
| GO:0055091 | phospholipid homeostasis | 7 | 4 | 0.0193404 |
| GO:0055081 | anion homeostasis | 27 | 5 | 0.2910831 |
| GO:0019395 | fatty acid oxidation | 77 | 7 | 0.6614516 |
| GO:0034440 | lipid oxidation | 79 | 7 | 0.6614516 |
| GO:0070972 | protein localization to endoplasmic reticulum | 128 | 8 | 0.7992201 |
| GO:0046320 | regulation of fatty acid oxidation | 24 | 4 | 0.7992201 |
| GO:0006614 | SRP-dependent cotranslational protein targeting to membrane | 107 | 7 | 0.7992201 |
| GO:0030258 | lipid modification | 149 | 9 | 0.7992201 |
| GO:0006613 | cotranslational protein targeting to membrane | 109 | 7 | 0.7992201 |
| GO:0045047 | protein targeting to ER | 110 | 7 | 0.7992201 |
| GO:0072599 | establishment of protein localization to endoplasmic reticulum | 111 | 7 | 0.7992201 |
| GO:0055088 | lipid homeostasis | 78 | 6 | 1.0000000 |
| GO:0019083 | viral transcription | 158 | 8 | 1.0000000 |
| GO:0006415 | translational termination | 95 | 6 | 1.0000000 |
| GO:0043933 | macromolecular complex subunit organization | 1450 | 36 | 1.0000000 |
| GO:0006635 | fatty acid beta-oxidation | 57 | 5 | 1.0000000 |
| GO:0043241 | protein complex disassembly | 195 | 9 | 1.0000000 |
| GO:0001302 | replicative cell aging | 5 | 2 | 1.0000000 |
| GO:0032025 | response to cobalt ion | 5 | 2 | 1.0000000 |
| GO:0019080 | viral gene expression | 168 | 8 | 1.0000000 |
Under enriched (0)
# Re-sort go by under-represented p-value. This replaces go itself, so any
# later use of go (including the csv export) sees this ordering.
go <- go[order(go$under_represented_pvalue), ]

# Number of rows to print: every category with under-represented q-value
# <= 0.05, but at least minRow rows and never more rows than go contains
# (1:rowN would otherwise pad the table with all-NA rows). NA q-values are
# not counted as significant.
rowN <- min(nrow(go), max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE)))

# NOTE(review): after the re-sort the row names are no longer 1..n, so kable
# prints them as an unlabeled first column; pass row.names = FALSE to kable()
# if that column is not wanted.
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
|  | category | term | numInCat | numDEInCat | q.value2 |
|---|---|---|---|---|---|
| 2620 | GO:0002520 | immune system development | 648 | 1 | 1 |
| 2619 | GO:0048534 | hematopoietic or lymphoid organ development | 615 | 1 | 1 |
| 2665 | GO:0000280 | nuclear division | 460 | 0 | 1 |
| 2618 | GO:0030097 | hemopoiesis | 587 | 1 | 1 |
| 2616 | GO:0009611 | response to wounding | 833 | 4 | 1 |
| 4005 | GO:0007067 | mitotic nuclear division | 346 | 0 | 1 |
| 3147 | GO:0002521 | leukocyte differentiation | 353 | 0 | 1 |
| 2612 | GO:0072521 | purine-containing compound metabolic process | 1346 | 10 | 1 |
| 2617 | GO:0048598 | embryonic morphogenesis | 468 | 1 | 1 |
| 2615 | GO:0009887 | organ morphogenesis | 691 | 3 | 1 |
| 2611 | GO:0055086 | nucleobase-containing small molecule metabolic process | 1507 | 12 | 1 |
| 2610 | GO:0006163 | purine nucleotide metabolic process | 1313 | 10 | 1 |
| 2609 | GO:0019693 | ribose phosphate metabolic process | 1311 | 10 | 1 |
| 2607 | GO:0009259 | ribonucleotide metabolic process | 1307 | 10 | 1 |
| 2608 | GO:0009119 | ribonucleoside metabolic process | 1218 | 9 | 1 |
| 2613 | GO:0051301 | cell division | 666 | 3 | 1 |
| 2604 | GO:0009150 | purine ribonucleotide metabolic process | 1292 | 10 | 1 |
| 2599 | GO:0006753 | nucleoside phosphate metabolic process | 1461 | 12 | 1 |
| 2602 | GO:0042278 | purine nucleoside metabolic process | 1198 | 9 | 1 |
| 2601 | GO:0046128 | purine ribonucleoside metabolic process | 1195 | 9 | 1 |
Positive Effect
Over enriched categories (0)
# Number of rows to print: every category with over-represented q-value
# <= 0.05, but at least minRow rows and never more rows than goPos contains
# (1:rowN would otherwise pad the table with all-NA rows). NA q-values are
# not counted as significant.
# NOTE(review): taking the first rowN rows assumes goPos is still ordered by
# over-represented p-value, as returned by goseq -- confirm.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
| category | term | numInCat | numDEInCat | q.value |
|---|---|---|---|---|
| GO:0006987 | activation of signaling protein activity involved in unfolded protein response | 63 | 5 | 1 |
| GO:0032075 | positive regulation of nuclease activity | 65 | 5 | 1 |
| GO:0032069 | regulation of nuclease activity | 71 | 5 | 1 |
| GO:1903050 | regulation of proteolysis involved in cellular protein catabolic process | 105 | 6 | 1 |
| GO:0032025 | response to cobalt ion | 5 | 2 | 1 |
| GO:0006259 | DNA metabolic process | 838 | 19 | 1 |
| GO:1903362 | regulation of cellular protein catabolic process | 110 | 6 | 1 |
| GO:0045732 | positive regulation of protein catabolic process | 110 | 6 | 1 |
| GO:0009896 | positive regulation of catabolic process | 194 | 8 | 1 |
| GO:0032434 | regulation of proteasomal ubiquitin-dependent protein catabolic process | 75 | 5 | 1 |
| GO:0019395 | fatty acid oxidation | 77 | 5 | 1 |
| GO:0032436 | positive regulation of proteasomal ubiquitin-dependent protein catabolic process | 45 | 4 | 1 |
| GO:0034440 | lipid oxidation | 79 | 5 | 1 |
| GO:0042176 | regulation of protein catabolic process | 207 | 8 | 1 |
| GO:0046320 | regulation of fatty acid oxidation | 24 | 3 | 1 |
| GO:1901800 | positive regulation of proteasomal protein catabolic process | 51 | 4 | 1 |
| GO:0030968 | endoplasmic reticulum unfolded protein response | 86 | 5 | 1 |
| GO:0034620 | cellular response to unfolded protein | 87 | 5 | 1 |
| GO:0045862 | positive regulation of proteolysis | 87 | 5 | 1 |
| GO:1903052 | positive regulation of proteolysis involved in cellular protein catabolic process | 52 | 4 | 1 |
Under enriched (0)
# Re-sort goPos by under-represented p-value. This replaces goPos itself, so
# any later use of goPos (including the csv export) sees this ordering.
goPos <- goPos[order(goPos$under_represented_pvalue), ]

# Number of rows to print: every category with under-represented q-value
# <= 0.05, but at least minRow rows and never more rows than goPos contains
# (1:rowN would otherwise pad the table with all-NA rows). NA q-values are
# not counted as significant.
rowN <- min(nrow(goPos), max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE)))

# NOTE(review): after the re-sort the row names are no longer 1..n, so kable
# prints them as an unlabeled first column; pass row.names = FALSE to kable()
# if that column is not wanted.
cat(kable(
  goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
|  | category | term | numInCat | numDEInCat | q.value2 |
|---|---|---|---|---|---|
| 2029 | GO:0009611 | response to wounding | 833 | 1 | 1 |
| 2027 | GO:1901564 | organonitrogen compound metabolic process | 2217 | 11 | 1 |
| 2026 | GO:0055086 | nucleobase-containing small molecule metabolic process | 1507 | 6 | 1 |
| 2025 | GO:0072521 | purine-containing compound metabolic process | 1346 | 5 | 1 |
| 2018 | GO:0006753 | nucleoside phosphate metabolic process | 1461 | 6 | 1 |
| 2016 | GO:0009117 | nucleotide metabolic process | 1456 | 6 | 1 |
| 2021 | GO:0006163 | purine nucleotide metabolic process | 1313 | 5 | 1 |
| 2020 | GO:0019693 | ribose phosphate metabolic process | 1311 | 5 | 1 |
| 2019 | GO:1901565 | organonitrogen compound catabolic process | 1294 | 5 | 1 |
| 2017 | GO:0009259 | ribonucleotide metabolic process | 1307 | 5 | 1 |
| 5976 | GO:0033124 | regulation of GTP catabolic process | 476 | 0 | 1 |
| 7235 | GO:0043087 | regulation of GTPase activity | 474 | 0 | 1 |
| 2014 | GO:0009150 | purine ribonucleotide metabolic process | 1292 | 5 | 1 |
| 2028 | GO:0009887 | organ morphogenesis | 691 | 1 | 1 |
| 2010 | GO:1901657 | glycosyl compound metabolic process | 1252 | 5 | 1 |
| 8412 | GO:0048598 | embryonic morphogenesis | 468 | 0 | 1 |
| 7370 | GO:0043547 | positive regulation of GTPase activity | 440 | 0 | 1 |
| 2009 | GO:0009116 | nucleoside metabolic process | 1238 | 5 | 1 |
| 2074 | GO:0000280 | nuclear division | 460 | 0 | 1 |
| 2024 | GO:0002520 | immune system development | 648 | 1 | 1 |
Negative Effect
Over enriched categories (0)
| category | term | numInCat | numDEInCat | q.value |
|---|---|---|---|---|
| GO:0055091 | phospholipid homeostasis | 7 | 3 | 0.0581926 |
| GO:0055081 | anion homeostasis | 27 | 4 | 0.0581926 |
| GO:0006415 | translational termination | 95 | 5 | 0.0958987 |
| GO:0019083 | viral transcription | 158 | 6 | 0.0958987 |
| GO:0019080 | viral gene expression | 168 | 6 | 0.0958987 |
| GO:0006614 | SRP-dependent cotranslational protein targeting to membrane | 107 | 5 | 0.0958987 |
| GO:0006413 | translational initiation | 167 | 6 | 0.0958987 |
| GO:0006613 | cotranslational protein targeting to membrane | 109 | 5 | 0.0958987 |
| GO:0045047 | protein targeting to ER | 110 | 5 | 0.0958987 |
| GO:0072599 | establishment of protein localization to endoplasmic reticulum | 111 | 5 | 0.0958987 |
| GO:0044033 | multi-organism metabolic process | 178 | 6 | 0.0958987 |
| GO:0006414 | translational elongation | 121 | 5 | 0.1239232 |
| GO:0000184 | nuclear-transcribed mRNA catabolic process, nonsense-mediated decay | 118 | 5 | 0.1239232 |
| GO:0070972 | protein localization to endoplasmic reticulum | 128 | 5 | 0.1605774 |
| GO:0055088 | lipid homeostasis | 78 | 4 | 0.4647477 |
| GO:0034375 | high-density lipoprotein particle remodeling | 10 | 2 | 0.7015551 |
| GO:0006612 | protein targeting to membrane | 171 | 5 | 0.7164354 |
| GO:0043624 | cellular protein complex disassembly | 174 | 5 | 0.8556412 |
| GO:0043691 | reverse cholesterol transport | 11 | 2 | 0.8556412 |
| GO:0000956 | nuclear-transcribed mRNA catabolic process | 184 | 5 | 0.8565072 |
Under enriched (0)
|  | category | term | numInCat | numDEInCat | q.value2 |
|---|---|---|---|---|---|
| 1591 | GO:0050896 | response to stimulus | 6126 | 19 | 1 |
| 1587 | GO:0043412 | macromolecule modification | 2854 | 7 | 1 |
| 1585 | GO:0007154 | cell communication | 4559 | 14 | 1 |
| 1584 | GO:0007165 | signal transduction | 4077 | 12 | 1 |
| 11917 | GO:1903047 | mitotic cell cycle process | 746 | 0 | 1 |
| 1579 | GO:0023052 | signaling | 4496 | 14 | 1 |
| 1580 | GO:0044700 | single organism signaling | 4496 | 14 | 1 |
| 1583 | GO:0006950 | response to stress | 2909 | 7 | 1 |
| 1581 | GO:0006464 | cellular protein modification process | 2739 | 7 | 1 |
| 1582 | GO:0036211 | protein modification process | 2739 | 7 | 1 |
| 8688 | GO:0051301 | cell division | 666 | 0 | 1 |
| 1575 | GO:0048518 | positive regulation of biological process | 3684 | 11 | 1 |
| 3140 | GO:0006974 | cellular response to DNA damage stimulus | 668 | 0 | 1 |
| 1590 | GO:0045893 | positive regulation of transcription, DNA-templated | 988 | 1 | 1 |
| 1614 | GO:0002520 | immune system development | 648 | 0 | 1 |
| 1589 | GO:0010647 | positive regulation of cell communication | 1000 | 1 | 1 |
| 1569 | GO:0044260 | cellular macromolecule metabolic process | 6484 | 23 | 1 |
| 1588 | GO:0023056 | positive regulation of signaling | 995 | 1 | 1 |
| 1563 | GO:0051716 | cellular response to stimulus | 4976 | 17 | 1 |
| 8260 | GO:0048534 | hematopoietic or lymphoid organ development | 615 | 0 | 1 |
Final Step: csv output
# Export the three goseq result tables to the csv/ directory, using outFile
# as the file-name prefix and omitting row names.
# Note: go and goPos were re-sorted by under-represented p-value in the
# table sections above, so they are written in that order.
write.csv(
  go,
  file = paste0("csv/", outFile, "_main.csv"),
  row.names = FALSE
)
write.csv(
  goPos,
  file = paste0("csv/", outFile, "Pos.csv"),
  row.names = FALSE
)
write.csv(
  goNeg,
  file = paste0("csv/", outFile, "Neg.csv"),
  row.names = FALSE
)