This report has goseq results for fasting glucose when:
- Top 1000 genes are marked as differentially expressed
- Genes with a positive effect among the top 1000 overall genes are marked as differentially expressed
- Genes with a negative effect among the top 1000 overall genes are marked as differentially expressed
This report was generated on June 21 2015
The goseq results are also saved as csv files on snowwhite, in the directory:
/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv
Step 1: Load in all the necessary data/libraries
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)
# Per-gene association results for the trait analysed in this report.
# Later steps use the `gene`, `p.value`, `effect` and `length` columns of the
# merged table built below.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_GL0_all_genes.txt"
# Prefix for the csv files written in the final step.
outFile <- "GL0"
data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene-length table; presumably the source of the `length` column that is
# passed to nullp() as bias data -- confirm against the file header.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so that IDs from
# the two files can be matched (e.g. "ENSG00000000003.10" -> "ENSG00000000003").
# sub() is vectorized, so no per-element sapply()/strsplit() is needed.
strip_version <- function(ids) sub("[.].*$", "", ids)
gene_lengths$gene <- strip_version(gene_lengths$gene)
data$gene <- strip_version(data$gene)

# Left join: genes without a length entry are kept (their length is NA).
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by p-value (rank 1 = smallest p-value) and attach q-values.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() stays correct (empty) for a zero-row table, unlike seq(1, n).
data$rank <- seq_len(nrow(data))

# Minimum number of rows shown in each results table of this report.
minRow <- 20
Step 2: Create gene vectors
The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes
with positive or negative effect in the top 1000 as differentially expressed.
# Build the three named 0/1 vectors (names = gene IDs): 1 marks a gene as
# differentially expressed, 0 as not.
# A top-1000 gene whose effect is exactly 0 is flagged in `genes` only.
in_top <- data$rank <= 1000
genes <- setNames(as.numeric(in_top), data$gene)
genesPos <- setNames(as.numeric(in_top & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(in_top & data$effect < 0), data$gene)
There are 635 DE genes with positive effect and 365 DE genes with negative effect.
Step 3: PWFs
# Run nullp() with identical settings for each DE flag vector: hg19 genome,
# Ensembl gene IDs, the merged gene lengths as bias data, diagnostic plot off.
fit_pwf <- function(de_flags) {
  nullp(de_flags, "hg19", "ensGene", bias.data = data$length, plot.fit = FALSE)
}
pwf <- fit_pwf(genes)
pwfPos <- fit_pwf(genesPos)
pwfNeg <- fit_pwf(genesNeg)
Step 4: run goseq
# Run the GO biological-process test for one PWF fit and post-process the
# result table. The same steps are applied to all three gene sets.
run_go_bp <- function(pwf_fit) {
  res <- goseq(pwf_fit, "hg19", "ensGene", test.cats = c("GO:BP"))
  # Reset row names to 1..n before filtering, so the rows that survive the
  # filter below keep their position in the full goseq result as row name.
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1
  res$over_represented_pvalue[res$over_represented_pvalue > 1] <- 1
  res$under_represented_pvalue[res$under_represented_pvalue > 1] <- 1

  # q.value: over-representation; q.value2: under-representation.
  # Both are computed on all categories, i.e. before the size filter.
  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # Drop categories with 1000 or more genes.
  res[which(res$numInCat < 1000), ]
}

go <- run_go_bp(pwf)
goPos <- run_go_bp(pwfPos)
goNeg <- run_go_bp(pwfNeg)
Top 1000 Results
Over enriched categories (19)
# Show every category with q.value <= 0.05, but at least minRow rows, so the
# table may end with categories that are not significant.
# `go` is still in the row order returned by goseq() at this point.
# The row count is capped at nrow(go) and NA q-values are ignored, so a short
# or partly-NA result cannot produce all-NA padding rows or an NA row count.
n_sig <- sum(go$q.value <= 0.05, na.rm = TRUE)
rowN <- min(nrow(go), max(minRow, n_sig))
cat(kable(go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")], format = "html"))
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0000184 |
nuclear-transcribed mRNA catabolic process, nonsense-mediated decay |
118 |
24 |
0.0000893 |
| GO:0006413 |
translational initiation |
167 |
27 |
0.0009412 |
| GO:0000956 |
nuclear-transcribed mRNA catabolic process |
184 |
28 |
0.0018819 |
| GO:0006402 |
mRNA catabolic process |
196 |
29 |
0.0018819 |
| GO:0006415 |
translational termination |
95 |
18 |
0.0023526 |
| GO:0006614 |
SRP-dependent cotranslational protein targeting to membrane |
107 |
19 |
0.0031269 |
| GO:0006613 |
cotranslational protein targeting to membrane |
109 |
19 |
0.0031269 |
| GO:0044265 |
cellular macromolecule catabolic process |
798 |
76 |
0.0031269 |
| GO:0045047 |
protein targeting to ER |
110 |
19 |
0.0031269 |
| GO:0072599 |
establishment of protein localization to endoplasmic reticulum |
111 |
19 |
0.0031269 |
| GO:0006401 |
RNA catabolic process |
222 |
30 |
0.0031269 |
| GO:0019395 |
fatty acid oxidation |
77 |
16 |
0.0049168 |
| GO:0034440 |
lipid oxidation |
79 |
16 |
0.0061256 |
| GO:0070972 |
protein localization to endoplasmic reticulum |
128 |
20 |
0.0067670 |
| GO:0030258 |
lipid modification |
149 |
23 |
0.0095562 |
| GO:0006414 |
translational elongation |
121 |
19 |
0.0101051 |
| GO:0009057 |
macromolecule catabolic process |
983 |
85 |
0.0214539 |
| GO:0019083 |
viral transcription |
158 |
21 |
0.0428398 |
| GO:0016071 |
mRNA metabolic process |
615 |
57 |
0.0440893 |
| GO:0006612 |
protein targeting to membrane |
171 |
22 |
0.0600740 |
Under enriched (0)
# Re-sort by under-representation p-value. This reordering is permanent:
# `go` keeps this order for the rest of the report, including the csv output.
go <- go[order(go$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, but at least minRow rows.
# The row count is capped at nrow(go) and NA q-values are ignored, so a short
# or partly-NA result cannot produce all-NA padding rows or an NA row count.
n_sig <- sum(go$q.value2 <= 0.05, na.rm = TRUE)
rowN <- min(nrow(go), max(minRow, n_sig))
cat(kable(go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")], format = "html"))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 9208 |
GO:0035264 |
multicellular organism growth |
126 |
0 |
1 |
| 5078 |
GO:0052547 |
regulation of peptidase activity |
283 |
5 |
1 |
| 5077 |
GO:0006816 |
calcium ion transport |
276 |
5 |
1 |
| 5071 |
GO:0000902 |
cell morphogenesis |
993 |
37 |
1 |
| 5073 |
GO:0052548 |
regulation of endopeptidase activity |
270 |
5 |
1 |
| 5066 |
GO:0002520 |
immune system development |
648 |
21 |
1 |
| 5080 |
GO:2001236 |
regulation of extrinsic apoptotic signaling pathway |
140 |
1 |
1 |
| 5069 |
GO:0006520 |
cellular amino acid metabolic process |
416 |
11 |
1 |
| 5063 |
GO:0007155 |
cell adhesion |
912 |
34 |
1 |
| 5070 |
GO:0044057 |
regulation of system process |
312 |
7 |
1 |
| 5062 |
GO:0048534 |
hematopoietic or lymphoid organ development |
615 |
20 |
1 |
| 5053 |
GO:0000904 |
cell morphogenesis involved in differentiation |
701 |
25 |
1 |
| 5051 |
GO:0022610 |
biological adhesion |
916 |
35 |
1 |
| 5058 |
GO:0098662 |
inorganic cation transmembrane transport |
397 |
11 |
1 |
| 5068 |
GO:1901605 |
alpha-amino acid metabolic process |
187 |
3 |
1 |
| 5060 |
GO:0072511 |
divalent inorganic cation transport |
294 |
7 |
1 |
| 5045 |
GO:0009790 |
embryo development |
828 |
31 |
1 |
| 5055 |
GO:0070838 |
divalent metal ion transport |
292 |
7 |
1 |
| 5048 |
GO:0098660 |
inorganic ion transmembrane transport |
457 |
14 |
1 |
| 8730 |
GO:0008361 |
regulation of cell size |
83 |
0 |
1 |
Positive Effect
Over enriched categories (12)
# Show every category with q.value <= 0.05, but at least minRow rows, so the
# table may end with categories that are not significant.
# `goPos` is still in the row order returned by goseq() at this point.
# The row count is capped at nrow(goPos) and NA q-values are ignored, so a
# short or partly-NA result cannot produce all-NA padding rows or an NA count.
n_sig <- sum(goPos$q.value <= 0.05, na.rm = TRUE)
rowN <- min(nrow(goPos), max(minRow, n_sig))
cat(kable(goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")], format = "html"))
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0006986 |
response to unfolded protein |
133 |
18 |
0.0305495 |
| GO:0035966 |
response to topologically incorrect protein |
140 |
18 |
0.0305495 |
| GO:0019395 |
fatty acid oxidation |
77 |
13 |
0.0305495 |
| GO:0034440 |
lipid oxidation |
79 |
13 |
0.0305495 |
| GO:0009062 |
fatty acid catabolic process |
72 |
12 |
0.0367432 |
| GO:0072329 |
monocarboxylic acid catabolic process |
84 |
13 |
0.0367432 |
| GO:0030258 |
lipid modification |
149 |
18 |
0.0367432 |
| GO:0030968 |
endoplasmic reticulum unfolded protein response |
86 |
13 |
0.0367432 |
| GO:0006987 |
activation of signaling protein activity involved in unfolded protein response |
63 |
11 |
0.0367432 |
| GO:0034620 |
cellular response to unfolded protein |
87 |
13 |
0.0367432 |
| GO:0044265 |
cellular macromolecule catabolic process |
798 |
54 |
0.0367432 |
| GO:0032075 |
positive regulation of nuclease activity |
65 |
11 |
0.0389630 |
| GO:0035967 |
cellular response to topologically incorrect protein |
93 |
13 |
0.0582077 |
| GO:0006635 |
fatty acid beta-oxidation |
57 |
10 |
0.0630003 |
| GO:0009057 |
macromolecule catabolic process |
983 |
62 |
0.0632116 |
| GO:0032069 |
regulation of nuclease activity |
71 |
11 |
0.0649930 |
| GO:0006984 |
ER-nucleus signaling pathway |
97 |
13 |
0.0767664 |
| GO:0034389 |
lipid particle organization |
14 |
5 |
0.0869020 |
| GO:0030163 |
protein catabolic process |
596 |
42 |
0.1204515 |
| GO:0046320 |
regulation of fatty acid oxidation |
24 |
6 |
0.1533732 |
Under enriched (0)
# Re-sort by under-representation p-value. This reordering is permanent:
# `goPos` keeps this order for the rest of the report, including the csv output.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Show every category with q.value2 <= 0.05, but at least minRow rows.
# The row count is capped at nrow(goPos) and NA q-values are ignored, so a
# short or partly-NA result cannot produce all-NA padding rows or an NA count.
n_sig <- sum(goPos$q.value2 <= 0.05, na.rm = TRUE)
rowN <- min(nrow(goPos), max(minRow, n_sig))
cat(kable(goPos[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")], format = "html"))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 4197 |
GO:0007155 |
cell adhesion |
912 |
18 |
1 |
| 8143 |
GO:0061448 |
connective tissue development |
186 |
0 |
1 |
| 4196 |
GO:0022610 |
biological adhesion |
916 |
19 |
1 |
| 4194 |
GO:0007186 |
G-protein coupled receptor signaling pathway |
549 |
8 |
1 |
| 4189 |
GO:0007267 |
cell-cell signaling |
915 |
19 |
1 |
| 7967 |
GO:0051216 |
cartilage development |
149 |
0 |
1 |
| 4184 |
GO:0009790 |
embryo development |
828 |
18 |
1 |
| 4188 |
GO:0006520 |
cellular amino acid metabolic process |
416 |
6 |
1 |
| 4177 |
GO:0000904 |
cell morphogenesis involved in differentiation |
701 |
15 |
1 |
| 8429 |
GO:2001236 |
regulation of extrinsic apoptotic signaling pathway |
140 |
0 |
1 |
| 4178 |
GO:0007268 |
synaptic transmission |
566 |
11 |
1 |
| 4181 |
GO:0048598 |
embryonic morphogenesis |
468 |
8 |
1 |
| 4191 |
GO:1901605 |
alpha-amino acid metabolic process |
187 |
1 |
1 |
| 4187 |
GO:0006816 |
calcium ion transport |
276 |
3 |
1 |
| 4171 |
GO:0035295 |
tube development |
490 |
9 |
1 |
| 7326 |
GO:0021953 |
central nervous system neuron differentiation |
123 |
0 |
1 |
| 7546 |
GO:0035264 |
multicellular organism growth |
126 |
0 |
1 |
| 4167 |
GO:0006140 |
regulation of nucleotide metabolic process |
613 |
13 |
1 |
| 4166 |
GO:1900542 |
regulation of purine nucleotide metabolic process |
610 |
13 |
1 |
| 4174 |
GO:0044057 |
regulation of system process |
312 |
4 |
1 |
Negative Effect
Over enriched categories (25)
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0006415 |
translational termination |
95 |
16 |
0.0000000 |
| GO:0006414 |
translational elongation |
121 |
17 |
0.0000000 |
| GO:0000184 |
nuclear-transcribed mRNA catabolic process, nonsense-mediated decay |
118 |
17 |
0.0000000 |
| GO:0006614 |
SRP-dependent cotranslational protein targeting to membrane |
107 |
16 |
0.0000000 |
| GO:0006613 |
cotranslational protein targeting to membrane |
109 |
16 |
0.0000000 |
| GO:0045047 |
protein targeting to ER |
110 |
16 |
0.0000000 |
| GO:0072599 |
establishment of protein localization to endoplasmic reticulum |
111 |
16 |
0.0000000 |
| GO:0019083 |
viral transcription |
158 |
18 |
0.0000001 |
| GO:0019080 |
viral gene expression |
168 |
18 |
0.0000002 |
| GO:0006413 |
translational initiation |
167 |
18 |
0.0000002 |
| GO:0070972 |
protein localization to endoplasmic reticulum |
128 |
16 |
0.0000002 |
| GO:0044033 |
multi-organism metabolic process |
178 |
18 |
0.0000004 |
| GO:0000956 |
nuclear-transcribed mRNA catabolic process |
184 |
17 |
0.0000123 |
| GO:0006612 |
protein targeting to membrane |
171 |
16 |
0.0000253 |
| GO:0006402 |
mRNA catabolic process |
196 |
17 |
0.0000309 |
| GO:0043624 |
cellular protein complex disassembly |
174 |
16 |
0.0000410 |
| GO:0006401 |
RNA catabolic process |
222 |
17 |
0.0001712 |
| GO:0043241 |
protein complex disassembly |
195 |
16 |
0.0001919 |
| GO:0032984 |
macromolecular complex disassembly |
205 |
16 |
0.0003301 |
| GO:0019058 |
viral life cycle |
303 |
19 |
0.0003666 |
| GO:0042274 |
ribosomal small subunit biogenesis |
21 |
5 |
0.0079915 |
| GO:0090150 |
establishment of protein localization to membrane |
296 |
17 |
0.0079915 |
| GO:0022411 |
cellular component disassembly |
412 |
21 |
0.0088145 |
| GO:0072657 |
protein localization to membrane |
371 |
18 |
0.0413611 |
| GO:0006412 |
translation |
496 |
21 |
0.0413611 |
Under enriched (0)
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 3115 |
GO:0002520 |
immune system development |
648 |
1 |
1 |
| 3114 |
GO:0048534 |
hematopoietic or lymphoid organ development |
615 |
1 |
1 |
| 3113 |
GO:0030097 |
hemopoiesis |
587 |
1 |
1 |
| 3106 |
GO:1903047 |
mitotic cell cycle process |
746 |
5 |
1 |
| 3107 |
GO:0051301 |
cell division |
666 |
4 |
1 |
| 3110 |
GO:0022604 |
regulation of cell morphogenesis |
341 |
1 |
1 |
| 3108 |
GO:0002521 |
leukocyte differentiation |
353 |
1 |
1 |
| 5918 |
GO:0030098 |
lymphocyte differentiation |
235 |
0 |
1 |
| 9031 |
GO:0051052 |
regulation of DNA metabolic process |
235 |
0 |
1 |
| 3100 |
GO:0006325 |
chromatin organization |
563 |
4 |
1 |
| 3092 |
GO:0010558 |
negative regulation of macromolecule biosynthetic process |
958 |
9 |
1 |
| 3096 |
GO:0048285 |
organelle fission |
488 |
3 |
1 |
| 3081 |
GO:2000113 |
negative regulation of cellular macromolecule biosynthetic process |
913 |
9 |
1 |
| 8122 |
GO:0045216 |
cell-cell junction organization |
177 |
0 |
1 |
| 3071 |
GO:0000902 |
cell morphogenesis |
993 |
11 |
1 |
| 4977 |
GO:0010720 |
positive regulation of cell development |
178 |
0 |
1 |
| 11032 |
GO:0090068 |
positive regulation of cell cycle process |
196 |
0 |
1 |
| 3077 |
GO:0051276 |
chromosome organization |
738 |
7 |
1 |
| 3074 |
GO:0044092 |
negative regulation of molecular function |
778 |
7 |
1 |
| 3086 |
GO:0000280 |
nuclear division |
460 |
3 |
1 |
Final Step: csv output
# Write the three result tables to the csv/ directory (relative to the working
# directory), with file names prefixed by outFile.
# The tables are written in their current row order; `go` and `goPos` were
# re-sorted by under-representation p-value earlier in the report.
write.csv(go, file = paste0("csv/", outFile, "_main.csv"), row.names = FALSE)
write.csv(goPos, file = paste0("csv/", outFile, "Pos.csv"), row.names = FALSE)
write.csv(goNeg, file = paste0("csv/", outFile, "Neg.csv"), row.names = FALSE)