This report has goseq results for NGT versus T2D when:
- Genes are marked as differentially expressed with q value < 0.05
- Genes from the first list that have a positive effect are marked as differentially expressed
- Genes from the first list that have a negative effect are marked as differentially expressed
This report was generated on June 15 2015
Goseq results also saved in csv files located on snowwhite in directory:
/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv
Step 1: Load in all the necessary data/libraries
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)
# Input locations and the prefix used for the CSV files written in the final
# step of the report.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt"
outFile <- "ngt_t2d2"
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"

# Read both tables with a header row, keeping strings as character vectors.
# TRUE is spelled out because T is an ordinary, reassignable variable.
data <- read.table(fName, as.is = TRUE, header = TRUE)
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so that the two
# tables can be joined on the same key. sub() is vectorised and always returns
# a character vector, unlike sapply(), whose return type depends on its input.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every row of the results table is kept; rows whose gene has no
# entry in the length table get NA in the columns that come from it.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by p-value, attach q-values and record each gene's position in that
# ordering. seq_len(nrow()) yields an empty sequence for an empty table,
# whereas seq(1, 0) would count backwards.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table further down.
minRow <- 20
Step 2: Create genes vectors
The first vector simply marks the genes with q-value <= 0.05 as differentially expressed. The second and third vectors mark the genes
with positive or negative effect in this list as differentially expressed.
# Build named 0/1 indicator vectors (1 = differentially expressed), one entry
# per row of `data`, named by gene ID.
#   genes    - q-value at or below 0.05
#   genesPos - as above, and effect strictly greater than 0
#   genesNeg - as above, and effect strictly less than 0
# A gene whose effect is exactly 0 is therefore in neither directional vector.
is_de <- data$q.value <= 0.05
genes <- setNames(as.numeric(is_de), data$gene)
genesPos <- setNames(as.numeric(is_de & data$effect > 0), data$gene)
genesNeg <- setNames(as.numeric(is_de & data$effect < 0), data$gene)
There are 15 DE genes with positive effect and 8 DE genes with negative effect.
Step 3: PWFs
# Fit one PWF (see the step heading) per indicator vector. All three calls use
# the hg19 genome with Ensembl gene IDs, pass the merged gene length column as
# the bias data, and suppress the diagnostic fit plot.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
Step 4: run goseq
# For each of the three gene sets: run goseq restricted to the GO:BP test
# category, reset the row names, cap both p-value columns at 1, and derive
# q-values from them. `q.value` comes from the over-representation p-values
# and `q.value2` from the under-representation p-values.

# --- All differentially expressed genes ---
go <- goseq(pwf, "hg19", "ensGene", test.cats = "GO:BP")
rownames(go) <- NULL
# Some p-values come back slightly above 1; cap them at 1.
go$over_represented_pvalue <- pmin(go$over_represented_pvalue, 1)
go$under_represented_pvalue <- pmin(go$under_represented_pvalue, 1)
go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues

# --- Positive-effect genes ---
goPos <- goseq(pwfPos, "hg19", "ensGene", test.cats = "GO:BP")
rownames(goPos) <- NULL
goPos$over_represented_pvalue <- pmin(goPos$over_represented_pvalue, 1)
goPos$under_represented_pvalue <- pmin(goPos$under_represented_pvalue, 1)
goPos$q.value <- qvalue(goPos$over_represented_pvalue)$qvalues
goPos$q.value2 <- qvalue(goPos$under_represented_pvalue)$qvalues

# --- Negative-effect genes ---
goNeg <- goseq(pwfNeg, "hg19", "ensGene", test.cats = "GO:BP")
rownames(goNeg) <- NULL
goNeg$over_represented_pvalue <- pmin(goNeg$over_represented_pvalue, 1)
goNeg$under_represented_pvalue <- pmin(goNeg$under_represented_pvalue, 1)
goNeg$q.value <- qvalue(goNeg$over_represented_pvalue)$qvalues
goNeg$q.value2 <- qvalue(goNeg$under_represented_pvalue)$qvalues
FDR < 0.05
Over enriched categories (0)
# Number of rows to print: the count of categories with over-representation
# q-value <= 0.05, but never fewer than minRow. na.rm = TRUE keeps a stray NA
# q-value from turning rowN into NA.
rowN <- max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE))
# Print the first rowN rows of `go` in its current order as an HTML table.
# head() stops at the last available row, so a result with fewer than rowN
# categories is not padded with all-NA rows the way go[1:rowN, ] would be.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0021572 |
rhombomere 6 development |
1 |
1 |
1 |
| GO:0045646 |
regulation of erythrocyte differentiation |
37 |
2 |
1 |
| GO:0036034 |
mediator complex assembly |
1 |
1 |
1 |
| GO:2001176 |
regulation of mediator complex assembly |
1 |
1 |
1 |
| GO:2001178 |
positive regulation of mediator complex assembly |
1 |
1 |
1 |
| Other |
NA |
1443 |
7 |
1 |
| GO:0021730 |
trigeminal sensory nucleus development |
1 |
1 |
1 |
| GO:0021740 |
principal sensory nucleus of trigeminal nerve development |
1 |
1 |
1 |
| GO:2000795 |
negative regulation of epithelial cell proliferation involved in lung morphogenesis |
1 |
1 |
1 |
| GO:0048857 |
neural nucleus development |
54 |
2 |
1 |
| GO:0061141 |
lung ciliated cell differentiation |
2 |
1 |
1 |
| GO:0035283 |
central nervous system segmentation |
3 |
1 |
1 |
| GO:0035284 |
brain segmentation |
3 |
1 |
1 |
| GO:0021571 |
rhombomere 5 development |
3 |
1 |
1 |
| GO:2000790 |
regulation of mesenchymal cell proliferation involved in lung development |
2 |
1 |
1 |
| GO:2000791 |
negative regulation of mesenchymal cell proliferation involved in lung development |
2 |
1 |
1 |
| GO:0035563 |
positive regulation of chromatin binding |
3 |
1 |
1 |
| GO:0060486 |
Clara cell differentiation |
3 |
1 |
1 |
| GO:1901029 |
negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway |
5 |
1 |
1 |
| GO:0032844 |
regulation of homeostatic process |
279 |
3 |
1 |
Under enriched (0)
# Re-sort the overall results by under-representation p-value, smallest
# first. `go` keeps this order from here on.
go <- go[order(go$under_represented_pvalue), ]
# Number of rows to print: the count of categories with under-representation
# q-value <= 0.05, but never fewer than minRow. na.rm = TRUE keeps a stray NA
# q-value from turning rowN into NA.
rowN <- max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a result with fewer than rowN
# categories is not padded with all-NA rows the way go[1:rowN, ] would be.
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 697 |
GO:0008150 |
biological_process |
13156 |
13 |
1 |
| 696 |
GO:0009987 |
cellular process |
11765 |
12 |
1 |
| 695 |
GO:0044699 |
single-organism process |
10473 |
10 |
1 |
| 5312 |
GO:0033036 |
macromolecule localization |
2031 |
0 |
1 |
| 694 |
GO:0006950 |
response to stress |
2911 |
1 |
1 |
| 2707 |
GO:0008104 |
protein localization |
1760 |
0 |
1 |
| 693 |
GO:0051179 |
localization |
4340 |
3 |
1 |
| 692 |
GO:0044763 |
single-organism cellular process |
9512 |
10 |
1 |
| 744 |
GO:0045184 |
establishment of protein localization |
1422 |
0 |
1 |
| 691 |
GO:0019538 |
protein metabolic process |
4124 |
3 |
1 |
| 3568 |
GO:0015031 |
protein transport |
1320 |
0 |
1 |
| 688 |
GO:0008152 |
metabolic process |
9727 |
11 |
1 |
| 686 |
GO:0071704 |
organic substance metabolic process |
8973 |
10 |
1 |
| 687 |
GO:0050896 |
response to stimulus |
6127 |
6 |
1 |
| 2997 |
GO:0009719 |
response to endogenous stimulus |
1136 |
0 |
1 |
| 718 |
GO:0070727 |
cellular macromolecule localization |
1129 |
0 |
1 |
| 2399 |
GO:0006952 |
defense response |
1181 |
0 |
1 |
| 729 |
GO:0034613 |
cellular protein localization |
1124 |
0 |
1 |
| 2110 |
GO:0006508 |
proteolysis |
1110 |
0 |
1 |
| 1612 |
GO:0003008 |
system process |
1112 |
0 |
1 |
Positive Effect
Over enriched categories (0)
# Number of rows to print: the count of positive-effect categories with
# over-representation q-value <= 0.05, but never fewer than minRow.
# na.rm = TRUE keeps a stray NA q-value from turning rowN into NA.
rowN <- max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
# Print the first rowN rows of `goPos` in its current order as an HTML table.
# head() stops at the last available row, so a result with fewer than rowN
# categories is not padded with all-NA rows the way goPos[1:rowN, ] would be.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0045646 |
regulation of erythrocyte differentiation |
37 |
2 |
1 |
| GO:0036034 |
mediator complex assembly |
1 |
1 |
1 |
| GO:2001176 |
regulation of mediator complex assembly |
1 |
1 |
1 |
| GO:2001178 |
positive regulation of mediator complex assembly |
1 |
1 |
1 |
| GO:0021572 |
rhombomere 6 development |
1 |
1 |
1 |
| GO:0035563 |
positive regulation of chromatin binding |
3 |
1 |
1 |
| GO:0021571 |
rhombomere 5 development |
3 |
1 |
1 |
| GO:0035283 |
central nervous system segmentation |
3 |
1 |
1 |
| GO:0035284 |
brain segmentation |
3 |
1 |
1 |
| GO:0030218 |
erythrocyte differentiation |
95 |
2 |
1 |
| GO:0034101 |
erythrocyte homeostasis |
101 |
2 |
1 |
| GO:1901029 |
negative regulation of mitochondrial outer membrane permeabilization involved in apoptotic signaling pathway |
5 |
1 |
1 |
| GO:0002262 |
myeloid cell homeostasis |
117 |
2 |
1 |
| GO:0006357 |
regulation of transcription from RNA polymerase II promoter |
1306 |
5 |
1 |
| GO:0021546 |
rhombomere development |
6 |
1 |
1 |
| GO:0043619 |
regulation of transcription from RNA polymerase II promoter in response to oxidative stress |
8 |
1 |
1 |
| GO:0046886 |
positive regulation of hormone biosynthetic process |
8 |
1 |
1 |
| GO:0042090 |
interleukin-12 biosynthetic process |
8 |
1 |
1 |
| GO:0045075 |
regulation of interleukin-12 biosynthetic process |
8 |
1 |
1 |
| GO:0035561 |
regulation of chromatin binding |
8 |
1 |
1 |
Under enriched (0)
# Re-sort the positive-effect results by under-representation p-value,
# smallest first. `goPos` keeps this order from here on.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Number of rows to print: the count of categories with under-representation
# q-value <= 0.05, but never fewer than minRow. na.rm = TRUE keeps a stray NA
# q-value from turning rowN into NA.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
# head() stops at the last available row, so a result with fewer than rowN
# categories is not padded with all-NA rows the way goPos[1:rowN, ] would be.
cat(kable(
  head(goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 441 |
GO:0008150 |
biological_process |
13156 |
9 |
1 |
| 440 |
GO:0044699 |
single-organism process |
10473 |
6 |
1 |
| 439 |
GO:0050896 |
response to stimulus |
6127 |
2 |
1 |
| 437 |
GO:0009987 |
cellular process |
11765 |
8 |
1 |
| 438 |
GO:0051179 |
localization |
4340 |
1 |
1 |
| 434 |
GO:0044763 |
single-organism cellular process |
9512 |
6 |
1 |
| 433 |
GO:0051716 |
cellular response to stimulus |
4976 |
2 |
1 |
| 11466 |
GO:1901564 |
organonitrogen compound metabolic process |
2215 |
0 |
1 |
| 436 |
GO:0051234 |
establishment of localization |
3588 |
1 |
1 |
| 432 |
GO:0044710 |
single-organism metabolic process |
4810 |
2 |
1 |
| 435 |
GO:0006810 |
transport |
3504 |
1 |
1 |
| 12713 |
GO:0033036 |
macromolecule localization |
2031 |
0 |
1 |
| 10235 |
GO:0071702 |
organic substance transport |
1991 |
0 |
1 |
| 491 |
GO:0035556 |
intracellular signal transduction |
1996 |
0 |
1 |
| 431 |
GO:0007154 |
cell communication |
4561 |
2 |
1 |
| 11359 |
GO:1901135 |
carbohydrate derivative metabolic process |
1916 |
0 |
1 |
| 429 |
GO:0023052 |
signaling |
4498 |
2 |
1 |
| 430 |
GO:0044700 |
single organism signaling |
4498 |
2 |
1 |
| 424 |
GO:0008152 |
metabolic process |
9727 |
7 |
1 |
| 12722 |
GO:0019637 |
organophosphate metabolic process |
1768 |
0 |
1 |
Negative Effect
Over enriched categories (0)
| category |
term |
numInCat |
numDEInCat |
q.value |
| GO:0021730 |
trigeminal sensory nucleus development |
1 |
1 |
1 |
| GO:0021740 |
principal sensory nucleus of trigeminal nerve development |
1 |
1 |
1 |
| GO:2000795 |
negative regulation of epithelial cell proliferation involved in lung morphogenesis |
1 |
1 |
1 |
| GO:0061141 |
lung ciliated cell differentiation |
2 |
1 |
1 |
| GO:2000790 |
regulation of mesenchymal cell proliferation involved in lung development |
2 |
1 |
1 |
| GO:2000791 |
negative regulation of mesenchymal cell proliferation involved in lung development |
2 |
1 |
1 |
| GO:0060486 |
Clara cell differentiation |
3 |
1 |
1 |
| GO:0060916 |
mesenchymal cell proliferation involved in lung development |
4 |
1 |
1 |
| GO:0007196 |
adenylate cyclase-inhibiting G-protein coupled glutamate receptor signaling pathway |
4 |
1 |
1 |
| GO:0061179 |
negative regulation of insulin secretion involved in cellular response to glucose stimulus |
6 |
1 |
1 |
| GO:0021960 |
anterior commissure morphogenesis |
5 |
1 |
1 |
| GO:0072201 |
negative regulation of mesenchymal cell proliferation |
6 |
1 |
1 |
| GO:0060510 |
Type II pneumocyte differentiation |
6 |
1 |
1 |
| GO:0060509 |
Type I pneumocyte differentiation |
6 |
1 |
1 |
| GO:0071679 |
commissural neuron axon guidance |
6 |
1 |
1 |
| GO:2000794 |
regulation of epithelial cell proliferation involved in lung morphogenesis |
7 |
1 |
1 |
| GO:0007216 |
G-protein coupled glutamate receptor signaling pathway |
7 |
1 |
1 |
| GO:0060502 |
epithelial cell proliferation involved in lung morphogenesis |
8 |
1 |
1 |
| GO:0061140 |
lung secretory cell differentiation |
10 |
1 |
1 |
| GO:2000647 |
negative regulation of stem cell proliferation |
13 |
1 |
1 |
Under enriched (0)
| |
category |
term |
numInCat |
numDEInCat |
q.value2 |
| 415 |
GO:0008150 |
biological_process |
13156 |
4 |
1 |
| 414 |
GO:0043170 |
macromolecule metabolic process |
7144 |
1 |
1 |
| 3863 |
GO:0019538 |
protein metabolic process |
4124 |
0 |
1 |
| 413 |
GO:0044260 |
cellular macromolecule metabolic process |
6487 |
1 |
1 |
| 448 |
GO:0044267 |
cellular protein metabolic process |
3491 |
0 |
1 |
| 2151 |
GO:0006950 |
response to stress |
2911 |
0 |
1 |
| 447 |
GO:0043412 |
macromolecule modification |
2854 |
0 |
1 |
| 1827 |
GO:0006464 |
cellular protein modification process |
2739 |
0 |
1 |
| 6022 |
GO:0036211 |
protein modification process |
2739 |
0 |
1 |
| 2179 |
GO:0006996 |
organelle organization |
2485 |
0 |
1 |
| 412 |
GO:0009987 |
cellular process |
11765 |
4 |
1 |
| 5156 |
GO:0033036 |
macromolecule localization |
2031 |
0 |
1 |
| 411 |
GO:0060255 |
regulation of macromolecule metabolic process |
4403 |
1 |
1 |
| 406 |
GO:0071704 |
organic substance metabolic process |
8973 |
3 |
1 |
| 410 |
GO:0010467 |
gene expression |
4325 |
1 |
1 |
| 1031 |
GO:0002376 |
immune system process |
1939 |
0 |
1 |
| 6871 |
GO:0044085 |
cellular component biogenesis |
1825 |
0 |
1 |
| 409 |
GO:0090304 |
nucleic acid metabolic process |
4168 |
1 |
1 |
| 403 |
GO:0044238 |
primary metabolic process |
8723 |
3 |
1 |
| 2477 |
GO:0008104 |
protein localization |
1760 |
0 |
1 |
Final Step: csv output
# Write the three result tables, without row names, into the relative "csv/"
# directory, using `outFile` as the file name prefix.
# NOTE(review): the overall file uses the suffix "_main.csv" while the
# directional files use "Pos.csv" / "Neg.csv" with no underscore; the names
# are kept as they are so that existing consumers of these files still work.
main_csv <- paste0("csv/", outFile, "_main.csv")
pos_csv <- paste0("csv/", outFile, "Pos.csv")
neg_csv <- paste0("csv/", outFile, "Neg.csv")

write.csv(go, file = main_csv, row.names = FALSE)
write.csv(goPos, file = pos_csv, row.names = FALSE)
write.csv(goNeg, file = neg_csv, row.names = FALSE)