This report has goseq results for NGT versus T2D when:

  1. Genes with FDR < 0.1 marked as DE
  2. Genes from above list with positive effect marked as differentially expressed
  3. Genes from above list with negative effect marked as differentially expressed

This report was generated on June 18 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt" 
outFile <- "ngt_t2d"

data <- read.table(fName, as.is=T, header=T)

gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths = read.table(gene_length_file, header=T, as.is=T);
gene_lengths$gene = sapply(gene_lengths$gene, function(x){ unlist(strsplit(x, split="[.]"))[1] });

data$gene <- sapply(data$gene, function(x){ unlist(strsplit(x, split="[.]"))[1] });

data <- merge(data, gene_lengths, by="gene", all.x=T)
data <- data[order(data$p.value),]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq(1,length(data[,1]))

minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

genes <- as.numeric(data$q.value <= 0.1)
genesPos <- as.numeric(data$q.value <= 0.1 & data$effect > 0)
genesNeg <- as.numeric(data$q.value <= 0.1 & data$effect < 0)

names(genes) <- data$gene
names(genesPos) <- data$gene
names(genesNeg) <- data$gene

There are 171 DE genes with postive effect and 149 DE genes with negative effect.

Step 3: PWFs

pwf <- nullp(genes,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)
pwfPos=nullp(genesPos,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)
pwfNeg=nullp(genesNeg,"hg19","ensGene",bias.data=data$length, plot.fit=FALSE)

Step 4: run goseq

go <- goseq(pwf,"hg19","ensGene",test.cats=c("GO:BP"))
goPos <- goseq(pwfPos,"hg19","ensGene",test.cats=c("GO:BP"))
goNeg <- goseq(pwfNeg,"hg19","ensGene",test.cats=c("GO:BP"))

rownames(go) <- NULL
rownames(goPos) <- NULL
rownames(goNeg) <- NULL

# Fix problem with some p-values being slightly more than 1
go$over_represented_pvalue[go$over_represented_pvalue>1]=1;
go$under_represented_pvalue[go$under_represented_pvalue>1]=1;
goPos$over_represented_pvalue[goPos$over_represented_pvalue>1]=1;
goPos$under_represented_pvalue[goPos$under_represented_pvalue>1]=1;
goNeg$over_represented_pvalue[goNeg$over_represented_pvalue>1]=1;
goNeg$under_represented_pvalue[goNeg$under_represented_pvalue>1]=1;

go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
goPos$q.value=qvalue(goPos$over_represented_pvalue)$qvalues
goNeg$q.value=qvalue(goNeg$over_represented_pvalue)$qvalues

go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues
goPos$q.value2=qvalue(goPos$under_represented_pvalue)$qvalues
goNeg$q.value2=qvalue(goNeg$under_represented_pvalue)$qvalues

Top 1000 Results

Over enriched categories (3)

rowN <- max(minRow, sum(go$q.value<=0.05))
cat(kable(go[1:rowN,c("category","term","numInCat","numDEInCat","q.value")],format="html"));
category term numInCat numDEInCat q.value
GO:0022904 respiratory electron transport chain 105 11 0.0048269
GO:0022900 electron transport chain 106 11 0.0048269
GO:0045333 cellular respiration 158 13 0.0075522
GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 6 0.1179422
GO:0042773 ATP synthesis coupled electron transport 54 6 0.3650270
GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 6 0.3650270
GO:0015980 energy derivation by oxidation of organic compounds 319 15 0.9287550
GO:0006091 generation of precursor metabolites and energy 395 17 1.0000000
GO:0006119 oxidative phosphorylation 70 6 1.0000000
GO:0050882 voluntary musculoskeletal movement 3 2 1.0000000
GO:0055114 oxidation-reduction process 895 29 1.0000000
GO:0009123 nucleoside monophosphate metabolic process 487 19 1.0000000
GO:1902600 hydrogen ion transmembrane transport 84 6 1.0000000
GO:0071425 hematopoietic stem cell proliferation 14 3 1.0000000
GO:0035521 monoubiquitinated histone deubiquitination 4 2 1.0000000
GO:0035522 monoubiquitinated histone H2A deubiquitination 4 2 1.0000000
GO:2001256 regulation of store-operated calcium entry 5 2 1.0000000
GO:0015949 nucleobase-containing small molecule interconversion 19 3 1.0000000
GO:0070544 histone H3-K36 demethylation 6 2 1.0000000
GO:0055091 phospholipid homeostasis 7 2 1.0000000

Under enriched (0)

go <- go[order(go$under_represented_pvalue),]
rowN <- max(minRow, sum(go$q.value2<=0.05))
cat(kable(go[1:rowN,c("category","term","numInCat","numDEInCat","q.value2")],format="html"));
category term numInCat numDEInCat q.value2
2939 GO:0023052 signaling 4498 58 1
2940 GO:0044700 single organism signaling 4498 58 1
2938 GO:0007165 signal transduction 4078 51 1
2937 GO:0007154 cell communication 4561 60 1
2934 GO:0051716 cellular response to stimulus 4976 67 1
2936 GO:0022610 biological adhesion 920 6 1
2932 GO:0007166 cell surface receptor signaling pathway 2373 26 1
2935 GO:0007155 cell adhesion 916 6 1
2933 GO:0055080 cation homeostasis 423 1 1
12008 GO:1903034 regulation of response to wounding 292 0 1
2927 GO:0050776 regulation of immune response 667 4 1
6069 GO:0031589 cell-substrate adhesion 256 0 1
2931 GO:0055065 metal ion homeostasis 388 1 1
2925 GO:0051130 positive regulation of cellular component organization 628 4 1
2918 GO:0035556 intracellular signal transduction 1996 24 1
2921 GO:0071495 cellular response to endogenous stimulus 831 7 1
2930 GO:0006873 cellular ion homeostasis 365 1 1
2926 GO:0044087 regulation of cellular component biogenesis 436 2 1
2920 GO:0080134 regulation of response to stress 854 7 1
2929 GO:0030003 cellular cation homeostasis 358 1 1

Positive Effect

Over enriched categories (0)

rowN <- max(minRow, sum(goPos$q.value<=0.05))
cat(kable(goPos[1:rowN,c("category","term","numInCat","numDEInCat","q.value")],format="html"));
category term numInCat numDEInCat q.value
GO:0010468 regulation of gene expression 3468 59 1
GO:0010556 regulation of macromolecule biosynthetic process 3216 54 1
GO:2000112 regulation of cellular macromolecule biosynthetic process 3131 52 1
GO:0009889 regulation of biosynthetic process 3370 54 1
GO:0031326 regulation of cellular biosynthetic process 3343 53 1
GO:0006355 regulation of transcription, DNA-templated 2853 47 1
GO:0060255 regulation of macromolecule metabolic process 4403 65 1
GO:2001141 regulation of RNA biosynthetic process 2890 47 1
GO:0051252 regulation of RNA metabolic process 2968 48 1
GO:0051171 regulation of nitrogen compound metabolic process 3755 58 1
GO:0070544 histone H3-K36 demethylation 6 2 1
GO:0009059 macromolecule biosynthetic process 4027 60 1
GO:0019219 regulation of nucleobase-containing compound metabolic process 3670 57 1
GO:0044333 Wnt signaling pathway involved in digestive tract morphogenesis 1 1 1
GO:2000056 regulation of Wnt signaling pathway involved in digestive tract morphogenesis 1 1 1
GO:2000057 negative regulation of Wnt signaling pathway involved in digestive tract morphogenesis 1 1 1
GO:0006890 retrograde vesicle-mediated transport, Golgi to ER 26 3 1
GO:0010467 gene expression 4325 62 1
GO:0006357 regulation of transcription from RNA polymerase II promoter 1306 25 1
GO:0006325 chromatin organization 563 14 1

Under enriched (0)

goPos <- goPos[order(goPos$under_represented_pvalue),]
rowN <- max(minRow, sum(goPos$q.value2<=0.05))
cat(kable(goPos[1:rowN,c("category","term","numInCat","numDEInCat","q.value2")],format="html"));
category term numInCat numDEInCat q.value2
2264 GO:0022610 biological adhesion 920 3 1
2266 GO:0050776 regulation of immune response 667 1 1
2263 GO:0007155 cell adhesion 916 3 1
5672 GO:0031347 regulation of defense response 463 0 1
2941 GO:0002764 immune response-regulating signaling pathway 429 0 1
2261 GO:0030030 cell projection organization 991 4 1
2265 GO:0050877 neurological system process 627 1 1
2262 GO:0031175 neuron projection development 703 2 1
9267 GO:0055080 cation homeostasis 423 0 1
3445 GO:0006520 cellular amino acid metabolic process 417 0 1
2248 GO:0023052 signaling 4498 36 1
2249 GO:0044700 single organism signaling 4498 36 1
2259 GO:0048666 neuron development 798 3 1
2244 GO:0044710 single-organism metabolic process 4810 39 1
2247 GO:0044281 small molecule metabolic process 2953 21 1
2242 GO:0007154 cell communication 4561 37 1
2677 GO:0002253 activation of immune response 374 0 1
2256 GO:0006811 ion transport 1118 5 1
2258 GO:0080134 regulation of response to stress 854 3 1
9256 GO:0055065 metal ion homeostasis 388 0 1

Negative Effect

Over enriched categories (12)

category term numInCat numDEInCat q.value
GO:0045333 cellular respiration 158 13 0.0000012
GO:0022904 respiratory electron transport chain 105 11 0.0000012
GO:0022900 electron transport chain 106 11 0.0000012
GO:0015980 energy derivation by oxidation of organic compounds 319 14 0.0004166
GO:0006091 generation of precursor metabolites and energy 395 15 0.0007362
GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 6 0.0011150
GO:0055114 oxidation-reduction process 895 22 0.0011941
GO:0042773 ATP synthesis coupled electron transport 54 6 0.0035955
GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 6 0.0035955
GO:0006119 oxidative phosphorylation 70 6 0.0143490
GO:0009123 nucleoside monophosphate metabolic process 487 14 0.0328969
GO:1902600 hydrogen ion transmembrane transport 84 6 0.0328969
GO:0015992 proton transport 112 6 0.1507653
GO:0006818 hydrogen transport 114 6 0.1547348
GO:0050882 voluntary musculoskeletal movement 3 2 0.2812328
GO:0009161 ribonucleoside monophosphate metabolic process 477 12 0.3159695
GO:0044281 small molecule metabolic process 2953 38 0.4046591
GO:0046034 ATP metabolic process 441 11 0.5664303
GO:0009167 purine ribonucleoside monophosphate metabolic process 465 11 0.7616442
GO:0009126 purine nucleoside monophosphate metabolic process 466 11 0.7616442

Under enriched (0)

category term numInCat numDEInCat q.value2
1781 GO:0043170 macromolecule metabolic process 7144 34 0.5120055
1780 GO:0060255 regulation of macromolecule metabolic process 4403 18 1.0000000
1779 GO:0044260 cellular macromolecule metabolic process 6487 33 1.0000000
1778 GO:0007165 signal transduction 4078 18 1.0000000
1777 GO:0065007 biological regulation 8485 50 1.0000000
1775 GO:0050794 regulation of cellular process 7725 45 1.0000000
1776 GO:0043412 macromolecule modification 2854 11 1.0000000
1773 GO:0019222 regulation of metabolic process 5256 27 1.0000000
1771 GO:0023052 signaling 4498 22 1.0000000
1772 GO:0044700 single organism signaling 4498 22 1.0000000
1768 GO:0080090 regulation of primary metabolic process 4733 24 1.0000000
1769 GO:0006464 cellular protein modification process 2739 11 1.0000000
1770 GO:0036211 protein modification process 2739 11 1.0000000
1766 GO:0007154 cell communication 4561 23 1.0000000
1774 GO:0051246 regulation of protein metabolic process 1745 5 1.0000000
1765 GO:0050789 regulation of biological process 8100 49 1.0000000
1764 GO:0051716 cellular response to stimulus 4976 26 1.0000000
1763 GO:0019538 protein metabolic process 4124 20 1.0000000
1767 GO:0007166 cell surface receptor signaling pathway 2373 9 1.0000000
1762 GO:0031323 regulation of cellular metabolic process 4753 25 1.0000000

Final Step: csv output

write.csv(go,file=paste("csv/", outFile,"_main.csv",sep=''), row.names=FALSE)
write.csv(goPos,file=paste("csv/", outFile,"Pos.csv",sep=''), row.names=FALSE)
write.csv(goNeg,file=paste("csv/", outFile,"Neg.csv",sep=''), row.names=FALSE)