This report presents goseq results for fasting glucose under three definitions of differentially expressed (DE) genes:

  1. Genes with FDR < 0.1 marked as DE
  2. Genes from above list with positive effect marked as differentially expressed
  3. Genes from above list with negative effect marked as differentially expressed

This report was generated on June 18 2015

The goseq results are also saved as CSV files on snowwhite in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

# Load goseq (enrichment testing) and qvalue (q-value estimation).
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Input table of per-gene results and the prefix used for the CSV file names.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_GL0_all_genes.txt"
outFile <- "GL0"

data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene length table; lengths are later passed to nullp() as bias.data.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." (presumably the
# version suffix -- confirm) so both tables share the same merge key.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every row of `data` is kept, with NA length where no match exists.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by ascending p-value, then attach q-values and a 1-based rank.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector marks every gene with q-value <= 0.1 as differentially expressed. The second and third vectors mark, among those genes, the ones with positive or negative effect, respectively, as differentially expressed.

# 0/1 indicator vectors named by gene ID: all genes with q-value <= 0.1,
# then the subsets of those with a positive or a negative effect.
genes <- setNames(as.numeric(data$q.value <= 0.1), data$gene)
genesPos <- setNames(
  as.numeric(data$effect > 0 & data$q.value <= 0.1),
  data$gene
)
genesNeg <- setNames(
  as.numeric(data$effect < 0 & data$q.value <= 0.1),
  data$gene
)

There are 164 DE genes with positive effect and 91 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per indicator vector, using the
# merged gene lengths as the bias data and suppressing the diagnostic plot.
pwf <- nullp(
  genes,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  genesPos,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  genesNeg,
  genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Test GO biological-process categories for each of the three gene sets.
go <- goseq(pwf, "hg19", "ensGene", test.cats = "GO:BP")
goPos <- goseq(pwfPos, "hg19", "ensGene", test.cats = "GO:BP")
goNeg <- goseq(pwfNeg, "hg19", "ensGene", test.cats = "GO:BP")

# Reset the row names of each result table.
rownames(go) <- NULL
rownames(goPos) <- NULL
rownames(goNeg) <- NULL

# Some p-values come back slightly above 1; cap them at 1.
go$over_represented_pvalue <- pmin(go$over_represented_pvalue, 1)
go$under_represented_pvalue <- pmin(go$under_represented_pvalue, 1)
goPos$over_represented_pvalue <- pmin(goPos$over_represented_pvalue, 1)
goPos$under_represented_pvalue <- pmin(goPos$under_represented_pvalue, 1)
goNeg$over_represented_pvalue <- pmin(goNeg$over_represented_pvalue, 1)
goNeg$under_represented_pvalue <- pmin(goNeg$under_represented_pvalue, 1)

# q.value is computed from the over-representation p-values.
go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
goPos$q.value <- qvalue(goPos$over_represented_pvalue)$qvalues
goNeg$q.value <- qvalue(goNeg$over_represented_pvalue)$qvalues

# q.value2 is computed from the under-representation p-values.
go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues
goPos$q.value2 <- qvalue(goPos$under_represented_pvalue)$qvalues
goNeg$q.value2 <- qvalue(goNeg$under_represented_pvalue)$qvalues

All DE Genes (q-value <= 0.1) Results

Over enriched categories (1)

# Print every category with q.value <= 0.05, but at least minRow rows.
# head() stops at the last available row, so a result table shorter than
# rowN is printed as-is instead of being padded with all-NA rows.
rowN <- max(minRow, sum(go$q.value <= 0.05))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0055091 phospholipid homeostasis 7 4 0.0193404
GO:0055081 anion homeostasis 27 5 0.2910831
GO:0019395 fatty acid oxidation 77 7 0.6614516
GO:0034440 lipid oxidation 79 7 0.6614516
GO:0070972 protein localization to endoplasmic reticulum 128 8 0.7992201
GO:0046320 regulation of fatty acid oxidation 24 4 0.7992201
GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 7 0.7992201
GO:0030258 lipid modification 149 9 0.7992201
GO:0006613 cotranslational protein targeting to membrane 109 7 0.7992201
GO:0045047 protein targeting to ER 110 7 0.7992201
GO:0072599 establishment of protein localization to endoplasmic reticulum 111 7 0.7992201
GO:0055088 lipid homeostasis 78 6 1.0000000
GO:0019083 viral transcription 158 8 1.0000000
GO:0006415 translational termination 95 6 1.0000000
GO:0043933 macromolecular complex subunit organization 1450 36 1.0000000
GO:0006635 fatty acid beta-oxidation 57 5 1.0000000
GO:0043241 protein complex disassembly 195 9 1.0000000
GO:0001302 replicative cell aging 5 2 1.0000000
GO:0032025 response to cobalt ion 5 2 1.0000000
GO:0019080 viral gene expression 168 8 1.0000000

Under enriched (0)

# Re-sort by under-representation p-value. This reorders `go` in place, so
# any later use of `go` sees this order.
go <- go[order(go$under_represented_pvalue), ]

# Print every category with q.value2 <= 0.05, but at least minRow rows.
# head() stops at the last available row, so a result table shorter than
# rowN is printed as-is instead of being padded with all-NA rows.
rowN <- max(minRow, sum(go$q.value2 <= 0.05))
cat(kable(
  head(go[, c("category", "term", "numInCat", "numDEInCat", "q.value2")], rowN),
  format = "html"
))
category term numInCat numDEInCat q.value2
2620 GO:0002520 immune system development 648 1 1
2619 GO:0048534 hematopoietic or lymphoid organ development 615 1 1
2665 GO:0000280 nuclear division 460 0 1
2618 GO:0030097 hemopoiesis 587 1 1
2616 GO:0009611 response to wounding 833 4 1
4005 GO:0007067 mitotic nuclear division 346 0 1
3147 GO:0002521 leukocyte differentiation 353 0 1
2612 GO:0072521 purine-containing compound metabolic process 1346 10 1
2617 GO:0048598 embryonic morphogenesis 468 1 1
2615 GO:0009887 organ morphogenesis 691 3 1
2611 GO:0055086 nucleobase-containing small molecule metabolic process 1507 12 1
2610 GO:0006163 purine nucleotide metabolic process 1313 10 1
2609 GO:0019693 ribose phosphate metabolic process 1311 10 1
2607 GO:0009259 ribonucleotide metabolic process 1307 10 1
2608 GO:0009119 ribonucleoside metabolic process 1218 9 1
2613 GO:0051301 cell division 666 3 1
2604 GO:0009150 purine ribonucleotide metabolic process 1292 10 1
2599 GO:0006753 nucleoside phosphate metabolic process 1461 12 1
2602 GO:0042278 purine nucleoside metabolic process 1198 9 1
2601 GO:0046128 purine ribonucleoside metabolic process 1195 9 1

Positive Effect

Over enriched categories (0)

# Print every category with q.value <= 0.05, but at least minRow rows.
# head() stops at the last available row, so a result table shorter than
# rowN is printed as-is instead of being padded with all-NA rows.
rowN <- max(minRow, sum(goPos$q.value <= 0.05))
cat(kable(
  head(
    goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value")],
    rowN
  ),
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0006987 activation of signaling protein activity involved in unfolded protein response 63 5 1
GO:0032075 positive regulation of nuclease activity 65 5 1
GO:0032069 regulation of nuclease activity 71 5 1
GO:1903050 regulation of proteolysis involved in cellular protein catabolic process 105 6 1
GO:0032025 response to cobalt ion 5 2 1
GO:0006259 DNA metabolic process 838 19 1
GO:1903362 regulation of cellular protein catabolic process 110 6 1
GO:0045732 positive regulation of protein catabolic process 110 6 1
GO:0009896 positive regulation of catabolic process 194 8 1
GO:0032434 regulation of proteasomal ubiquitin-dependent protein catabolic process 75 5 1
GO:0019395 fatty acid oxidation 77 5 1
GO:0032436 positive regulation of proteasomal ubiquitin-dependent protein catabolic process 45 4 1
GO:0034440 lipid oxidation 79 5 1
GO:0042176 regulation of protein catabolic process 207 8 1
GO:0046320 regulation of fatty acid oxidation 24 3 1
GO:1901800 positive regulation of proteasomal protein catabolic process 51 4 1
GO:0030968 endoplasmic reticulum unfolded protein response 86 5 1
GO:0034620 cellular response to unfolded protein 87 5 1
GO:0045862 positive regulation of proteolysis 87 5 1
GO:1903052 positive regulation of proteolysis involved in cellular protein catabolic process 52 4 1

Under enriched (0)

# Re-sort by under-representation p-value. This reorders `goPos` in place, so
# any later use of `goPos` sees this order.
goPos <- goPos[order(goPos$under_represented_pvalue), ]

# Print every category with q.value2 <= 0.05, but at least minRow rows.
# head() stops at the last available row, so a result table shorter than
# rowN is printed as-is instead of being padded with all-NA rows.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05))
cat(kable(
  head(
    goPos[, c("category", "term", "numInCat", "numDEInCat", "q.value2")],
    rowN
  ),
  format = "html"
))
category term numInCat numDEInCat q.value2
2029 GO:0009611 response to wounding 833 1 1
2027 GO:1901564 organonitrogen compound metabolic process 2217 11 1
2026 GO:0055086 nucleobase-containing small molecule metabolic process 1507 6 1
2025 GO:0072521 purine-containing compound metabolic process 1346 5 1
2018 GO:0006753 nucleoside phosphate metabolic process 1461 6 1
2016 GO:0009117 nucleotide metabolic process 1456 6 1
2021 GO:0006163 purine nucleotide metabolic process 1313 5 1
2020 GO:0019693 ribose phosphate metabolic process 1311 5 1
2019 GO:1901565 organonitrogen compound catabolic process 1294 5 1
2017 GO:0009259 ribonucleotide metabolic process 1307 5 1
5976 GO:0033124 regulation of GTP catabolic process 476 0 1
7235 GO:0043087 regulation of GTPase activity 474 0 1
2014 GO:0009150 purine ribonucleotide metabolic process 1292 5 1
2028 GO:0009887 organ morphogenesis 691 1 1
2010 GO:1901657 glycosyl compound metabolic process 1252 5 1
8412 GO:0048598 embryonic morphogenesis 468 0 1
7370 GO:0043547 positive regulation of GTPase activity 440 0 1
2009 GO:0009116 nucleoside metabolic process 1238 5 1
2074 GO:0000280 nuclear division 460 0 1
2024 GO:0002520 immune system development 648 1 1

Negative Effect

Over enriched categories (0)

category term numInCat numDEInCat q.value
GO:0055091 phospholipid homeostasis 7 3 0.0581926
GO:0055081 anion homeostasis 27 4 0.0581926
GO:0006415 translational termination 95 5 0.0958987
GO:0019083 viral transcription 158 6 0.0958987
GO:0019080 viral gene expression 168 6 0.0958987
GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 5 0.0958987
GO:0006413 translational initiation 167 6 0.0958987
GO:0006613 cotranslational protein targeting to membrane 109 5 0.0958987
GO:0045047 protein targeting to ER 110 5 0.0958987
GO:0072599 establishment of protein localization to endoplasmic reticulum 111 5 0.0958987
GO:0044033 multi-organism metabolic process 178 6 0.0958987
GO:0006414 translational elongation 121 5 0.1239232
GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 5 0.1239232
GO:0070972 protein localization to endoplasmic reticulum 128 5 0.1605774
GO:0055088 lipid homeostasis 78 4 0.4647477
GO:0034375 high-density lipoprotein particle remodeling 10 2 0.7015551
GO:0006612 protein targeting to membrane 171 5 0.7164354
GO:0043624 cellular protein complex disassembly 174 5 0.8556412
GO:0043691 reverse cholesterol transport 11 2 0.8556412
GO:0000956 nuclear-transcribed mRNA catabolic process 184 5 0.8565072

Under enriched (0)

category term numInCat numDEInCat q.value2
1591 GO:0050896 response to stimulus 6126 19 1
1587 GO:0043412 macromolecule modification 2854 7 1
1585 GO:0007154 cell communication 4559 14 1
1584 GO:0007165 signal transduction 4077 12 1
11917 GO:1903047 mitotic cell cycle process 746 0 1
1579 GO:0023052 signaling 4496 14 1
1580 GO:0044700 single organism signaling 4496 14 1
1583 GO:0006950 response to stress 2909 7 1
1581 GO:0006464 cellular protein modification process 2739 7 1
1582 GO:0036211 protein modification process 2739 7 1
8688 GO:0051301 cell division 666 0 1
1575 GO:0048518 positive regulation of biological process 3684 11 1
3140 GO:0006974 cellular response to DNA damage stimulus 668 0 1
1590 GO:0045893 positive regulation of transcription, DNA-templated 988 1 1
1614 GO:0002520 immune system development 648 0 1
1589 GO:0010647 positive regulation of cell communication 1000 1 1
1569 GO:0044260 cellular macromolecule metabolic process 6484 23 1
1588 GO:0023056 positive regulation of signaling 995 1 1
1563 GO:0051716 cellular response to stimulus 4976 17 1
8260 GO:0048534 hematopoietic or lymphoid organ development 615 0 1

Final Step: csv output

# Write the three result tables to the csv/ directory (relative to the
# working directory), naming each file from the outFile prefix.
mainCsv <- paste0("csv/", outFile, "_main.csv")
posCsv <- paste0("csv/", outFile, "Pos.csv")
negCsv <- paste0("csv/", outFile, "Neg.csv")

write.csv(go, file = mainCsv, row.names = FALSE)
write.csv(goPos, file = posCsv, row.names = FALSE)
write.csv(goNeg, file = negCsv, row.names = FALSE)