This report has goseq results for fasting glucose when:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

Goseq results also saved in csv files located on snowwhite in directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Input table for the trait and the label used to build output file names.
# The table must provide `gene` and `p.value` columns (both are used below).
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_GL0_all_genes.txt"
outFile <- "GL0"

data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene-length table keyed by `gene`.
# NOTE(review): presumably supplies the `length` column used later as bias
# data for nullp() -- confirm against the file header.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so that both tables
# share the same key. sub() is vectorised, so no per-element apply is needed.
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every row of `data` is kept even when no length is available.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Rank genes by ascending p-value and attach q-values.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
# seq_len() stays empty for a zero-row table, unlike seq(1, 0).
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 0/1 indicator vectors, named by gene ID, marking which genes count as
# differentially expressed under each definition:
#   genes    - rank within the top 1000
#   genesPos - rank within the top 1000 and effect > 0
#   genesNeg - rank within the top 1000 and effect < 0
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)
genesPos <- setNames(
  as.numeric(data$rank <= 1000 & data$effect > 0),
  data$gene
)
genesNeg <- setNames(
  as.numeric(data$rank <= 1000 & data$effect < 0),
  data$gene
)

There are 635 DE genes with positive effect and 365 DE genes with negative effect.

Step 3: PWFs

# Fit one probability weighting function per DE-gene vector, passing the
# `length` column of `data` as the bias data and suppressing the fit plot.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Post-process one goseq() result table.
#
# res          data frame returned by goseq(); must contain the columns
#              over_represented_pvalue, under_represented_pvalue and numInCat.
# max_cat_size categories with numInCat at or above this value are dropped.
#
# Returns `res` with q.value (over-representation) and q.value2
# (under-representation) columns added and the large categories removed.
tidy_goseq_result <- function(res, max_cat_size = 1000) {
  rownames(res) <- NULL

  # Fix problem with some p-values being slightly more than 1: cap them at 1
  # before they are passed to qvalue().
  res$over_represented_pvalue <- pmin(res$over_represented_pvalue, 1)
  res$under_represented_pvalue <- pmin(res$under_represented_pvalue, 1)

  res$q.value <- qvalue(res$over_represented_pvalue)$qvalues
  res$q.value2 <- qvalue(res$under_represented_pvalue)$qvalues

  # NOTE(review): q-values are computed over all categories and the size
  # filter is applied afterwards, so dropped categories still count towards
  # the correction. This ordering is kept as-is; confirm it is intended.
  res[which(res$numInCat < max_cat_size), ]
}

# Run the enrichment test on the GO:BP categories for each DE-gene definition,
# then apply the same clean-up to all three result tables.
go <- tidy_goseq_result(goseq(pwf, "hg19", "ensGene", test.cats = c("GO:BP")))
goPos <- tidy_goseq_result(
  goseq(pwfPos, "hg19", "ensGene", test.cats = c("GO:BP"))
)
goNeg <- tidy_goseq_result(
  goseq(pwfNeg, "hg19", "ensGene", test.cats = c("GO:BP"))
)

Top 1000 Results

Over enriched categories (19)

# Print every category with q.value <= 0.05, or the first `minRow` rows when
# fewer are significant. The count ignores NA q-values and is capped at the
# number of rows available so the subset never produces all-NA filler rows.
rowN <- min(nrow(go), max(minRow, sum(go$q.value <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value")],
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 24 0.0000893
GO:0006413 translational initiation 167 27 0.0009412
GO:0000956 nuclear-transcribed mRNA catabolic process 184 28 0.0018819
GO:0006402 mRNA catabolic process 196 29 0.0018819
GO:0006415 translational termination 95 18 0.0023526
GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 19 0.0031269
GO:0006613 cotranslational protein targeting to membrane 109 19 0.0031269
GO:0044265 cellular macromolecule catabolic process 798 76 0.0031269
GO:0045047 protein targeting to ER 110 19 0.0031269
GO:0072599 establishment of protein localization to endoplasmic reticulum 111 19 0.0031269
GO:0006401 RNA catabolic process 222 30 0.0031269
GO:0019395 fatty acid oxidation 77 16 0.0049168
GO:0034440 lipid oxidation 79 16 0.0061256
GO:0070972 protein localization to endoplasmic reticulum 128 20 0.0067670
GO:0030258 lipid modification 149 23 0.0095562
GO:0006414 translational elongation 121 19 0.0101051
GO:0009057 macromolecule catabolic process 983 85 0.0214539
GO:0019083 viral transcription 158 21 0.0428398
GO:0016071 mRNA metabolic process 615 57 0.0440893
GO:0006612 protein targeting to membrane 171 22 0.0600740

Under enriched (0)

# Re-sort by under-representation p-value; `go` keeps this order afterwards.
go <- go[order(go$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, or the first `minRow` rows when
# fewer are significant. The count ignores NA q-values and is capped at the
# number of rows available so the subset never produces all-NA filler rows.
rowN <- min(nrow(go), max(minRow, sum(go$q.value2 <= 0.05, na.rm = TRUE)))
cat(kable(
  go[seq_len(rowN), c("category", "term", "numInCat", "numDEInCat", "q.value2")],
  format = "html"
))
category term numInCat numDEInCat q.value2
9208 GO:0035264 multicellular organism growth 126 0 1
5078 GO:0052547 regulation of peptidase activity 283 5 1
5077 GO:0006816 calcium ion transport 276 5 1
5071 GO:0000902 cell morphogenesis 993 37 1
5073 GO:0052548 regulation of endopeptidase activity 270 5 1
5066 GO:0002520 immune system development 648 21 1
5080 GO:2001236 regulation of extrinsic apoptotic signaling pathway 140 1 1
5069 GO:0006520 cellular amino acid metabolic process 416 11 1
5063 GO:0007155 cell adhesion 912 34 1
5070 GO:0044057 regulation of system process 312 7 1
5062 GO:0048534 hematopoietic or lymphoid organ development 615 20 1
5053 GO:0000904 cell morphogenesis involved in differentiation 701 25 1
5051 GO:0022610 biological adhesion 916 35 1
5058 GO:0098662 inorganic cation transmembrane transport 397 11 1
5068 GO:1901605 alpha-amino acid metabolic process 187 3 1
5060 GO:0072511 divalent inorganic cation transport 294 7 1
5045 GO:0009790 embryo development 828 31 1
5055 GO:0070838 divalent metal ion transport 292 7 1
5048 GO:0098660 inorganic ion transmembrane transport 457 14 1
8730 GO:0008361 regulation of cell size 83 0 1

Positive Effect

Over enriched categories (12)

# Print every category with q.value <= 0.05, or the first `minRow` rows when
# fewer are significant. The count ignores NA q-values and is capped at the
# number of rows available so the subset never produces all-NA filler rows.
rowN <- min(
  nrow(goPos),
  max(minRow, sum(goPos$q.value <= 0.05, na.rm = TRUE))
)
cat(kable(
  goPos[
    seq_len(rowN),
    c("category", "term", "numInCat", "numDEInCat", "q.value")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0006986 response to unfolded protein 133 18 0.0305495
GO:0035966 response to topologically incorrect protein 140 18 0.0305495
GO:0019395 fatty acid oxidation 77 13 0.0305495
GO:0034440 lipid oxidation 79 13 0.0305495
GO:0009062 fatty acid catabolic process 72 12 0.0367432
GO:0072329 monocarboxylic acid catabolic process 84 13 0.0367432
GO:0030258 lipid modification 149 18 0.0367432
GO:0030968 endoplasmic reticulum unfolded protein response 86 13 0.0367432
GO:0006987 activation of signaling protein activity involved in unfolded protein response 63 11 0.0367432
GO:0034620 cellular response to unfolded protein 87 13 0.0367432
GO:0044265 cellular macromolecule catabolic process 798 54 0.0367432
GO:0032075 positive regulation of nuclease activity 65 11 0.0389630
GO:0035967 cellular response to topologically incorrect protein 93 13 0.0582077
GO:0006635 fatty acid beta-oxidation 57 10 0.0630003
GO:0009057 macromolecule catabolic process 983 62 0.0632116
GO:0032069 regulation of nuclease activity 71 11 0.0649930
GO:0006984 ER-nucleus signaling pathway 97 13 0.0767664
GO:0034389 lipid particle organization 14 5 0.0869020
GO:0030163 protein catabolic process 596 42 0.1204515
GO:0046320 regulation of fatty acid oxidation 24 6 0.1533732

Under enriched (0)

# Re-sort by under-representation p-value; `goPos` keeps this order afterwards.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Print every category with q.value2 <= 0.05, or the first `minRow` rows when
# fewer are significant. The count ignores NA q-values and is capped at the
# number of rows available so the subset never produces all-NA filler rows.
rowN <- min(
  nrow(goPos),
  max(minRow, sum(goPos$q.value2 <= 0.05, na.rm = TRUE))
)
cat(kable(
  goPos[
    seq_len(rowN),
    c("category", "term", "numInCat", "numDEInCat", "q.value2")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value2
4197 GO:0007155 cell adhesion 912 18 1
8143 GO:0061448 connective tissue development 186 0 1
4196 GO:0022610 biological adhesion 916 19 1
4194 GO:0007186 G-protein coupled receptor signaling pathway 549 8 1
4189 GO:0007267 cell-cell signaling 915 19 1
7967 GO:0051216 cartilage development 149 0 1
4184 GO:0009790 embryo development 828 18 1
4188 GO:0006520 cellular amino acid metabolic process 416 6 1
4177 GO:0000904 cell morphogenesis involved in differentiation 701 15 1
8429 GO:2001236 regulation of extrinsic apoptotic signaling pathway 140 0 1
4178 GO:0007268 synaptic transmission 566 11 1
4181 GO:0048598 embryonic morphogenesis 468 8 1
4191 GO:1901605 alpha-amino acid metabolic process 187 1 1
4187 GO:0006816 calcium ion transport 276 3 1
4171 GO:0035295 tube development 490 9 1
7326 GO:0021953 central nervous system neuron differentiation 123 0 1
7546 GO:0035264 multicellular organism growth 126 0 1
4167 GO:0006140 regulation of nucleotide metabolic process 613 13 1
4166 GO:1900542 regulation of purine nucleotide metabolic process 610 13 1
4174 GO:0044057 regulation of system process 312 4 1

Negative Effect

Over enriched categories (25)

category term numInCat numDEInCat q.value
GO:0006415 translational termination 95 16 0.0000000
GO:0006414 translational elongation 121 17 0.0000000
GO:0000184 nuclear-transcribed mRNA catabolic process, nonsense-mediated decay 118 17 0.0000000
GO:0006614 SRP-dependent cotranslational protein targeting to membrane 107 16 0.0000000
GO:0006613 cotranslational protein targeting to membrane 109 16 0.0000000
GO:0045047 protein targeting to ER 110 16 0.0000000
GO:0072599 establishment of protein localization to endoplasmic reticulum 111 16 0.0000000
GO:0019083 viral transcription 158 18 0.0000001
GO:0019080 viral gene expression 168 18 0.0000002
GO:0006413 translational initiation 167 18 0.0000002
GO:0070972 protein localization to endoplasmic reticulum 128 16 0.0000002
GO:0044033 multi-organism metabolic process 178 18 0.0000004
GO:0000956 nuclear-transcribed mRNA catabolic process 184 17 0.0000123
GO:0006612 protein targeting to membrane 171 16 0.0000253
GO:0006402 mRNA catabolic process 196 17 0.0000309
GO:0043624 cellular protein complex disassembly 174 16 0.0000410
GO:0006401 RNA catabolic process 222 17 0.0001712
GO:0043241 protein complex disassembly 195 16 0.0001919
GO:0032984 macromolecular complex disassembly 205 16 0.0003301
GO:0019058 viral life cycle 303 19 0.0003666
GO:0042274 ribosomal small subunit biogenesis 21 5 0.0079915
GO:0090150 establishment of protein localization to membrane 296 17 0.0079915
GO:0022411 cellular component disassembly 412 21 0.0088145
GO:0072657 protein localization to membrane 371 18 0.0413611
GO:0006412 translation 496 21 0.0413611

Under enriched (0)

category term numInCat numDEInCat q.value2
3115 GO:0002520 immune system development 648 1 1
3114 GO:0048534 hematopoietic or lymphoid organ development 615 1 1
3113 GO:0030097 hemopoiesis 587 1 1
3106 GO:1903047 mitotic cell cycle process 746 5 1
3107 GO:0051301 cell division 666 4 1
3110 GO:0022604 regulation of cell morphogenesis 341 1 1
3108 GO:0002521 leukocyte differentiation 353 1 1
5918 GO:0030098 lymphocyte differentiation 235 0 1
9031 GO:0051052 regulation of DNA metabolic process 235 0 1
3100 GO:0006325 chromatin organization 563 4 1
3092 GO:0010558 negative regulation of macromolecule biosynthetic process 958 9 1
3096 GO:0048285 organelle fission 488 3 1
3081 GO:2000113 negative regulation of cellular macromolecule biosynthetic process 913 9 1
8122 GO:0045216 cell-cell junction organization 177 0 1
3071 GO:0000902 cell morphogenesis 993 11 1
4977 GO:0010720 positive regulation of cell development 178 0 1
11032 GO:0090068 positive regulation of cell cycle process 196 0 1
3077 GO:0051276 chromosome organization 738 7 1
3074 GO:0044092 negative regulation of molecular function 778 7 1
3086 GO:0000280 nuclear division 460 3 1

Final Step: csv output

# Save the three result tables under csv/, named from the `outFile` label,
# without a row-name column.
write.csv(go, file = paste0("csv/", outFile, "_main.csv"), row.names = FALSE)
write.csv(goPos, file = paste0("csv/", outFile, "Pos.csv"), row.names = FALSE)
write.csv(goNeg, file = paste0("csv/", outFile, "Neg.csv"), row.names = FALSE)