This report has goseq results for NGT versus T2D when:

  1. Top 1000 genes are marked as differentially expressed
  2. Top genes with positive effect in top 1000 overall genes are marked as differentially expressed
  3. Top genes with negative effect in top 1000 overall genes are marked as differentially expressed

This report was generated on June 21 2015

Goseq results are also saved in csv files located on snowwhite in the directory: /net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/12junReps/csv

Step 1: Load in all the necessary data/libraries

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Loading required package: DBI
library(qvalue)

# Per-gene results table; the columns read further down are gene, p.value
# and effect.
fName <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt"
# Prefix of the csv file names written in the final step.
outFile <- "ngt_t2d"

data <- read.table(fName, as.is = TRUE, header = TRUE)

# Gene-length table, keyed by a `gene` column. It is expected to supply the
# `length` column used as bias data later on -- confirm against the file.
gene_length_file <- "/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/jun3/length.composite.gene.models.gencode.v19"
gene_lengths <- read.table(gene_length_file, header = TRUE, as.is = TRUE)

# Keep only the part of each gene ID before the first "." so both tables
# share the same key (presumably this drops the Ensembl version suffix --
# confirm against the input files). sub() is vectorized and always returns a
# plain character vector, unlike sapply() over strsplit().
gene_lengths$gene <- sub("[.].*$", "", gene_lengths$gene)
data$gene <- sub("[.].*$", "", data$gene)

# Left join: every row of `data` is kept; genes missing from the length table
# get NA in the columns that come from gene_lengths.
data <- merge(data, gene_lengths, by = "gene", all.x = TRUE)

# Sort by ascending p-value, attach q-values, and number the rows so that
# rank 1 is the gene with the smallest p-value.
data <- data[order(data$p.value), ]
data$q.value <- qvalue(data$p.value)$qvalues
data$rank <- seq_len(nrow(data))

# Minimum number of rows printed in each results table.
minRow <- 20

Step 2: Create genes vectors

The first vector simply marks the top 1000 genes as differentially expressed. The second and third vectors mark the genes with positive or negative effect in the top 1000 as differentially expressed.

# 1/0 indicator per gene, named by gene ID: 1 marks the 1000 best-ranked genes.
genes <- setNames(as.numeric(data$rank <= 1000), data$gene)

# Same indicator, restricted to best-ranked genes whose effect is above zero.
genesPos <- setNames(
  as.numeric(data$effect > 0 & data$rank <= 1000),
  data$gene
)

# Same indicator, restricted to best-ranked genes whose effect is below zero.
genesNeg <- setNames(
  as.numeric(data$effect < 0 & data$rank <= 1000),
  data$gene
)

There are 511 DE genes with positive effect and 489 DE genes with negative effect.

Step 3: PWFs

# One probability weighting function per DE indicator vector. The `length`
# column of the merged table is supplied as the bias data, and the diagnostic
# fit plot is switched off.
pwf <- nullp(
  DEgenes = genes, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfPos <- nullp(
  DEgenes = genesPos, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)
pwfNeg <- nullp(
  DEgenes = genesNeg, genome = "hg19", id = "ensGene",
  bias.data = data$length, plot.fit = FALSE
)

Step 4: run goseq

# Run goseq on the GO:BP categories, once per gene set.
go <- goseq(pwf, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
goPos <- goseq(pwfPos, genome = "hg19", id = "ensGene", test.cats = "GO:BP")
goNeg <- goseq(pwfNeg, genome = "hg19", id = "ensGene", test.cats = "GO:BP")

# The same post-processing is applied to each result table:
#   1. reset the row names,
#   2. cap p-values at 1 (some come back slightly above 1),
#   3. add q.value (over-representation) and q.value2 (under-representation),
#   4. keep only categories with fewer than 1000 genes.
# The q-values are computed before the size filter, i.e. over all categories.

# Top 1000 genes
rownames(go) <- NULL
go$over_represented_pvalue <- pmin(go$over_represented_pvalue, 1)
go$under_represented_pvalue <- pmin(go$under_represented_pvalue, 1)
go$q.value <- qvalue(go$over_represented_pvalue)$qvalues
go$q.value2 <- qvalue(go$under_represented_pvalue)$qvalues
go <- go[which(go$numInCat < 1000), ]

# Top genes with positive effect
rownames(goPos) <- NULL
goPos$over_represented_pvalue <- pmin(goPos$over_represented_pvalue, 1)
goPos$under_represented_pvalue <- pmin(goPos$under_represented_pvalue, 1)
goPos$q.value <- qvalue(goPos$over_represented_pvalue)$qvalues
goPos$q.value2 <- qvalue(goPos$under_represented_pvalue)$qvalues
goPos <- goPos[which(goPos$numInCat < 1000), ]

# Top genes with negative effect
rownames(goNeg) <- NULL
goNeg$over_represented_pvalue <- pmin(goNeg$over_represented_pvalue, 1)
goNeg$under_represented_pvalue <- pmin(goNeg$under_represented_pvalue, 1)
goNeg$q.value <- qvalue(goNeg$over_represented_pvalue)$qvalues
goNeg$q.value2 <- qvalue(goNeg$under_represented_pvalue)$qvalues
goNeg <- goNeg[which(goNeg$numInCat < 1000), ]

Top 1000 Results

Over enriched categories (16)

# Rows to print: every category whose over-representation q-value is at most
# 0.05, but never fewer than minRow.
rowN <- max(minRow, sum(go$q.value <= 0.05))
# Capping at nrow(go) and using seq_len() avoids the all-NA padding rows that
# go[1:rowN, ] yields when fewer than rowN categories remain, and the stray
# first row that 1:0 would select.
cat(kable(
  go[
    seq_len(min(rowN, nrow(go))),
    c("category", "term", "numInCat", "numDEInCat", "q.value")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value
GO:0022904 respiratory electron transport chain 105 32 0.0000000
GO:0022900 electron transport chain 106 32 0.0000000
GO:0045333 cellular respiration 158 35 0.0000000
GO:1902600 hydrogen ion transmembrane transport 84 21 0.0000020
GO:0055114 oxidation-reduction process 895 90 0.0000148
GO:0006091 generation of precursor metabolites and energy 395 50 0.0000281
GO:0015980 energy derivation by oxidation of organic compounds 319 43 0.0000325
GO:0042773 ATP synthesis coupled electron transport 54 15 0.0000325
GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 15 0.0000325
GO:0015992 proton transport 112 21 0.0002193
GO:0006818 hydrogen transport 114 21 0.0002873
GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 12 0.0003775
GO:0006119 oxidative phosphorylation 70 15 0.0011039
GO:0015985 energy coupled proton transport, down electrochemical gradient 19 7 0.0124564
GO:0015986 ATP synthesis coupled proton transport 19 7 0.0124564
GO:0042776 mitochondrial ATP synthesis coupled proton transport 14 6 0.0184776
GO:0021539 subthalamus development 42 10 0.0607247
GO:0048857 neural nucleus development 54 11 0.1448926
GO:0021762 substantia nigra development 40 9 0.2036624
GO:0006754 ATP biosynthetic process 36 8 0.2582218

Under enriched (0)

# Re-sort so the categories with the smallest under-representation p-value
# come first; `go` keeps this order from here on.
go <- go[order(go$under_represented_pvalue), ]
# Rows to print: every category whose under-representation q-value is at most
# 0.05, but never fewer than minRow.
rowN <- max(minRow, sum(go$q.value2 <= 0.05))
# Capping at nrow(go) and using seq_len() avoids the all-NA padding rows that
# go[1:rowN, ] yields when fewer than rowN categories remain, and the stray
# first row that 1:0 would select.
cat(kable(
  go[
    seq_len(min(rowN, nrow(go))),
    c("category", "term", "numInCat", "numDEInCat", "q.value2")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value2
4932 GO:0022610 biological adhesion 920 32 1
4929 GO:0007155 cell adhesion 916 32 1
4936 GO:0060271 cilium morphogenesis 154 1 1
4935 GO:0070661 leukocyte proliferation 200 2 1
4924 GO:0030031 cell projection assembly 267 5 1
4931 GO:0032943 mononuclear cell proliferation 191 2 1
4927 GO:0046651 lymphocyte proliferation 189 2 1
4915 GO:0046649 lymphocyte activation 480 13 1
4912 GO:0002520 immune system development 648 21 1
4923 GO:0051302 regulation of cell division 209 3 1
4934 GO:0044782 cilium organization 137 1 1
4933 GO:0070663 regulation of leukocyte proliferation 151 1 1
4914 GO:0042110 T cell activation 347 8 1
4930 GO:0032102 negative regulation of response to external stimulus 143 1 1
4920 GO:0050727 regulation of inflammatory response 212 3 1
4928 GO:0032944 regulation of mononuclear cell proliferation 147 1 1
4910 GO:0002684 positive regulation of immune system process 611 19 1
4926 GO:0050670 regulation of lymphocyte proliferation 146 1 1
4909 GO:0045321 leukocyte activation 561 17 1
4916 GO:0050867 positive regulation of cell activation 239 4 1

Positive Effect

Over enriched categories (0)

# Rows to print: every category whose over-representation q-value is at most
# 0.05, but never fewer than minRow.
rowN <- max(minRow, sum(goPos$q.value <= 0.05))
# Capping at nrow(goPos) and using seq_len() avoids the all-NA padding rows
# that goPos[1:rowN, ] yields when fewer than rowN categories remain, and the
# stray first row that 1:0 would select.
cat(kable(
  goPos[
    seq_len(min(rowN, nrow(goPos))),
    c("category", "term", "numInCat", "numDEInCat", "q.value")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value
18 GO:0070647 protein modification by small protein conjugation or removal 731 41 0.5265730
22 GO:0035414 negative regulation of catenin import into nucleus 8 3 0.6617710
24 GO:0006333 chromatin assembly or disassembly 110 10 0.9016002
25 GO:0034728 nucleosome organization 95 9 0.9016002
26 GO:0016567 protein ubiquitination 612 34 1.0000000
27 GO:0006325 chromatin organization 563 32 1.0000000
28 GO:0032446 protein modification by small protein conjugation 654 35 1.0000000
29 GO:0070537 histone H2A K63-linked deubiquitination 3 2 1.0000000
31 GO:0031497 chromatin assembly 87 8 1.0000000
32 GO:0070602 regulation of centromeric sister chromatid cohesion 3 2 1.0000000
34 GO:0016568 chromatin modification 502 29 1.0000000
35 GO:0006334 nucleosome assembly 74 7 1.0000000
36 GO:0051276 chromosome organization 738 38 1.0000000
37 GO:0010558 negative regulation of macromolecule biosynthetic process 959 46 1.0000000
38 GO:0071824 protein-DNA complex subunit organization 128 10 1.0000000
39 GO:0016584 nucleosome positioning 4 2 1.0000000
42 GO:0015846 polyamine transport 4 2 1.0000000
43 GO:0060244 negative regulation of cell proliferation involved in contact inhibition 4 2 1.0000000
45 GO:0006282 regulation of DNA repair 54 6 1.0000000
46 GO:0010499 proteasomal ubiquitin-independent protein catabolic process 4 2 1.0000000

Under enriched (0)

# Re-sort so the categories with the smallest under-representation p-value
# come first; `goPos` keeps this order from here on.
goPos <- goPos[order(goPos$under_represented_pvalue), ]
# Rows to print: every category whose under-representation q-value is at most
# 0.05, but never fewer than minRow.
rowN <- max(minRow, sum(goPos$q.value2 <= 0.05))
# Capping at nrow(goPos) and using seq_len() avoids the all-NA padding rows
# that goPos[1:rowN, ] yields when fewer than rowN categories remain, and the
# stray first row that 1:0 would select.
cat(kable(
  goPos[
    seq_len(min(rowN, nrow(goPos))),
    c("category", "term", "numInCat", "numDEInCat", "q.value2")
  ],
  format = "html"
))
category term numInCat numDEInCat q.value2
3655 GO:0006520 cellular amino acid metabolic process 417 2 1
3654 GO:0030001 metal ion transport 584 7 1
3651 GO:0006812 cation transport 768 11 1
3649 GO:0022610 biological adhesion 920 16 1
3647 GO:0007155 cell adhesion 916 16 1
11737 GO:1901605 alpha-amino acid metabolic process 188 0 1
3652 GO:0090066 regulation of anatomical structure size 311 2 1
3644 GO:0098662 inorganic cation transmembrane transport 397 4 1
3645 GO:0015672 monovalent inorganic cation transport 351 3 1
7773 GO:0042113 B cell activation 169 0 1
3635 GO:0001775 cell activation 767 12 1
3638 GO:0098655 cation transmembrane transport 467 6 1
6075 GO:0019932 second-messenger-mediated signaling 163 0 1
3630 GO:0006082 organic acid metabolic process 945 17 1
3650 GO:0050867 positive regulation of cell activation 239 1 1
3634 GO:0098660 inorganic ion transmembrane transport 456 6 1
3631 GO:0046903 secretion 769 13 1
3641 GO:0019058 viral life cycle 303 2 1
3623 GO:0043436 oxoacid metabolic process 935 17 1
3632 GO:0046649 lymphocyte activation 480 6 1

Negative Effect

Over enriched categories (35)

category term numInCat numDEInCat q.value
1 GO:0022904 respiratory electron transport chain 105 32 0.0000000
2 GO:0022900 electron transport chain 106 32 0.0000000
3 GO:0045333 cellular respiration 158 35 0.0000000
4 GO:0006091 generation of precursor metabolites and energy 395 45 0.0000000
5 GO:0015980 energy derivation by oxidation of organic compounds 319 40 0.0000000
6 GO:0055114 oxidation-reduction process 895 68 0.0000000
7 GO:1902600 hydrogen ion transmembrane transport 84 21 0.0000000
8 GO:0015992 proton transport 112 21 0.0000000
9 GO:0006818 hydrogen transport 114 21 0.0000000
10 GO:0042773 ATP synthesis coupled electron transport 54 15 0.0000000
11 GO:0042775 mitochondrial ATP synthesis coupled electron transport 54 15 0.0000000
12 GO:0006119 oxidative phosphorylation 70 15 0.0000001
13 GO:0006120 mitochondrial electron transport, NADH to ubiquinone 42 12 0.0000002
15 GO:0006812 cation transport 768 46 0.0000268
16 GO:0009123 nucleoside monophosphate metabolic process 487 35 0.0000281
18 GO:0015672 monovalent inorganic cation transport 351 28 0.0000564
19 GO:0015985 energy coupled proton transport, down electrochemical gradient 19 7 0.0000879
20 GO:0015986 ATP synthesis coupled proton transport 19 7 0.0000879
21 GO:0009161 ribonucleoside monophosphate metabolic process 477 33 0.0001433
22 GO:0034220 ion transmembrane transport 657 40 0.0001918
23 GO:0042776 mitochondrial ATP synthesis coupled proton transport 14 6 0.0002190
24 GO:0009167 purine ribonucleoside monophosphate metabolic process 465 32 0.0002201
25 GO:0009126 purine nucleoside monophosphate metabolic process 466 32 0.0002201
26 GO:0046034 ATP metabolic process 441 31 0.0002201
27 GO:0098655 cation transmembrane transport 467 30 0.0020848
28 GO:0098662 inorganic cation transmembrane transport 397 27 0.0020991
29 GO:0098660 inorganic ion transmembrane transport 456 29 0.0030351
30 GO:0006754 ATP biosynthetic process 36 7 0.0084068
32 GO:0009156 ribonucleoside monophosphate biosynthetic process 73 9 0.0234307
33 GO:0009127 purine nucleoside monophosphate biosynthetic process 61 8 0.0362184
34 GO:0009168 purine ribonucleoside monophosphate biosynthetic process 61 8 0.0362184
35 GO:0009206 purine ribonucleoside triphosphate biosynthetic process 46 7 0.0362184
36 GO:0009124 nucleoside monophosphate biosynthetic process 79 9 0.0385134
37 GO:0009145 purine nucleoside triphosphate biosynthetic process 47 7 0.0393327
38 GO:0009142 nucleoside triphosphate biosynthetic process 63 8 0.0395647

Under enriched (0)

category term numInCat numDEInCat q.value2
3563 GO:0006397 mRNA processing 404 2 0.7662154
3562 GO:0016568 chromatin modification 502 4 0.8450203
3556 GO:0051276 chromosome organization 738 8 0.8450203
3559 GO:0006325 chromatin organization 563 5 0.8450203
3552 GO:0043549 regulation of kinase activity 675 7 1.0000000
3545 GO:0045893 positive regulation of transcription, DNA-templated 989 13 1.0000000
3544 GO:0051338 regulation of transferase activity 782 9 1.0000000
3555 GO:0007389 pattern specification process 346 2 1.0000000
3560 GO:0030031 cell projection assembly 267 1 1.0000000
3538 GO:0045859 regulation of protein kinase activity 637 7 1.0000000
3548 GO:0019221 cytokine-mediated signaling pathway 345 2 1.0000000
8974 GO:0048705 skeletal system morphogenesis 184 0 1.0000000
3528 GO:0051247 positive regulation of protein metabolic process 951 13 1.0000000
3543 GO:0008380 RNA splicing 332 2 1.0000000
3523 GO:0006259 DNA metabolic process 838 11 1.0000000
3517 GO:0000902 cell morphogenesis 993 15 1.0000000
3522 GO:0051301 cell division 666 8 1.0000000
10481 GO:0071103 DNA conformation change 173 0 1.0000000
3518 GO:0032101 regulation of response to external stimulus 502 5 1.0000000
3506 GO:0009790 embryo development 829 12 1.0000000

Final Step: csv output

# Write the three result tables to the csv/ directory (relative to the working
# directory), without row names. File names start with the outFile prefix.
write.csv(go, file = paste0("csv/", outFile, "_main.csv"), row.names = FALSE)
write.csv(goPos, file = paste0("csv/", outFile, "Pos.csv"), row.names = FALSE)
write.csv(goNeg, file = paste0("csv/", outFile, "Neg.csv"), row.names = FALSE)