Respiratory Electron Transport Genes

Compare the effects (betas) from differential expression results for NGT vs T2D and insulin from genes in GO:0022904 (respiratory electron transport chain)

library(knitr)
library(ggplot2)
## Loading required package: methods
library(qvalue)
## 
## Attaching package: 'qvalue'
## 
## The following object is masked from 'package:ggplot2':
## 
##     qplot
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
load("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/cat2gene.RData")
genes <- cat2gene[["GO:0022904"]]

gene_names <- read.table("/net/snowwhite/home/beckandy/geneConversion.tab", sep="\t", header=T)

insu <- read.table("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_S_Insu_all_genes.txt", header=T, as.is=T)
t2d <- read.table("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt", header=T, as.is=T)

insu <- insu[order(insu$p.value),]
t2d <- t2d[order(t2d$p.value),]

insu$q.value <- qvalue(insu$p.value)$qvalues
t2d$q.value <- qvalue(t2d$p.value)$qvalues

insu$rank <- seq(1,length(insu$p.value))
t2d$rank <- seq(1,length(t2d$p.value))
insu$DE <- insu$rank <= 1000
t2d$DE <- t2d$rank <= 1000

insu$gene <- as.character(sapply(insu$gene,function(x)strsplit(x,"[.]")[[1]][1]))
t2d$gene <- as.character(sapply(t2d$gene,function(x)strsplit(x,"[.]")[[1]][1]))

rownames(insu) <- insu$gene
rownames(t2d) <- t2d$gene

keep <- intersect(insu$gene, t2d$gene)
genes <- intersect(genes, keep)

insu <- insu[genes,]
t2d <- t2d[genes,]

#insu <- left_join(insu, gene_names[,-1], by=c("gene" = "gene_id_noversion"))
#t2d <- left_join(t2d, gene_names[,-1], by=c("gene" = "gene_id_noversion"))

Sanity check: do rows (genes) match between the two data frames?

print(sum(rownames(t2d) != rownames(insu)) == 0)
## [1] TRUE

Merge results into one data frame and save

#insu$DE <- as.numeric(insu$DE)
#t2d$DE <- as.numeric(t2d$DE)

betas <- data.frame(cbind.data.frame(insu$effect, t2d$effect, insu$DE, t2d$DE, insu$p.value, t2d$p.value, insu$q.value, t2d$q.value))
rownames(betas) <- genes
names(betas) <- c("insu_beta", "t2d_beta", "insu_de", "t2d_de", "insu_p","t2d_p", "insu_q", "t2d_q")

#0 if no DE, 1 if DE in insulin, 2 if DE in T2D, 3 if DE in both
betas$DE <- betas$insu_de + (2*betas$t2d_de)

#write.csv(betas, file="/net/csgsites/csg/beckandy/reports/resp_elec_effects.csv")

Some simple graphs and stats

kable(summary(betas), format="html")
insu_beta t2d_beta insu_de t2d_de insu_p t2d_p insu_q t2d_q DE
Min. :-0.27979 Min. :-0.7171 Mode :logical Mode :logical Min. :0.000000 Min. :0.0000188 Min. :0.00000 Min. :0.03859 Min. :0.0000
1st Qu.:-0.00907 1st Qu.:-0.5132 FALSE:90 FALSE:73 1st Qu.:0.001188 1st Qu.:0.0076977 1st Qu.:0.00489 1st Qu.:0.15273 1st Qu.:0.0000
Median : 0.06890 Median :-0.3917 TRUE :15 TRUE :32 Median :0.043490 Median :0.0453922 Median :0.06737 Median :0.29344 Median :0.0000
Mean : 0.09011 Mean :-0.3127 NA’s :0 NA’s :0 Mean :0.232388 Mean :0.1655041 Mean :0.15350 Mean :0.32738 Mean :0.7524
3rd Qu.: 0.18361 3rd Qu.:-0.1701 NA NA 3rd Qu.:0.359478 3rd Qu.:0.1939692 3rd Qu.:0.27270 3rd Qu.:0.46518 3rd Qu.:2.0000
Max. : 0.42375 Max. : 0.4342 NA NA Max. :0.979925 Max. :0.9471544 Max. :0.48598 Max. :0.74329 Max. :3.0000
kable(cor(betas[,1:2]), format="html")
insu_beta t2d_beta
insu_beta 1.0000000 0.0287681
t2d_beta 0.0287681 1.0000000

Key

  1. Not DE in either
  2. DE in insulin only
  3. DE in T2D only
  4. DE in both

Number of DE genes

kable(t(as.matrix(table(betas$DE+1))), format="html")
1 2 3 4
59 14 31 1
p1 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(DE+1)))+geom_point(size=3)+geom_hline(aes(yintercept=0))+geom_vline(aes(xintercept=0))+
    geom_abline(slope=1,intercept=0)
print(p1)

plot of chunk unnamed-chunk-7

Looks like there are two groups in the plot? Lets try kmeans clustering

First, determine how many clusters we should use

wss <- (nrow(betas)-1)*sum(apply(betas,2,var))
for (i in 2:15) wss[i] <- sum(kmeans(betas,centers=i, nstart=1000)$withinss)
plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

plot of chunk unnamed-chunk-8

Let's try 4

clus <- kmeans(betas, 4, nstart=2500)
betas <- cbind(betas, clus$cluster)
names(betas)[6] <- "cluster"
p2 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(cluster)))+geom_point(size=3)+stat_smooth(method="lm")
print(p2)
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

plot of chunk unnamed-chunk-9

How many genes in each cluster?

kable(t(as.matrix(table(betas$clus))), format="html")
kable(cor(betas[which(betas$clus==1),1:2]), format="html")
insu_beta t2d_beta
insu_beta NA NA
t2d_beta NA NA
kable(cor(betas[which(betas$clus==2),1:2]), format="html")
insu_beta t2d_beta
insu_beta NA NA
t2d_beta NA NA
kable(cor(betas[which(betas$clus==3),1:2]), format="html")
insu_beta t2d_beta
insu_beta NA NA
t2d_beta NA NA
kable(cor(betas[which(betas$clus==4),1:2]), format="html")
insu_beta t2d_beta
insu_beta NA NA
t2d_beta NA NA

Try cutting on line y=-0.25 + x

p3 <- p1+geom_abline(slope=1,intercept=-0.25)
print(p3)

plot of chunk unnamed-chunk-12

betas$cut <- apply(betas,1,function(x){as.numeric(x[2] < (-0.25+x[1]))})
p4 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(cut)))+geom_point(size=3)+geom_abline(slope=1, intercept=-0.25)+stat_smooth(method="lm")
print(p4)

plot of chunk unnamed-chunk-13

kable(cor(betas[which(betas$cut==0),1:2]), format="html")
insu_beta t2d_beta
insu_beta 1.0000000 0.5807195
t2d_beta 0.5807195 1.0000000
kable(cor(betas[which(betas$cut==1),1:2]), format="html")
insu_beta t2d_beta
insu_beta 1.0000000 0.4195754
t2d_beta 0.4195754 1.0000000
betas$gene_id_noversion <- rownames(betas)
betas <- left_join(betas,gene_names[-1,])
## Joining by: "gene_id_noversion"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
betas$gene_id_noversion <- NULL
write.csv(betas, file="/net/csgsites/csg/beckandy/reports/resp_elec_effects.csv")