Respiratory Electron Transport Genes

Compare the effects (betas) from differential expression results for NGT vs T2D and insulin from genes in GO:0022904 (respiratory electron transport chain)

library(knitr)
library(ggplot2)

## Loading required package: methods

library(qvalue)

## 
## Attaching package: 'qvalue'
## 
## The following object is masked from 'package:ggplot2':
## 
##     qplot

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

load("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/cat2gene.RData")
genes <- cat2gene[["GO:0022904"]]

gene_names <- read.table("/net/snowwhite/home/beckandy/geneConversion.tab", sep="\t", header=T)

insu <- read.table("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/traits/peer_k03_S_Insu_all_genes.txt", header=T, as.is=T)
t2d <- read.table("/net/snowwhite/home/beckandy/tissue/datafreeze4/goseq/ngt_vs_t2d/peer_k03_all_genes.txt", header=T, as.is=T)

insu <- insu[order(insu$p.value),]
t2d <- t2d[order(t2d$p.value),]

insu$q.value <- qvalue(insu$p.value)$qvalues
t2d$q.value <- qvalue(t2d$p.value)$qvalues

insu$rank <- seq(1,length(insu$p.value))
t2d$rank <- seq(1,length(t2d$p.value))
insu$DE <- insu$rank <= 1000
t2d$DE <- t2d$rank <= 1000

insu$gene <- as.character(sapply(insu$gene,function(x)strsplit(x,"[.]")[[1]][1]))
t2d$gene <- as.character(sapply(t2d$gene,function(x)strsplit(x,"[.]")[[1]][1]))

rownames(insu) <- insu$gene
rownames(t2d) <- t2d$gene

keep <- intersect(insu$gene, t2d$gene)
genes <- intersect(genes, keep)

insu <- insu[genes,]
t2d <- t2d[genes,]

#insu <- left_join(insu, gene_names[,-1], by=c("gene" = "gene_id_noversion"))
#t2d <- left_join(t2d, gene_names[,-1], by=c("gene" = "gene_id_noversion"))

Sanity check: do rows (genes) match between the two data frames?

print(sum(rownames(t2d) != rownames(insu)) == 0)

## [1] TRUE

Merge results into one data frame and save

#insu$DE <- as.numeric(insu$DE)
#t2d$DE <- as.numeric(t2d$DE)

betas <- data.frame(cbind.data.frame(insu$effect, t2d$effect, insu$DE, t2d$DE, insu$p.value, t2d$p.value, insu$q.value, t2d$q.value))
rownames(betas) <- genes
names(betas) <- c("insu_beta", "t2d_beta", "insu_de", "t2d_de", "insu_p","t2d_p", "insu_q", "t2d_q")

#0 if no DE, 1 if DE in insulin, 2 if DE in T2D, 3 if DE in both
betas$DE <- betas$insu_de + (2*betas$t2d_de)

#write.csv(betas, file="/net/csgsites/csg/beckandy/reports/resp_elec_effects.csv")

Some simple graphs and stats

kable(summary(betas), format="html")

insu_beta	t2d_beta	insu_de	t2d_de	insu_p	t2d_p	insu_q	t2d_q	DE
Min. :-0.27979	Min. :-0.7171	Mode :logical	Mode :logical	Min. :0.000000	Min. :0.0000188	Min. :0.00000	Min. :0.03859	Min. :0.0000
1st Qu.:-0.00907	1st Qu.:-0.5132	FALSE:90	FALSE:73	1st Qu.:0.001188	1st Qu.:0.0076977	1st Qu.:0.00489	1st Qu.:0.15273	1st Qu.:0.0000
Median : 0.06890	Median :-0.3917	TRUE :15	TRUE :32	Median :0.043490	Median :0.0453922	Median :0.06737	Median :0.29344	Median :0.0000
Mean : 0.09011	Mean :-0.3127	NA’s :0	NA’s :0	Mean :0.232388	Mean :0.1655041	Mean :0.15350	Mean :0.32738	Mean :0.7524
3rd Qu.: 0.18361	3rd Qu.:-0.1701	NA	NA	3rd Qu.:0.359478	3rd Qu.:0.1939692	3rd Qu.:0.27270	3rd Qu.:0.46518	3rd Qu.:2.0000
Max. : 0.42375	Max. : 0.4342	NA	NA	Max. :0.979925	Max. :0.9471544	Max. :0.48598	Max. :0.74329	Max. :3.0000

kable(cor(betas[,1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	1.0000000	0.0287681
t2d_beta	0.0287681	1.0000000

Key

Not DE in either
DE in insulin only
DE in T2D only
DE in both

Number of DE genes

kable(t(as.matrix(table(betas$DE+1))), format="html")

1	2	3	4
59	14	31	1

p1 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(DE+1)))+geom_point(size=3)+geom_hline(aes(yintercept=0))+geom_vline(aes(xintercept=0))+
    geom_abline(slope=1,intercept=0)
print(p1)

plot of chunk unnamed-chunk-7

Looks like there are two groups in the plot? Lets try kmeans clustering

First, determine how many clusters we should use

wss <- (nrow(betas)-1)*sum(apply(betas,2,var))
for (i in 2:15) wss[i] <- sum(kmeans(betas,centers=i, nstart=1000)$withinss)
plot(1:15, wss, type="b", xlab="Number of Clusters",ylab="Within groups sum of squares")

plot of chunk unnamed-chunk-8

Let's try 4

clus <- kmeans(betas, 4, nstart=2500)
betas <- cbind(betas, clus$cluster)
names(betas)[6] <- "cluster"
p2 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(cluster)))+geom_point(size=3)+stat_smooth(method="lm")
print(p2)

## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

plot of chunk unnamed-chunk-9

How many genes in each cluster?

kable(t(as.matrix(table(betas$clus))), format="html")

kable(cor(betas[which(betas$clus==1),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	NA	NA
t2d_beta	NA	NA

kable(cor(betas[which(betas$clus==2),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	NA	NA
t2d_beta	NA	NA

kable(cor(betas[which(betas$clus==3),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	NA	NA
t2d_beta	NA	NA

kable(cor(betas[which(betas$clus==4),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	NA	NA
t2d_beta	NA	NA

Try cutting on line y=-0.25 + x

p3 <- p1+geom_abline(slope=1,intercept=-0.25)
print(p3)

plot of chunk unnamed-chunk-12

betas$cut <- apply(betas,1,function(x){as.numeric(x[2] < (-0.25+x[1]))})
p4 <- ggplot(betas, aes(x=insu_beta, y=t2d_beta, colour=factor(cut)))+geom_point(size=3)+geom_abline(slope=1, intercept=-0.25)+stat_smooth(method="lm")
print(p4)

plot of chunk unnamed-chunk-13

kable(cor(betas[which(betas$cut==0),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	1.0000000	0.5807195
t2d_beta	0.5807195	1.0000000

kable(cor(betas[which(betas$cut==1),1:2]), format="html")

	insu_beta	t2d_beta
insu_beta	1.0000000	0.4195754
t2d_beta	0.4195754	1.0000000

betas$gene_id_noversion <- rownames(betas)
betas <- left_join(betas,gene_names[-1,])

## Joining by: "gene_id_noversion"

## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

betas$gene_id_noversion <- NULL
write.csv(betas, file="/net/csgsites/csg/beckandy/reports/resp_elec_effects.csv")