|
上周在微信推文《如何为热图添加指定的基因标记?》为大家介绍了如何为热图指定显示特定的基因标签,如下图,内容比较受欢迎。
刚解决热图指定标签问题,可是又有小伙伴在生信交流QQ群(群号:133146835)提问如何提取热图聚类簇中的基因。废话不多说,下面就直接为大家介绍两种常用的提取热图基因簇的方法。
1.pheatmap包绘制的热图
[AppleScript] 纯文本查看 复制代码 #载入pheatmap包;
library(pheatmap)
#设置工作目录;
setwd("C:/Users/86136/Desktop/获取热图不同簇的基因")
#读入测试数据,这里有两个,先读入包含50个基因的表达量文件用于测试;
df1 <- read.csv("mat_50genes4.csv",header = T,row.names = 1)
#查看数据前6行;
head(df1)
[AppleScript] 纯文本查看 复制代码 #自定义渐变色;
mycol = colorRampPalette(c("purple", "white","orange"))(100)
#创建分组颜色条所需的数据框;
Group = factor(c(rep("TESA",3), rep("TESB",3), rep("TESC",3),rep("TESD",3)))
anndf <- data.frame(Group)
rownames(anndf) <- colnames(df1)
#自定义分组颜色条的颜色;
anncol = list(Group=c(TESA="#c77cff",TESB="#FF9999",TESC="#99CC00",TESD="#FF9900"))
#绘制热图;
#对热图中的文字字号,聚类树高度,渐变颜色条做了自定义;
#同时,根据聚类结果在行和列方向都添加了“gap”;
htmap<- pheatmap(df1, scale = "row",
border="white",
cellwidth = 18,
cellheight = 6,
color = mycol,
cutree_rows=4,
cutree_cols=4,
fontsize = 7,
fontsize_col = 6,
fontsize_row = 6,
treeheight_row=30,
treeheight_col=30,
annotation_col=anndf,
annotation_colors=anncol,
annotation_legend=F,
legend_breaks=c(-2.5,-1.5,0,1.5,2.5),
legend_labels=c("-2.5","-1.5","0","1.5","2.5"),
angle_col = 90)
[AppleScript] 纯文本查看 复制代码 #按照热图的聚类顺序获取全部基因列表;
order <- htmap$tree_row$order
genelist <- row.names(df1)[order]
#查看前8个;
genelist[1:8]
[AppleScript] 纯文本查看 复制代码 #提取热图的行方向(基因)的聚类树;
cluster <- htmap$tree_row
#绘制聚类树;
plot(cluster,hang = -1,cex=0.6,axes=FALSE,ann=FALSE)
[AppleScript] 纯文本查看 复制代码 #对聚类树进行分簇;
cut <- cutree(cluster,4)
cut
#然后,获取第4簇的genes,注意第4簇位于热图的上方;
(genes<- names(cut)[cut==4])
#提取相应的表达量数据;
(genesdf <- df1[genes,])
[AppleScript] 纯文本查看 复制代码 #导出到本地;
write.csv(genesdf,file = "genesdf_4.csv")
2.使用ComplexHeatmap包绘制的热图
[AppleScript] 纯文本查看 复制代码 #载入ComplexHeatmap和circlize包;
library(ComplexHeatmap)
library(circlize)
#读入测试数据,这里有两个,先读入包含50个基因的表达量文件用于测试;
df1 <- read.csv("mat_50genes4.csv",header = T,row.names = 1)
#对数据进行归一化;
#由于scale函数默认对列进行归一化,因此这里做了两次转置;
df_scaled <- t(scale(t(df1)))
#查看归一化后的数据前6行;
head(df_scaled)
[AppleScript] 纯文本查看 复制代码 #计算距离矩阵,dist默认对矩阵的行进行计算;
d <- dist(df_scaled,method = "euclidean")
#然后进行分层聚类(Hierarchical cluster);
cluster2 <- hclust(d, method = "complete")
#绘制聚类树;
plot(cluster2,hang = -1,cex=0.6,axes=FALSE,ann=FALSE)
[AppleScript] 纯文本查看 复制代码 #同样的方法,对聚类树进行分簇;
cut <- cutree(cluster2,4)
cut
#然后,获取第4簇的genes,注意第4簇位于热图的上方;
(genes<- names(cut)[cut==4])
#提取相应的表达量数据;
(genesdf <- df1[genes,])
[AppleScript] 纯文本查看 复制代码 #导出到本地;
write.csv(genesdf,file = "genesdf_4.csv")
#接下来获得数据的范围;
range(df_scaled)
#[1] -1.532186 2.878927
#然后,根据数据范围建立自定义颜色映射关系;
#green-red;
col_fun = colorRamp2(c(-1.6,0.6,2.8), c("greenyellow","white", "red"))
#准备分组颜色条数据;
class = anno_block(gp = gpar(fill = c("#c77cff","#FF9999","#99CC00","#FF9900"),
col="white"),height = unit(5, "mm"),
labels = c("TESA", "TESB", "TESD","TESC"),
labels_gp = gpar(col = "white", fontsize = 8,fontface="bold"))
group= HeatmapAnnotation(group=class)
#绘制聚类热图,注意,Heatmap函数其实可以直接指定现有的聚类树的;
Heatmap(df_scaled,
rect_gp = gpar(col="white"),
row_names_gp = gpar(fontsize = 6),
column_names_gp = gpar(fontsize = 8),
col = col_fun,
column_split = 4,
row_split = 4,
column_title = NULL,
row_title = NULL,
cluster_rows = TRUE,
show_row_dend = TRUE,
top_annotation =group,
name = "Exp")
[AppleScript] 纯文本查看 复制代码 #当基因数较多时可以隐藏行方向的聚类树;
Heatmap(df_scaled,
rect_gp = gpar(col="white"),
row_names_gp = gpar(fontsize = 6),
column_names_gp = gpar(fontsize = 8),
col = col_fun,
column_split = 4,
row_split = 4,
column_title = NULL,
row_title = NULL,
cluster_rows = TRUE,
show_row_dend = FALSE,
top_annotation =group,
name = "Exp")
可见, ComplexHeatmap包和pheatmap包完全一样,默认都是使用hclust()函数进行聚类。因此,二者提取基因簇的方法也基本相同。好啦,今天的内容就分享到这里啦~
本文作者:基迪奥-莫北
|
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有帐号?立即注册
x
|