在上一節(jié),由于大部分細(xì)胞(868個(gè))都被歸為上皮細(xì)胞群中(Fig2 c),這868個(gè)細(xì)胞可被分成5個(gè)cluster,接著對(duì)這5個(gè)cluster細(xì)胞進(jìn)行探索。我們使用一組來自對(duì)乳腺腫塊的非監(jiān)督分析的基因表達(dá)特征對(duì)5個(gè)cluster進(jìn)行了研究。這些基因表達(dá)特征通過比較三陰性乳腺癌(TNBC)的四個(gè)亞型(ERBB2 amplicon,Luminal Subtype 、Basal epithelial-cell enriched 和Luminal epithelial gene cluster containing ER)而建立。先看看這5個(gè)clusters的basal細(xì)胞來源的細(xì)胞群有多少。大多數(shù)TNBC是基底樣腫瘤,它們與多種TNBC型亞型重疊,與非固有基底TNBCs相比,與克隆異質(zhì)性增加有關(guān)。(備注:這篇文獻(xiàn)用到了很多apply循環(huán),大家仔細(xì)琢磨,大概意思能看懂就行,然后可以把它應(yīng)用到自己的數(shù)據(jù)中)
## 讀取數(shù)據(jù) basal_PNAS_all <- read.table("data/genes_for_basal_vs_non_basal_tnbc_PNAS.txt" , header = TRUE, sep = "\t" )#提取Basal.epithelial.cell.enriched.cluster的基因 basal_PNAS_long <- basal_PNAS_all$Basal .epithelial.cell.enriched.cluster#合并剩下17個(gè)基因 basal_PNAS <- intersect(basal_PNAS_long, rownames(mat_ct)) > basal_PNAS [1] "SOX9" "GALNT3" "CDH3" "LAMC2" "CX3CL1" "TRIM29" "KRT17" "KRT5" "CHI3L2" [10] "SLPI" "NFIB" "MRAS" "TGFB2" "CAPN6" "DMD" "FABP7" "CXCL1" #算出17個(gè)basal_PNAS基因在1112個(gè)細(xì)胞的表達(dá)平均值 basal_PNAS_avg_exprs <- apply(mat_ct[match(basal_PNAS, rownames(mat_ct)),], 2, mean)#檢查一下數(shù)據(jù) all.equal(names(basal_PNAS_avg_exprs), colnames(mat_ct))#提取868個(gè)上皮細(xì)胞群體的17個(gè)basal_PNAS基因表達(dá)平均值 basal_PNAS_avg_exprs <- basal_PNAS_avg_exprs[which (pd_ct$cell_types_cl_all == "epithelial" )]#檢查一下數(shù)據(jù) all.equal(colnames(HSMM_allepith_clustering), names(basal_PNAS_avg_exprs))#把17個(gè)basal_PNAS基因表達(dá)平均值賦給HSMM_allepith_clustering,以便于后續(xù)分析 pData(HSMM_allepith_clustering)$basal_PNAS_avg_exprs <- basal_PNAS_avg_exprs#畫figS9b plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "basal_PNAS_avg_exprs" , cell_size = 2) + facet_wrap(~patient) + scale_color_continuous(low = "yellow" , high = "blue" )
#畫figS9a plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "basal_PNAS_avg_exprs" , cell_size = 2) + facet_wrap(~Cluster) + scale_color_continuous(low = "yellow" , high = "blue" )
figS9a:大多數(shù)TNBC樣本都有basal gene signature的表達(dá)。figS9b:在868個(gè)上皮細(xì)胞群中,cluster2的basal gene signature表達(dá)量最豐富。
接著,使用另外一個(gè)基因表達(dá)特征數(shù)據(jù)集TNBCtype4 signatures(Lehman_signature),這個(gè)signatures根據(jù)基因表達(dá)變化將TNBC細(xì)胞分為6個(gè)類:basal_like_1、basal_like_2、immunomodulatory、mesenchymal、mesenchymal_stem_like和luminal_ar。作者將基因表達(dá)特征中上調(diào)基因的平均表達(dá)值減去下調(diào)基因的平均表達(dá)值,將差值作為每個(gè)細(xì)胞在TNBCtype4 signatures (basal_like_1、basal_like_2、mesenchymal和luminal_ar)中的每個(gè)基因表達(dá)值,挑選最高基因表達(dá)值對(duì)應(yīng)的signature,將其分配給對(duì)應(yīng)的細(xì)胞。
#讀取數(shù)據(jù) lehman_long <- read.table("data/Lehman_signature.txt" , sep = "\t" , header = TRUE, stringsAsFactors = FALSE)#這個(gè)for循環(huán)提取了lehman_long里面的四列g(shù)ene、regulation、no_samples和signature,建立一個(gè)data.rrame for (i in 0:5) { gene <- "gene" regulation <- "regulation" no_samples <- "no_samples" signature <- "signature" if (i == 0) { lehman <- lehman_long[, 1:4] lehman <- lehman[-which (lehman$signature == "" ),] } if (i > 0) { gene <- paste("gene" , i, sep = "." ) regulation <- paste("regulation" , i, sep = "." ) no_samples <- paste("no_samples" , i, sep = "." ) signature <- paste("signature" , i, sep = "." ) mat_to_bind <- lehman_long[, c(gene, regulation, no_samples, signature)] colnames(mat_to_bind) <- c("gene" , "regulation" , "no_samples" , "signature" ) if (length(which (is.na(mat_to_bind$no_samples ))) > 0 ) mat_to_bind <- mat_to_bind[-which (mat_to_bind$signature == "" ),] lehman <- rbind(lehman, mat_to_bind) } }#刪掉一些mat_ct沒有檢測(cè)到的基因 lehman <- lehman[which (!is.na(match(lehman$gene , rownames(mat_ct)))),] lehman_signatures <- unique(lehman$signature ) lehman_avg_exps <- apply(mat_ct, 2, function (x){ mns <- matrix(NA, nrow = length(lehman_signatures), ncol = 2) rownames(mns) <- lehman_signatures for (s in 1:length(lehman_signatures)) { sign <- lehman_signatures[s] # current signature lehman_here <- lehman %>% dplyr::filter(signature == sign) lehman_here_up <- lehman_here %>% dplyr::filter(regulation == "UP" ) lehman_here_down <- lehman_here %>% dplyr::filter(regulation == "DOWN" ) idx_genes_up <- match(lehman_here_up$gene , rownames(mat_ct)) idx_genes_down <- match(lehman_here_down$gene , rownames(mat_ct)) mns[s,] <- c(mean(x[idx_genes_up]), mean(x[idx_genes_down])) #算上調(diào)、下調(diào)的基因在樣本中的平均表達(dá)值 } return (mns) })#檢查數(shù)據(jù) all.equal(colnames(lehman_avg_exps), rownames(pd_ct))#只看868個(gè)上皮細(xì)胞的情況 lehman_avg_exprs_epithelial <- lehman_avg_exps[,which (pd_ct$cell_types_cl_all == "epithelial" )]#提取lehman_avg_exps前面6行,對(duì)應(yīng)的是up lehman_avg_ups <- lehman_avg_exps[c(1:6), ] rownames(lehman_avg_ups) <- lehman_signatures all.equal(colnames(lehman_avg_ups), rownames(pd_ct)) lehman_avg_ups_epithelial <- lehman_avg_ups[,which (pd_ct$cell_types_cl_all == "epithelial" )]#提取lehman_avg_exps后面6行,對(duì)應(yīng)的是down lehman_avg_downs <- lehman_avg_exps[c(7:12),] rownames(lehman_avg_downs) <- lehman_signatures all.equal(colnames(lehman_avg_downs), rownames(pd_ct)) lehman_avg_downs_epithelial <- lehman_avg_downs[,which (pd_ct$cell_types_cl_all == "epithelial" )]#上調(diào)基因的平均表達(dá)值減去下調(diào)基因的平均表達(dá)值 lehman_avg_both <- lehman_avg_ups - lehman_avg_downs all.equal(colnames(lehman_avg_both), rownames(pd_ct))#挑選最高基因表達(dá)值對(duì)應(yīng)的signature,將其分配給對(duì)應(yīng)的細(xì)胞。 assignments_lehman_both <- apply(lehman_avg_both, 2, function (x){rownames(lehman_avg_both)[which.max(x)]}) assignments_lehman_both_epithelial <- assignments_lehman_both[which (pd_ct$cell_types_cl_all == "epithelial" )]#刪除immunomodulatory和mesenchymal_stem_like signature lehman_avg_both_epithelial_new <- lehman_avg_both_epithelial[-which (rownames(lehman_avg_both_epithelial) %in % c("immunomodulatory" , "mesenchymal_stem_like" )),] assignments_lehman_both_epithelial_new <- apply(lehman_avg_both_epithelial_new, 2, function (x){rownames(lehman_avg_both_epithelial_new)[which.max(x)]})
接下來畫圖,同樣地,需要對(duì)heatmap函數(shù)代碼進(jìn)行修改。
ha_lehman_epith_pat <- list()for (i in 1:length(patients_now)) { if (i == 1) ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), col = list(cluster_all = c("1" = "#ee204d" , "2" = "#17806d" , "3" = "#b2ec5d" , "4" = "#cda4de" , "5" = "#1974d2" )), annotation_name_side = "left" , annotation_name_gp = gpar(fontsize = 12), annotation_legend_param = list(list(title_position = "topcenter" , title = "cluster" )), show_annotation_name = FALSE, gap = unit(c(2), "mm" ), show_legend = FALSE) if (i > 1 && i != 5 ) ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), col = list(cluster_all = c("1" = "#ee204d" , "2" = "#17806d" , "3" = "#b2ec5d" , "4" = "#cda4de" , "5" = "#1974d2" )), annotation_name_side = "left" , annotation_name_gp = gpar(fontsize = 12), annotation_legend_param = list(list(title_position = "topcenter" , title = "cluster" )), show_annotation_name = FALSE, gap = unit(c(2), "mm" ), show_legend = FALSE) if (i == 5) ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), col = list(cluster_all = c("1" = "#ee204d" , "2" = "#17806d" , "3" = "#b2ec5d" , "4" = "#cda4de" , "5" = "#1974d2" )), annotation_name_side = "right" , annotation_name_gp = gpar(fontsize = 12), annotation_legend_param = list(list(title_position = "topcenter" ,title = "cluster" )), show_annotation_name = FALSE, gap = unit(c(2), "mm" ), show_legend = TRUE) }#檢查數(shù)據(jù) all.equal(names(lehmans_epith_pat_both), patients_now)#將basal signature添加進(jìn)去,以便后續(xù)作圖 lehmans_epith_pat_both_wbasal_new <- lehmans_epith_pat_both_newfor (i in 1:length(patients_now)) { lehmans_epith_pat_both_wbasal_new[[i]] <- rbind(lehmans_epith_pat_both_new[[i]], pData(HSMM_allepith_clustering)$basal_PNAS_avg_exprs [which (HSMM_allepith_clustering$patient == patients_now[i])]) rownames(lehmans_epith_pat_both_wbasal_new[[i]])[5] <- "intrinsic_basal" }# 畫圖 ht_sep_lehmans_both_wbasal_new <- Heatmap(lehmans_epith_pat_both_wbasal_new[[1]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[1], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[1]], name = patients_now[1], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(lehmans_epith_pat_both_wbasal_new[[2]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[2], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[2]], name = patients_now[2], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(lehmans_epith_pat_both_wbasal_new[[3]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[3], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[3]], name = patients_now[3], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(lehmans_epith_pat_both_wbasal_new[[4]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[4], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[4]], name = patients_now[4], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(lehmans_epith_pat_both_wbasal_new[[5]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[5], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[5]], name = patients_now[5], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(lehmans_epith_pat_both_wbasal_new[[6]], col = colorRamp2(c(-0.7, 0, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, row_names_side = "right" , column_title = patients_now[6], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[6]], name = patients_now[6], show_column_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))#畫fig3d print (draw(ht_sep_lehmans_both_wbasal_new, annotation_legend_side = "right" ))
我們只需要把右邊注釋條PS一下,就可以達(dá)到跟文獻(xiàn)的圖片一模一樣了。
#檢查數(shù)據(jù) all.equal(colnames(HSMM_allepith_clustering), names(assignments_lehman_both_epithelial_new)) pData(HSMM_allepith_clustering)$assignments_lehman_both_new <- assignments_lehman_both_epithelial_new 畫fig3g plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_lehman_both_new" , cell_size = 2) + facet_wrap(~patient)
#畫figS8 plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_lehman_both_new" , cell_size = 2) + facet_wrap(~Cluster)
cluster4也富集了 Basal-Like 1 signature,而cluster3高度富集了TNBCtype 中的“Luminal Androgen Receptor” signature。為了更清楚的看到上皮細(xì)胞群的5個(gè)cluster對(duì)應(yīng)的TNBCtype signatures的平均表達(dá)量,接著繼續(xù)探索下去...
clust_avg_lehman_both_new <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster )), ncol = nrow(lehman_avg_both_epithelial_new))#列名:cluster1,cluster2,cluster3,cluster4,cluster5 rownames(clust_avg_lehman_both_new) <- paste("clust" , c(1:length(unique(HSMM_allepith_clustering$Cluster ))), sep = "" )#行名:basal_like_1、basal_like_2、mesenchymal、luminal_ar" colnames(clust_avg_lehman_both_new) <- rownames(lehman_avg_both_epithelial_new)#算出每個(gè)cluster的signatures 平均值 for (c in 1:length(unique(HSMM_allepith_clustering$Cluster ))) { clust_avg_lehman_both_new[c,] <- apply(lehman_avg_both_epithelial_new[,which (HSMM_allepith_clustering$Cluster == c)], 1, mean) } clust_avg_lehman_both_new <- as.data.frame(clust_avg_lehman_both_new)#增加一列cluster clust_avg_lehman_both_new$Cluster <- rownames(clust_avg_lehman_both_new)#拆分?jǐn)?shù)據(jù) clust_avg_lehman_melt_new <- melt(clust_avg_lehman_both_new, "Cluster" )#畫fig3e ggplot(clust_avg_lehman_melt_new, aes(Cluster, value, fill = factor(variable), color = factor(variable), shape = factor(variable))) + geom_point(size = 3, stroke = 1) + scale_shape_discrete(solid = T) + #guides(colour = guide_legend(override.aes = list(size=3))) + ylab("average expression of signature in cluster" ) + xlab("cluster" ) + ylim(c(-0.35, 0.5))
可以看到:cluster2和4強(qiáng)烈地表達(dá)Basal-Like 1 signature,而cluster3顯著表達(dá)Basal-Like 2 signature和 luminal_AR signature
接著,讀取另外一個(gè)ML_signature(mature luminal signature),將上調(diào)基因的平均表達(dá)量中減去下調(diào)基因的平均表達(dá)量,計(jì)算出three normal breast signatures下每個(gè)細(xì)胞的表達(dá)量(Lim et al., 2009a),然后將每個(gè)細(xì)胞分配給其具有最高表達(dá)量的signatures。這三個(gè)normal breast signatures 是:mature luminal (ML),basal和luminal progenitor (LP),在每個(gè)signatures中,都有對(duì)應(yīng)的上調(diào)基因和下調(diào)基因。
ml_signature_long <- read.table("data/ML_signature.txt" , sep = "\t" , header = TRUE)if (length(which (ml_signature_long$Symbol == "" )) > 0)#將空白行去掉 ml_signature_long <- ml_signature_long[-which (ml_signature_long$Symbol == "" ),] #按照基因字母進(jìn)行排序,如果基因字母有一樣的,則按照Average.log.fold.change絕對(duì)值的負(fù)數(shù)進(jìn)行從小到大排序 ml_signature_long <- ml_signature_long[order(ml_signature_long$Symbol , -abs(ml_signature_long$Average .log.fold.change) ), ]#對(duì)基因取唯一值,去重復(fù) ml_signature_long <- ml_signature_long[ !duplicated(ml_signature_long$Symbol ), ]#總共有818個(gè)基因 ml_signature <- ml_signature_long[which (!is.na(match(ml_signature_long$Symbol , rownames(mat_ct)))), ]#上調(diào)基因有384個(gè) ml_up <- ml_signature[which (ml_signature$Average .log.fold.change > 0), ]#下調(diào)基因有197個(gè) ml_down <- ml_signature[which (ml_signature$Average .log.fold.change < 0), ]#匹配一下 idx_ml_up <- match(ml_up$Symbol , rownames(mat_ct)) idx_ml_down <- match(ml_down$Symbol , rownames(mat_ct))#讀取basal signature,處理過程跟上面的一樣的。 basal_signature_long <- read.table("data/basal_signature.txt" , sep = "\t" , header = TRUE)if (length(which (basal_signature_long$Symbol == "" )) > 0) basal_signature_long <- basal_signature_long[-which (basal_signature_long$Symbol == "" ),] basal_signature_long <- basal_signature_long[order(basal_signature_long$Symbol , -abs(basal_signature_long$Average .log.fold.change) ), ] basal_signature_long <- basal_signature_long[ !duplicated(basal_signature_long$Symbol ), ]#總共有1335個(gè)基因 basal_signature <- basal_signature_long[which (!is.na(match(basal_signature_long$Symbol , rownames(mat_ct)))), ]#上調(diào)基因有588個(gè) basal_up <- basal_signature[which (basal_signature$Average .log.fold.change > 0), ]#下調(diào)基因有757個(gè) basal_down <- basal_signature[which (basal_signature$Average .log.fold.change < 0), ] idx_basal_up <- match(basal_up$Symbol , rownames(mat_ct)) idx_basal_down <- match(basal_down$Symbol , rownames(mat_ct))#讀取LP signature,還是同樣的操作 lp_signature_long <- read.table("data/Lp_signature.txt" , sep = "\t" , header = TRUE)if (length(which (lp_signature_long$Symbol == "" )) > 0) lp_signature_long <- lp_signature_long[-which (lp_signature_long$Symbol == "" ),] lp_signature_long <- lp_signature_long[order(lp_signature_long$Symbol , -abs(lp_signature_long$Average .log.fold.change) ), ] lp_signature_long <- lp_signature_long[ !duplicated(lp_signature_long$Symbol ), ] lp_signature <- lp_signature_long[which (!is.na(match(lp_signature_long$Symbol , rownames(mat_ct)))), ] lp_up <- lp_signature[which (lp_signature$Average .log.fold.change > 0), ] lp_down <- lp_signature[which (lp_signature$Average .log.fold.change < 0), ] idx_lp_up <- match(lp_up$Symbol , rownames(mat_ct)) idx_lp_down <- match(lp_down$Symbol , rownames(mat_ct))#對(duì)ML、basal和LP 3個(gè)signatures基因,將上調(diào)基因的表達(dá)值減去下調(diào)基因表達(dá)值,并分別返回結(jié)果。 normsig_avg_exprs <- apply(mat_ct, 2, function (x){ avg_ml_up <- mean(x[idx_ml_up]) avg_ml_down <- mean(x[idx_ml_down]) avg_ml_both <- avg_ml_up - avg_ml_down avg_basal_up <- mean(x[idx_basal_up]) avg_basal_down <- mean(x[idx_basal_down]) avg_basal_both <- avg_basal_up - avg_basal_down avg_lp_up <- mean(x[idx_lp_up]) avg_lp_down <- mean(x[idx_lp_down]) avg_lp_both <- avg_lp_up - avg_lp_down return (c(avg_ml_up, avg_basal_up, avg_lp_up, avg_ml_both, avg_basal_both, avg_lp_both)) }) rownames(normsig_avg_exprs) <- c("avg_ml_up" , "avg_basal_up" , "avg_lp_up" , "avg_ml_both" , "avg_basal_both" , "avg_lp_both" )#檢查數(shù)據(jù) all.equal(colnames(normsig_avg_exprs), rownames(pd_ct))#只看上皮細(xì)胞群 normsig_avg_exprs_epithelial <- normsig_avg_exprs[,which (pd_ct$cell_types_cl_all == "epithelial" )] normsig_avg_ups <- normsig_avg_exprs[c(1:3), ] all.equal(colnames(normsig_avg_ups), rownames(pd_ct)) normsig_avg_ups_epithelial <- normsig_avg_ups[,which (pd_ct$cell_types_cl_all == "epithelial" )] normsig_avg_both <- normsig_avg_exprs[c(4:6),] all.equal(colnames(normsig_avg_both), rownames(pd_ct)) normsig_avg_both_epithelial <- normsig_avg_both[,which (pd_ct$cell_types_cl_all == "epithelial" )]#挑選最大值=上調(diào)基因的平均表達(dá)值最大數(shù)值分配給對(duì)應(yīng)的細(xì)胞類型 assignments_normsig_ups <- apply(normsig_avg_ups, 2, function (x){rownames(normsig_avg_ups)[which.max(x)]}) assignments_normsig_ups_epithelial <- assignments_normsig_ups[which (pd_ct$cell_types_cl_all == "epithelial" )]#上調(diào)基因的平均表達(dá)值-下調(diào)基因的平均表達(dá)值的最大數(shù)值分配給對(duì)應(yīng)的細(xì)胞類型 assignments_normsig_both <- apply(normsig_avg_both, 2, function (x){rownames(normsig_avg_both)[which.max(x)]}) assignments_normsig_both_epithelial <- assignments_normsig_both[which (pd_ct$cell_types_cl_all == "epithelial" )]# heatmaps on normal signatures per patient pd_ct_epith <- pd_ct[which (pd_ct$cell_types_cl_all == "epithelial" ),] normsig_epith_pat_both <- list() normsig_epith_pat_ups <- list() pds_epith_ct <- list()for (i in 1:length(patients_now)) { normsig_epith_pat_both[[i]] <- normsig_avg_both_epithelial[,which (pd_ct_epith$patient == patients_now[i])] normsig_epith_pat_ups[[i]] <- normsig_avg_ups_epithelial[,which (pd_ct_epith$patient == patients_now[i])] pds_epith_ct[[i]] <- pds_ct[[i]][which (pds_ct[[i]]$cell_types_cl_all == "epithelial" ),] } names(normsig_epith_pat_both) <- patients_now names(normsig_epith_pat_ups) <- patients_now names(pds_epith_ct) <- patients_now ht_sep_normsig_both <- Heatmap(normsig_epith_pat_both[[1]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[1], top_annotation = ha_lehman_epith_pat[[1]], column_title_gp = gpar(fontsize = 12), show_row_names = FALSE, name = patients_now[1], heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(normsig_epith_pat_both[[2]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[2], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[2]], name = patients_now[2], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(normsig_epith_pat_both[[3]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[3], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[3]], name = patients_now[3], show_row_names = FALSE, top_annotation_height = unit(c(2), "cm" ), heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(normsig_epith_pat_both[[4]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[4], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[4]], name = patients_now[4], show_row_names = FALSE, top_annotation_height = unit(c(2), "cm" ), heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(normsig_epith_pat_both[[5]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[5], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[5]], name = patients_now[5], show_row_names = FALSE, top_annotation_height = unit(c(2), "cm" ), heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(normsig_epith_pat_both[[6]], col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue" ,"white" , "red" )), cluster_rows = FALSE, row_names_side = "right" , column_title = patients_now[6], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[6]], name = patients_now[6], show_column_names = FALSE, top_annotation_height = unit(c(2), "cm" ), heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))#畫fig3b.pdf print (draw(ht_sep_normsig_both, annotation_legend_side = "bottom" ))
# 每個(gè)樣本的normal signatures 數(shù)目 all.equal(colnames(HSMM_allepith_clustering), names(assignments_normsig_both_epithelial)) pData(HSMM_allepith_clustering)$assignments_normsig_both <- assignments_normsig_both_epithelial pData(HSMM_allepith_clustering)$assignments_normsig_ups <- assignments_normsig_ups_epithelial#畫fig3f plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_normsig_both" , cell_size = 2) + facet_wrap(~patient)
#每個(gè)clusters 的normal signatures 數(shù)目 plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_normsig_both" , cell_size = 2) + facet_wrap(~Cluster)
#檢查數(shù)據(jù) all.equal(HSMM_allepith_clustering$Cluster , clustering_allepith) all.equal(colnames(normsig_avg_both_epithelial), colnames(HSMM_allepith_clustering)) clust_avg_normsig_both <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster )), ncol = nrow(normsig_avg_both_epithelial)) rownames(clust_avg_normsig_both) <- paste("clust" , c(1:length(unique(HSMM_allepith_clustering$Cluster ))), sep = "" ) colnames(clust_avg_normsig_both) <- rownames(normsig_avg_both_epithelial)#算上皮細(xì)胞群的avg_both 平均表達(dá)值,接下來還是同樣的操作 for (c in 1:length(unique(HSMM_allepith_clustering$Cluster ))) { clust_avg_normsig_both[c,] <- apply(normsig_avg_both_epithelial[,which (HSMM_allepith_clustering$Cluster == c)], 1, mean) } clust_avg_normsig_both <- as.data.frame(clust_avg_normsig_both) clust_avg_normsig_both$Cluster <- rownames(clust_avg_normsig_both) clust_avg_normsig_melt <- melt(clust_avg_normsig_both, "Cluster" )#畫fig3e ggplot(clust_avg_normsig_melt, aes(Cluster, value, fill = factor(variable), color = factor(variable), shape = factor(variable))) + geom_point(size = 3, stroke = 1) + scale_shape_discrete(solid = T) + ylab("average expression of signature in cluster" ) + xlab("cluster" ) + ylim(c(-0.35, 0.5))
fig3e:Clusters 2和Clusters4 強(qiáng)烈表達(dá) LP signature, 而cluster 3則高表達(dá) ML signature.
接著為了進(jìn)一步探究臨床相關(guān)性,作者使用了三個(gè)臨床相關(guān)的gene signatures,進(jìn)一步探究這868個(gè)上皮細(xì)胞的特征,這868個(gè)上皮細(xì)胞真的被研究到很徹底,真的佩服,這工作量好大....
第一個(gè)gene signatures:70-gene prognostic signature ,該signatures最初是從對(duì)有無轉(zhuǎn)移復(fù)發(fā)患者的原發(fā)腫瘤之間差異表達(dá)基因的分析中得出的,總共70個(gè)基因。
mammaprint_long <- read.table("data/mammaprint_sig_new.txt" , header = TRUE, sep = "\t" ) mammaprint <- apply(mammaprint_long, 2, function (x){return (intersect(x, rownames(mat_ct)))})[,1] mammaprint_avg_exprs <- apply(mat_ct[match(mammaprint, rownames(mat_ct)),], 2, mean) all.equal(names(mammaprint_avg_exprs), colnames(mat_ct)) mammaprint_avg_exprs <- mammaprint_avg_exprs[which (pd_ct$cell_types_cl_all == "epithelial" )] all.equal(colnames(HSMM_allepith_clustering), names(mammaprint_avg_exprs)) pData(HSMM_allepith_clustering)$mammaprint_avg_exprs <- mammaprint_avg_exprs# 畫figS13b plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "mammaprint_avg_exprs" , cell_size = 2) + facet_wrap(~patient) + scale_color_continuous(low = "yellow" , high = "blue" )
# 畫figS13a plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "mammaprint_avg_exprs" , cell_size = 2) + facet_wrap(~Cluster) + scale_color_continuous(low = "yellow" , high = "blue" )
第二個(gè)gene signatures:49-gene metastatic burden signature.該signatures可以區(qū)分了患者來源的小鼠TNBC異種移植模型中單個(gè)循環(huán)轉(zhuǎn)移細(xì)胞所產(chǎn)生的高轉(zhuǎn)移負(fù)荷和低轉(zhuǎn)移負(fù)荷,總共包括49個(gè)基因。
zenawerb_long <- read.table("data/werb_49_metastasis_sig.txt" , header = TRUE, sep = "\t" ) zenawerb <- apply(zenawerb_long, 2, function (x){return (intersect(x, rownames(mat_ct)))})[,1] zenawerb_avg_exprs <- apply(mat_ct[match(zenawerb, rownames(mat_ct)),], 2, mean) all.equal(names(zenawerb_avg_exprs), colnames(mat_ct)) zenawerb_avg_exprs <- zenawerb_avg_exprs[which (pd_ct$cell_types_cl_all == "epithelial" )] all.equal(colnames(HSMM_allepith_clustering), names(zenawerb_avg_exprs)) pData(HSMM_allepith_clustering)$zenawerb_avg_exprs <- zenawerb_avg_exprs#畫figS14b plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "zenawerb_avg_exprs" , cell_size = 2) + facet_wrap(~patient) + scale_color_continuous(low = "yellow" , high = "blue" )
#畫figS14a plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "zenawerb_avg_exprs" , cell_size = 2) + facet_wrap(~Cluster) + scale_color_continuous(low = "yellow" , high = "blue" )
第三個(gè)gene siganatures:從接受手術(shù)前化療治療的原發(fā)性乳腺癌患者的殘存腫瘤群體中富集的基因中獲得的,包括354個(gè)基因。
artega_long <- read.table("data/artega_sig.txt" , header = TRUE, sep = "\t" ) artega <- apply(artega_long, 2, function (x){return (intersect(x, rownames(mat_ct)))})[,1] artega_avg_exprs <- apply(mat_ct[match(artega, rownames(mat_ct)),], 2, mean) all.equal(names(artega_avg_exprs), colnames(mat_ct)) artega_avg_exprs <- artega_avg_exprs[which (pd_ct$cell_types_cl_all == "epithelial" )] all.equal(colnames(HSMM_allepith_clustering), names(artega_avg_exprs)) pData(HSMM_allepith_clustering)$artega_avg_exprs <- artega_avg_exprs 畫figS15a plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "artega_avg_exprs" , cell_size = 2) + facet_wrap(~patient) + scale_color_continuous(low = "yellow" , high = "blue" )
#畫figS15b plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "artega_avg_exprs" , cell_size = 2) + facet_wrap(~Cluster) + scale_color_continuous(low = "yellow" , high = "blue" )
#將3個(gè)gene signatures的表達(dá)值合成一個(gè)數(shù)據(jù)框 prognosis_sig <- cbind(mammaprint_avg_exprs, zenawerb_avg_exprs, artega_avg_exprs)#取行名 colnames(prognosis_sig) <- c("mammaprint" , "zenawerb" , "artega" ) prognosis_epith_pat <- list()for (i in 1:length(patients_now)) { prognosis_epith_pat[[i]] <- t(prognosis_sig)[,which (pd_ct_epith$patient == patients_now[i])] } names(prognosis_epith_pat) <- patients_nowfor (i in 1:length(patients_now)) { print (all.equal(colnames(prognosis_epith_pat[[1]]), rownames(pds_epith_ct[[1]]))) print (all.equal(names(clusterings_sep_allepith[[1]]), colnames(prognosis_epith_pat[[1]]))) } ht_sep_prognosis <- Heatmap(prognosis_epith_pat[[1]], cluster_rows = FALSE, col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), show_column_names = FALSE, column_title = patients_now[1], top_annotation = ha_lehman_epith_pat[[1]], column_title_gp = gpar(fontsize = 12), show_row_names = FALSE, name = patients_now[1], heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(prognosis_epith_pat[[2]], col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[2], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[2]], name = patients_now[2], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(prognosis_epith_pat[[3]], col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[3], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[3]], name = patients_now[3], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(prognosis_epith_pat[[4]], col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[4], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[4]], name = patients_now[4], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(prognosis_epith_pat[[5]], col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, show_column_names = FALSE, column_title = patients_now[5], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[5]], name = patients_now[5], show_row_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) + Heatmap(prognosis_epith_pat[[6]], col = colorRamp2(c(-0.2, 0.2, 1), c("blue" ,"white" , "red" )), cluster_rows = FALSE, row_names_side = "right" , column_title = patients_now[6], column_title_gp = gpar(fontsize = 12), top_annotation = ha_lehman_epith_pat[[6]], name = patients_now[6], show_column_names = FALSE, heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))#畫fig4a print (draw(ht_sep_prognosis, annotation_legend_side = "right" ))