新課發(fā)布在B站了,馬上有熱心的粉絲看完后寫(xiě)了配套筆記: 多個(gè)亞群各自merker基因聯(lián)合進(jìn)行GO以及KEGG分析
在前面幾節(jié)我們已經(jīng)知道各個(gè)細(xì)胞亞群的maerker基因,接下來(lái)我們對(duì)這些marker基因進(jìn)行功能注釋和富集分析。 讀取數(shù)據(jù)rm(list=ls()) library(Seurat) library(gplots) library(ggplot2) load('sce.markers.all_10_celltype.Rdata')
功能注釋library(clusterProfiler) library(org.Hs.eg.db) ids=bitr(sce.markers$gene,'SYMBOL','ENTREZID','org.Hs.eg.db') ## 將SYMBOL轉(zhuǎn)成ENTREZID sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL') View(sce.markers)
可見(jiàn)已經(jīng)在數(shù)據(jù)中添加ENTREZID列,接下來(lái)進(jìn)行kegg注釋?zhuān)?/p>## 函數(shù)split()可以按照分組因子,把向量,矩陣和數(shù)據(jù)框進(jìn)行適當(dāng)?shù)姆纸M。 ## 它的返回值是一個(gè)列表,代表分組變量每個(gè)水平的觀測(cè)。 gcSample=split(sce.markers$ENTREZID, sce.markers$cluster) ## KEGG xx <- compareCluster(gcSample, fun = "enrichKEGG", organism = "hsa", pvalueCutoff = 0.05 ) p <- dotplot(xx) p + theme(axis.text.x = element_text( angle = 45, vjust = 0.5, hjust = 0.5 ))
同樣的進(jìn)行GO功能注釋 ## GO xx <- compareCluster(gcSample, fun = "enrichGO", OrgDb = "org.Hs.eg.db", ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05 ) p <- dotplot(xx) p + theme(axis.text.x = element_text( angle = 45, vjust = 0.5, hjust = 0.5 ))
差異分析后的GO以及KEGG分析具體差異分析方法前面已經(jīng)講過(guò),經(jīng)過(guò)差異分析后會(huì)得到上下調(diào)基因,此時(shí)可對(duì)上下調(diào)基因進(jìn)行GO和KEGG分析。 讀取數(shù)據(jù)rm(list = ls()) library(Seurat) library(ggplot2) library(patchwork) library(dplyr) load(file = 'basic.sce.pbmc.Rdata') sce=pbmc sce = sce[, Idents(sce) %in% c("FCGR3A+ Mono", "CD14+ Mono")] # 挑選細(xì)胞
差異分析deg=FindMarkers(object = sce, ident.1 = 'FCGR3A+ Mono', ident.2 = 'CD14+ Mono', test.use='MAST' ) ## MAST在單細(xì)胞領(lǐng)域較為常用 head(deg) save(deg,file = 'deg-by-MAST-for-mono-2-cluster.Rdata')
火山圖參考https://www.jianshu.com/p/ba05e790d8c3 單細(xì)胞的logFC不像普通測(cè)序那樣大于1很多,但是p值小于0.05賊多,所以火山圖參考一篇CNS 文章,只畫(huà)了p值的分界線(xiàn) degdf <- deg degdf$symbol <- rownames(deg) logFC_t=0 P.Value_t = 1e-28 degdf$change = ifelse(degdf$p_val_adj < P.Value_t & degdf$avg_log2FC < 0,"down", ifelse(degdf$p_val_adj < P.Value_t & degdf$avg_log2FC > 0,"up","stable")) ggplot(degdf, aes(avg_log2FC, -log10(p_val_adj))) + geom_point(alpha=0.4, size=3.5, aes(color=change)) + ylab("-log10(Pvalue)")+ scale_color_manual(values=c("blue", "grey","red"))+ geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) + theme_bw()
功能注釋## 獲取上下調(diào)基因 gene_up=rownames(deg[deg$avg_log2FC > 0,]) gene_down=rownames(deg[deg$avg_log2FC < 0,]) ## 把SYMBOL改為ENTREZID library(org.Hs.eg.db) gene_up=as.character(na.omit(AnnotationDbi::select(org.Hs.eg.db, keys = gene_up, columns = 'ENTREZID', keytype = 'SYMBOL')[,2])) gene_down=as.character(na.omit(AnnotationDbi::select(org.Hs.eg.db, keys = gene_down, columns = 'ENTREZID', keytype = 'SYMBOL')[,2])) library(clusterProfiler) ## 以上調(diào)基因?yàn)槔?,下調(diào)基因同理 ## KEGG gene_up <- unique(gene_up) kk.up <- enrichKEGG(gene = gene_up, organism = "hsa", pvalueCutoff = 0.9, qvalueCutoff = 0.9) dotplot(kk.up)
## GO go.up <- enrichGO(gene = gene_up, OrgDb = org.Hs.eg.db, ont = "BP" , pAdjustMethod = "BH", pvalueCutoff = 0.99, qvalueCutoff = 0.99, readabl = TRUE) dotplot(go.up)
差異分析后的GSEA分析## 上一步差異分析得到差異基因列表deg后取出,p值和log2FC nrDEG = deg[,c('avg_log2FC', 'p_val')] colnames(nrDEG)=c('log2FoldChange','pvalue') ##更改列名 library(org.Hs.eg.db) library(clusterProfiler) ## 把SYMBOL轉(zhuǎn)換為ENTREZID,可能有部分丟失 gene <- bitr(rownames(nrDEG), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ## 基因名、ENTREZID、logFC一一對(duì)應(yīng)起來(lái) gene$logFC <- nrDEG$log2FoldChange[match(gene$SYMBOL,rownames(nrDEG))] ## 構(gòu)建genelist geneList=gene$logFC names(geneList)=gene$ENTREZID geneList=sort(geneList,decreasing = T) # 降序,按照l(shuí)ogFC的值來(lái)排序 ## GSEA分析 kk_gse <- gseKEGG(geneList = geneList, organism = 'hsa', nPerm = 1000, minGSSize = 10, pvalueCutoff = 0.9, verbose = FALSE) kk_gse=DOSE::setReadable(kk_gse, OrgDb='org.Hs.eg.db',keyType='ENTREZID') sortkk<-kk_gse[order(kk_gse$enrichmentScore, decreasing = T),] library(enrichplot) gseaplot2(kk_gse, "hsa04510", color = "firebrick", rel_heights=c(1, .2, .6))
## 展示排名前四的通路 gseaplot2(kk_gse, row.names(sortkk)[1:4])
## 把p值標(biāo)上去 gseaplot2(kk_gse, "hsa04510", color = "firebrick", rel_heights=c(1, .2, .6), pvalue_table = TRUE)
|