添加基因組注釋,需要注意添加的基因組版本要和上游分析用到的一致: granges(HC) # GRanges object with 170827 ranges and 0 metadata columns: # seqnames ranges strand # <Rle> <IRanges> <Rle> # [1] chr1 629474-630381 * # [2] chr1 633581-634476 * # [3] chr1 778268-779188 * # [4] chr1 816903-817798 * # [5] chr1 819473-820427 * # ... ... ... ... # [170823] GL000219.1 50733-51595 * # [170824] GL000219.1 99273-100155 * # [170825] GL000219.1 165071-165956 * # [170826] GL000219.1 168659-169301 * # [170827] KI270713.1 21470-22359 * # ------- # seqinfo: 28 sequences from an unspecified genome; no seqlengths
#可以看到,features的基因組范圍,前面表示染色體的部分,有些不是標(biāo)準(zhǔn)的chr,也就是EnsDb.Hsapiens.v86種沒有 #而是,chromosome scaffolds,e.g. (KI270713.1),所以去除一下 HC <- HC[as.vector(seqnames(granges(HC)) %in% standardChromosomes(granges(HC))), ] AA <- AA[as.vector(seqnames(granges(AA)) %in% standardChromosomes(granges(AA))), ] SD <- SD[as.vector(seqnames(granges(SD)) %in% standardChromosomes(granges(SD))), ]
#添加基因組注釋 # extract gene annotations from EnsDb # 基因和基因組注釋信息是后續(xù)計算TSS富集得分,核小體含量和基因活躍度得分所必需的。 # 同樣,你得保證這里選擇的基因組版本得和你上游數(shù)據(jù)處理時用到的基因組版本一致。 library(AnnotationHub) ah <- AnnotationHub() query(ah, "EnsDb.Hsapiens.v98")#Search for the Ensembl 98 EnsDb for Homo sapiens on AnnotationHub ensdb_v98 <- ah[["AH75011"]]#這個版本的注釋文件可以保存一下,如果后續(xù)其他數(shù)據(jù)分析還用得到的話 # save(ensdb_v98, file = 'ensdb_v98.RData') annotations <- GetGRangesFromEnsDb(ensdb = ensdb_v98) # annotation1 <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) seqlevels(annotations) <- paste0('chr', seqlevels(annotations)) genome(annotations) <- "hg38"
# add the gene information to the object Annotation(HC) <- annotations Annotation(AA) <- annotations Annotation(SD) <- annotations
數(shù)據(jù)質(zhì)控,scATAC的數(shù)據(jù)質(zhì)控,signac的QC指標(biāo)主要有Nucleosome banding pattern,TSS富集分?jǐn)?shù),Total number of fragments in peaks,F(xiàn)raction of fragments in peaks,基因組黑名單區(qū)域reads比率。不要糾結(jié)于別人設(shè)置了什么樣的閾值,在我看來,只要是合理的,閾值都是個性化調(diào)整的,要針對自己實際的數(shù)據(jù)情況,不要盲目。signac提供了兩個作圖,DensityScatter,FragmentHistogram,可以借助設(shè)定指控指標(biāo),不同的樣本可以設(shè)置不同的質(zhì)控標(biāo)準(zhǔn)。 #接下來計算質(zhì)控指標(biāo) atac = list(HC,AA,SD) names(atac) <- c("HC","AA","SD")
#計算nucleosome signal score,TSS enrichment score #add fraction of reads in peaks,# add blacklist ratio for(i in seq_along(atac)){
atac[[i]] <- NucleosomeSignal(object = atac[[i]]) atac[[i]] <- TSSEnrichment(object = atac[[i]]) atac[[i]]$pct_reads_in_peaks <- atac[[i]]$peak_region_fragments / atac[[i]]$passed_filters * 100 atac[[i]]$blacklist_ratio <- FractionCountsInRegion(object = atac[[i]], assay = 'peaks',regions = blacklist_hg38_unified)
}
#plot一下nCount_peaks和TSS.enrichment密度圖,并且展示5%,10%,90%,95%百分位上的數(shù)值,可以作為質(zhì)控參考 DensityScatter(atac[["HC"]], x = 'nCount_peaks', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE) DensityScatter(atac[["AA"]], x = 'nCount_peaks', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE) DensityScatter(atac[["SD"]], x = 'nCount_peaks', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE)
#按照nucleosome_signal高低對細(xì)胞進(jìn)行分組,并plot不同fragment頻率分布圖,也可以作為質(zhì)控參考 for(i in seq_along(atac)){ atac[[i]]$nucleosome_group <- ifelse(atac[[i]]$nucleosome_signal > 5, 'NS > 5', 'NS <5') }
FragmentHistogram(object = atac[["HC"]], group.by = 'nucleosome_group') FragmentHistogram(object = atac[["AA"]], group.by = 'nucleosome_group') FragmentHistogram(object = atac[["SD"]], group.by = 'nucleosome_group')
#plot Vlnplot VlnPlot(object = atac[["SD"]], features = c('peak_region_fragments', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal', 'pct_reads_in_peaks'), pt.size = 0.1, ncol = 5)
#QC for(i in seq_along(atac)){
atac[[i]] <- subset(x = atac[[i]], subset = peak_region_fragments > 1000 & peak_region_fragments < 20000 & pct_reads_in_peaks > 15 & blacklist_ratio < 0.005 & nucleosome_signal < 4 & TSS.enrichment > 3)
}
質(zhì)控后,可以展示一下質(zhì)控后數(shù)據(jù)的質(zhì)量,這里我們與ArchR來一次聯(lián)動,我覺得ArchR的可視化挺好,用它的函數(shù)可視化! #展示質(zhì)控后結(jié)果數(shù)據(jù),這里我們與ArchR夢幻聯(lián)動,主要是我覺得ArchR質(zhì)控圖可視化看著還可以 # devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories())
#TSS & nCount_peaks 密度圖 library(ArchR) library(cowplot)
densityplot_list <- list() for (i in seq_along(atac)) {
df <- data.frame("nFrags"=log10(atac[[i]]$nCount_peaks),"TSSEnrichment" = atac[[i]]$TSS.enrichment) p <- ggPoint( x = df[,1], y = df[,2], colorDensity = TRUE, continuousSet = "sambaNight", xlabel = "Log10(Unique Fragments)", ylabel = "TSS Enrichment")+ geom_hline(yintercept = 3, lty = "dashed") + geom_vline(xintercept = log10(min(atac[[i]]$nCount_peaks)), lty = "dashed")+ ggtitle(paste0(names(atac)[i],"\n","Cells Pass Filter =",ncol(atac[[i]])))
densityplot_list[[i]] <- p }
plot_grid(densityplot_list[[1]], densityplot_list[[2]],densityplot_list[[3]],ncol = 3)
#fragment頻率分布圖 fix(FragmentHistogram) FragmentHistogram(object = atac[["HC"]], mycols = "#2488F0")+ggtitle("HC")+ FragmentHistogram(object = atac[["AA"]], mycols = "#51127CFF")+ggtitle("AA")+ FragmentHistogram(object = atac[["SD"]], mycols = "#B51F29")+ggtitle("SD")
降維聚類: for(i in seq_along(atac)){ atac[[i]] <- RunTFIDF(atac[[i]])#歸一化處理 atac[[i]] <- FindTopFeatures(atac[[i]], min.cutoff = 'q5')#特征值選擇,對應(yīng)scRNA就是高變基因 atac[[i]] <- RunSVD(atac[[i]]) } for(i in seq_along(atac)){ atac[[i]] <- RunUMAP(object = atac[[i]], reduction = 'lsi', dims = 2:30) atac[[i]] <- FindNeighbors(object = atac[[i]], reduction = 'lsi', dims = 2:30) atac[[i]] <- FindClusters(object = atac[[i]], verbose = FALSE, algorithm = 3, resolution = 0.8)#resolution也可以多設(shè)置,自行調(diào)整 }
DimPlot(atac[['HC']], label = T)+ggtitle('HC')+ DimPlot(atac[['AA']], label = T)+ggtitle('AA')+ DimPlot(atac[['SD']], label = T)+ggtitle('SD')
#add gene activity matrix
gene.activities <- list() for(i in seq_along(atac)){ gene.activities[[i]] <- GeneActivity(atac[[i]]) }
#將ene activity matrix添加到seurat,添加一個新的assay RNA。 for(i in seq_along(atac)){
atac[[i]][['RNA']] <- CreateAssayObject(counts = gene.activities[[i]]) atac[[i]] <- NormalizeData(object = atac[[i]],assay = 'RNA',normalization.method = 'LogNormalize',scale.factor = median(atac[[i]]$nCount_RNA)) }
接下來就可以對數(shù)據(jù)進(jìn)行注釋了!
|