# Walk-through of the 'ggmsa' package: load the bundled sample alignments and
# draw them with the available colour schemes, fonts and display options.
# Lines starting with "# " directly after a call are console output captured
# from the original session (ggmsa 0.0.4, R 3.6.3).

## 1. Set the working directory ----
# Nothing in this script reads or writes a relative path (the sample data is
# located with system.file()), so only change directory when the folder
# actually exists instead of aborting the whole script.
if (dir.exists("./ggmsa")) {
  setwd("./ggmsa")
}

## 2. Install and load the R packages ----
# install.packages("ggmsa")
library(ggmsa)
library(ggplot2)

## 3. Brief package information ----
help(package = "ggmsa")
# Package: ggmsa
# Title: Plot Multiple Sequence Alignment using 'ggplot2'
# Version: 0.0.4
# Authors@R: c(
#     person("Guangchuang", "Yu", email = "guangchuangyu@gmail.com",
#            role = c("aut", "cre"),
#            comment = c(ORCID = "0000-0002-6485-8781")),
#     person("Lang", "Zhou", email = "nyzhoulang@gmail.com", role = "aut"),
#     person("Huina", "Huang", email = "1185796994@qq.com", role = "ctb"))
# Description: Supports visualizing multiple sequence alignment of DNA and
#     protein sequences using 'ggplot2'. It supports a number of colour
#     schemes, including Chemistry, Clustal, Shapely, Taylor and Zappo.
#     Multiple sequence alignment can easily be combined with other 'ggplot2'
#     plots, such as aligning a phylogenetic tree produced by 'ggtree' with
#     multiple sequence alignment.
# Depends: R (>= 3.5.0)
# Imports: Biostrings, ggplot2, magrittr, tidyr, utils, stats, stringr
# Suggests: ape, cowplot, ggtree, knitr, methods, seqmagick
# License: Artistic-2.0
# Encoding: UTF-8
# LazyData: true
# RoxygenNote: 7.1.0
# VignetteBuilder: knitr
# NeedsCompilation: no
# Packaged: 2020-05-28 08:15:32 UTC; ygc
# Author: Guangchuang Yu [aut, cre] (<https://orcid.org/0000-0002-6485-8781>),
#   Lang Zhou [aut],
#   Huina Huang [ctb]
# Maintainer: Guangchuang Yu <guangchuangyu@gmail.com>
# Repository: CRAN
# Date/Publication: 2020-05-28 10:50:10 UTC
# Built: R 3.6.3; ; 2020-05-29 14:03:22 UTC; windows

# Pass the search-path entry as a string: the bare form `package:ggmsa` only
# works through ls()'s error fallback and raises a warning.
ls("package:ggmsa")
#  [1] "available_colors" "available_fonts"  "available_msa"
#  [4] "facet_msa"        "geom_asterisk"    "geom_GC"
#  [7] "geom_msa"         "geom_seed"        "geom_seqlogo"
# [10] "ggmotif"          "ggmsa"            "tidy_msa"

## 4. Testing ----
# Plot multiple sequence alignment using ggplot2 with multiple color schemes
# supported.
# Supports visualizing multiple sequence alignment of DNA and protein
# sequences using ggplot2. It supports a number of colour schemes, including
# Chemistry, Clustal, Shapely, Taylor and Zappo. Multiple sequence alignment
# can easily be combined with other 'ggplot2' plots, such as aligning a
# phylogenetic tree produced by 'ggtree' with multiple sequence alignment.

### 4.1 Load sample data
# Three sample data are shipped with the ggmsa package. Note that ggmsa
# supports not only fasta files but other objects as well. available_msa()
# can be used to list MSA objects currently available.
available_msa()
# files currently available:
# .fasta
# XStringSet objects from 'Biostrings' package:
# DNAStringSet RNAStringSet AAStringSet BStringSet DNAMultipleAlignment
# RNAMultipleAlignment AAMultipleAlignment
# bin objects from 'seqmagick' package:
# DNAbin AAbin

protein_sequences <- system.file("extdata", "sample.fasta", package = "ggmsa")
miRNA_sequences <- system.file("extdata", "seedSample.fa", package = "ggmsa")
nt_sequences <- system.file(
  "extdata", "LeaderRepeat_All.fa",
  package = "ggmsa"
)

path.package("ggmsa")
# [1] "C:/Users/lenovo/Documents/R/win-library/3.6/ggmsa"

# Visualizing Multiple Sequence Alignments #

### 4.2 The most simple code to use ggmsa:
?ggmsa

# Basic plot
ggmsa(protein_sequences, start = 265, end = 300)

# Adjust the parameters to customise the multiple sequence alignment plot
ggmsa(
  protein_sequences,
  start = 265,
  end = 300,
  font = "TimesNewRoman",
  color = "Clustal",
  char_width = 0.8,
  none_bg = TRUE,
  seq_name = TRUE
)

ggmsa(
  protein_sequences,
  start = 265,
  end = 300,
  font = "TimesNewRoman",
  color = "Chemistry_AA",
  char_width = 0.8,
  none_bg = FALSE
)

# Colour Schemes #
available_colors()
# color schemes for nucleotide sequences currently available:
# Chemistry_NT Shapely_NT Taylor_NT Zappo_NT
# color schemes for AA sequences currently available:
# Clustal Chemistry_AA Shapely_AA Zappo_AA Taylor_AA

### 4.3 Clustal X Colour Scheme (Default)
# This is an emulation of the default colour scheme used for alignments in
# Clustal X, a graphical interface for the ClustalW multiple sequence
# alignment program. Each residue in the alignment is assigned a colour if
# the amino acid profile of the alignment at that position meets some minimum
# criteria specific for the residue type.
ggmsa(protein_sequences, start = 320, end = 360, color = "Clustal")

### 4.4 Color by Chemistry
# Amino acids are colored according to their side chain chemistry:
ggmsa(protein_sequences, start = 320, end = 360, color = "Chemistry_AA")

### 4.5 Color by Shapely
# This color scheme matches the RasMol amino acid and RasMol nucleotide color
# schemes, which are, in turn, based on Robert Fletterick's "Shapely models".
ggmsa(protein_sequences, start = 320, end = 360, color = "Shapely_AA")

### 4.6 Color by Taylor
# This color scheme is taken from Taylor (Taylor 1997) and is also used in
# JalView (Waterhouse et al. 2009).
ggmsa(protein_sequences, start = 320, end = 360, color = "Taylor_AA")

### 4.7 Color by Zappo
# This scheme colors residues according to their physico-chemical properties,
# and is also used in JalView (Waterhouse et al. 2009).
ggmsa(protein_sequences, start = 320, end = 360, color = "Zappo_AA")

### 4.8 Font
# Several classic fonts for MSA are shipped in the package. In the same way,
# you can use available_fonts() to list the fonts currently available.
available_fonts()
# font families currently available:
# helvetical mono TimesNewRoman DroidSansMono

# helvetical ("helvetical" is the family name as listed by available_fonts())
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "helvetical",
  color = "Chemistry_AA"
)

# TimesNewRoman
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "TimesNewRoman",
  color = "Chemistry_AA"
)

# DroidSansMono
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = "DroidSansMono",
  color = "Chemistry_AA"
)

# If you specify font = NULL, only tiles will be plotted.
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = FALSE
)

ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = TRUE
)

### 4.9 Characters width
# Character width can be specified by char_width. The default is 0.9.
ggmsa(
  protein_sequences,
  start = 320,
  end = 360,
  char_width = 0.5,
  color = "Chemistry_AA"
)

### 4.10 Background
# Background can be specified by none_bg. If none_bg = TRUE, only the
# characters will be plotted.
ggmsa(protein_sequences, start = 320, end = 360, none_bg = TRUE) +
  theme_void()

### 4.11 Position highlighted
# Highlighted positions can be specified by posHighligthed (the argument name
# is spelled this way by the package). none_bg is FALSE when positions are
# highlighted through posHighligthed.

# Non-contiguous highlighting
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = c(185, 190)
)

ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = c(180, 190, 200)
)

# Contiguous highlighting
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  posHighligthed = c(180:200)
)

### 4.12 Sequence names
# seq_name defaults to NULL, which means the sequence names are displayed
# when font = NULL but are not displayed when a font is given.
# 1. If seq_name = TRUE the sequence names will be displayed when you need
#    them.
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  color = "Chemistry_AA",
  seq_name = TRUE
)

# 2. If seq_name = FALSE the sequence names will not be displayed in any case.
ggmsa(
  protein_sequences,
  start = 164,
  end = 213,
  font = NULL,
  color = "Chemistry_AA",
  seq_name = FALSE
)

## 5. End ----
# RUNRPTEST("./ggmsa", rpackage = "ggmsa", install_method = "website",
#           rpackage_repository = "cran")
sessionInfo()
# R version 3.6.3 (2020-02-29)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 18363)
#
# Matrix products: default
#
# locale:
# [1] LC_COLLATE=Chinese (Simplified)_China.936
# [2] LC_CTYPE=Chinese (Simplified)_China.936
# [3] LC_MONETARY=Chinese (Simplified)_China.936
# [4] LC_NUMERIC=C
# [5] LC_TIME=Chinese (Simplified)_China.936
#
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods
# [7] base
#
# other attached packages:
# [1] ggplot2_3.3.0 ggmsa_0.0.4
#
# loaded via a namespace (and not attached):
#  [1] Rcpp_1.0.4.6        pillar_1.4.4        compiler_3.6.3
#  [4] XVector_0.26.0      tools_3.6.3         zlibbioc_1.32.0
#  [7] digest_0.6.25       packrat_0.5.0       lifecycle_0.2.0
# [10] tibble_3.0.1        gtable_0.3.0        pkgconfig_2.0.3
# [13] rlang_0.4.6         rstudioapi_0.11     seqmagick_0.1.3
# [16] parallel_3.6.3      withr_2.2.0         dplyr_0.8.5
# [19] stringr_1.4.0       Biostrings_2.54.0   S4Vectors_0.24.3
# [22] vctrs_0.3.0         IRanges_2.20.2      stats4_3.6.3
# [25] grid_3.6.3          tidyselect_1.1.0    glue_1.4.1
# [28] R6_2.4.1            purrr_0.3.4         tidyr_1.1.0
# [31] farver_2.0.3        magrittr_1.5        scales_1.1.1
# [34] ellipsis_0.3.1      BiocGenerics_0.32.0 assertthat_0.2.1
# [37] colorspace_1.4-1    labeling_0.3        stringi_1.4.6
# [40] munsell_0.5.0       crayon_1.3.4

# Two references, for anyone interested in further reading:
# Taylor, W R. 1997. "Residual Colours: A Proposal for Aminochromography."
#   Protein Eng 10 (7): 743-46.
# Waterhouse, A. M., J. B. Procter, D. M. Martin, M Clamp, and G. J. Barton.
#   2009. "Jalview Version 2 - a Multiple Sequence Alignment Editor and
#   Analysis Workbench." Bioinformatics 25 (9): 1189.