Annotate Hotspots.
annotate_hotspots.Rd
Annotate a MAF-like data frame with a hot_spot column indicating recurrent mutations.
Usage
annotate_hotspots(
mutation_maf,
recurrence_min = 5,
analysis_base = c("FL--DLBCL", "BL--DLBCL"),
p_thresh = 0.05
)
Details
This function takes an already loaded MAF data frame with the mutation_maf
parameter.
The user can then control the minimum number of recurrences for mutations to be included with recurrence_min
.
The default is 5. analysis_base
controls the base name of the hotspot output directory.
Lastly, p_thresh
sets the p value threshold, default is 0.05.
Examples
my_metadata = suppressMessages(get_gambl_metadata())
# get a few SSMs to annotate
some_coding_ssm = get_coding_ssm(these_samples_metadata = my_metadata,
projection = "grch37",
this_seq_type = "genome") %>%
dplyr::filter(Hugo_Symbol %in% c("EZH2","MEF2B","MYD88","KMT2D")) %>%
dplyr::arrange(Hugo_Symbol)
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#> dat <- vroom(...)
#> problems(dat)
# peek at the data
dplyr::select(some_coding_ssm,1:10,37) %>% head()
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position
#> 1 EZH2 0 . GRCh37 7 148504773
#> 2 EZH2 0 . GRCh37 7 148504791
#> 3 EZH2 0 . GRCh37 7 148504802
#> 4 EZH2 0 . GRCh37 7 148506215
#> 5 EZH2 0 . GRCh37 7 148506437
#> 6 EZH2 0 . GRCh37 7 148506437
#> End_Position Strand Variant_Classification Variant_Type HGVSp_Short
#> 1 148504773 + Missense_Mutation SNP p.Y741H
#> 2 148504791 + Missense_Mutation SNP p.Q735K
#> 3 148504802 + Splice_Region SNP <NA>
#> 4 148506215 + Missense_Mutation SNP p.I715F
#> 5 148506437 + Missense_Mutation SNP p.A692V
#> 6 148506437 + Missense_Mutation SNP p.A692V
hot_ssms = annotate_hotspots(some_coding_ssm)
#> Adding missing grouping variables: `SYMBOL`
#> Adding missing grouping variables: `SYMBOL`
hot_ssms %>%
dplyr::filter(!is.na(hot_spot)) %>%
dplyr::select(1:10,37,hot_spot)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position
#> 1 EZH2 0 . GRCh37 7 148508727
#> 2 EZH2 0 . GRCh37 7 148508727
#> 3 EZH2 0 . GRCh37 7 148508727
#> 4 EZH2 0 . GRCh37 7 148508727
#> 5 EZH2 0 . GRCh37 7 148508727
#> 6 EZH2 0 . GRCh37 7 148508727
#> 7 EZH2 0 . GRCh37 7 148508727
#> 8 EZH2 0 . GRCh37 7 148508727
#> 9 EZH2 0 . GRCh37 7 148508727
#> 10 EZH2 0 . GRCh37 7 148508727
#> End_Position Strand Variant_Classification Variant_Type HGVSp_Short hot_spot
#> 1 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 2 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 3 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 4 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 5 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 6 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 7 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 8 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 9 148508727 + Missense_Mutation SNP p.Y646F TRUE
#> 10 148508727 + Missense_Mutation SNP p.Y646F TRUE
if (FALSE) { # \dontrun{
#This example will raise an error due to the user supplying an unsupported genome build:
more_coding_ssm = get_coding_ssm(
these_samples_metadata = my_metadata,
projection = "hg38",
this_seq_type = "capture") %>%
dplyr::filter(Hugo_Symbol %in% c("EZH2","MEF2B","MYD88","KMT2D")) %>%
dplyr::arrange(Hugo_Symbol)
# peek at the data
dplyr::select(more_coding_ssm,1:10,37) %>% head()
more_hot_ssms = annotate_hotspots(more_coding_ssm)
more_hot_ssms %>%
dplyr::filter(!is.na(hot_spot)) %>%
dplyr::select(1:10,37,hot_spot)
} # }