Annotate Hotspots.
annotate_hotspots.Rd
Annotate a MAF-like data frame with a hot_spot column indicating recurrent mutations.
Examples
my_metadata = get_gambl_metadata()
#> Using the bundled metadata in GAMBLR.data...
all_coding_ssm = get_coding_ssm(these_samples_metadata = my_metadata,
projection = "grch37",
this_seq_type = "genome") %>%
dplyr::filter(Hugo_Symbol %in% c("EZH2",
"MEF2B","MYD88","KMT2D")) %>%
dplyr::arrange(Hugo_Symbol)
#> Using the bundled SSM calls (.maf) calls in GAMBLR.data...
#> id_ease: WARNING! 1838 samples in the provided metadata were removed because their seq types are not the same as in the `set_type` argument. Use `verbose = TRUE` to see their IDs.
#> after linking with metadata, we have mutations from 817 samples
hot_ssms = annotate_hotspots(all_coding_ssm)
hot_ssms %>% dplyr::filter(!is.na(hot_spot)) %>%
dplyr::select(1:5,37,hot_spot)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome HGVSp_Short hot_spot
#> 1 EZH2 0 . GRCh37 7 p.Y646N TRUE
#> 2 EZH2 0 . GRCh37 7 p.Y646H TRUE
#> 3 EZH2 0 . GRCh37 7 p.Y646N TRUE
#> 4 EZH2 0 . GRCh37 7 p.Y646F TRUE
#> 5 EZH2 0 . GRCh37 7 p.Y646N TRUE
#> 6 EZH2 0 . GRCh37 7 p.Y646N TRUE
#> 7 EZH2 0 . GRCh37 7 p.Y646F TRUE
#> 8 EZH2 0 . GRCh37 7 p.Y646S TRUE
#> 9 EZH2 0 . GRCh37 7 p.Y646S TRUE
#> 10 EZH2 0 . GRCh37 7 p.Y646F TRUE