Skip to contents

Annotate a MAF-like data frame with a hot_spot column indicating recurrent mutations.

Usage

annotate_hotspots(
  mutation_maf,
  recurrence_min = 5,
  analysis_base = c("FL--DLBCL", "BL--DLBCL"),
  p_thresh = 0.05
)

Arguments

mutation_maf

A data frame in MAF format.

recurrence_min

Minimum number of recurrences for a mutation to be included, default is 5.

analysis_base

Base name for hot spot output directory.

p_thresh

P value threshold, default is 0.05.

Value

The same data frame with one additional column "hot_spot".

Details

This function takes an already loaded MAF data frame via the mutation_maf parameter. The user can control the minimum number of recurrences required for a mutation to be included with recurrence_min; the default is 5. analysis_base controls the base name of the hot spot output directory. Lastly, p_thresh sets the p value threshold; the default is 0.05.

Examples

my_metadata = suppressMessages(get_gambl_metadata())
# get a few SSMs to annotate
some_coding_ssm = get_coding_ssm(these_samples_metadata = my_metadata,
                                projection = "grch37",
                                this_seq_type = "genome") %>% 
                  dplyr::filter(Hugo_Symbol %in% c("EZH2","MEF2B","MYD88","KMT2D")) %>%
                  dplyr::arrange(Hugo_Symbol)
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
# peek at the data
dplyr::select(some_coding_ssm,1:10,37) %>% head()
#> genomic_data Object
#> Genome Build: grch37 
#> Showing first 10 rows:
#>   Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position
#> 1        EZH2              0      .     GRCh37          7      148504773
#> 2        EZH2              0      .     GRCh37          7      148504791
#> 3        EZH2              0      .     GRCh37          7      148504802
#> 4        EZH2              0      .     GRCh37          7      148506215
#> 5        EZH2              0      .     GRCh37          7      148506437
#> 6        EZH2              0      .     GRCh37          7      148506437
#>   End_Position Strand Variant_Classification Variant_Type HGVSp_Short
#> 1    148504773      +      Missense_Mutation          SNP     p.Y741H
#> 2    148504791      +      Missense_Mutation          SNP     p.Q735K
#> 3    148504802      +          Splice_Region          SNP        <NA>
#> 4    148506215      +      Missense_Mutation          SNP     p.I715F
#> 5    148506437      +      Missense_Mutation          SNP     p.A692V
#> 6    148506437      +      Missense_Mutation          SNP     p.A692V

hot_ssms = annotate_hotspots(some_coding_ssm)
#> Adding missing grouping variables: `SYMBOL`
#> Adding missing grouping variables: `SYMBOL`
hot_ssms %>% 
   dplyr::filter(!is.na(hot_spot)) %>% 
   dplyr::select(1:10,37,hot_spot) 
#> genomic_data Object
#> Genome Build: grch37 
#> Showing first 10 rows:
#>    Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position
#> 1         EZH2              0      .     GRCh37          7      148508727
#> 2         EZH2              0      .     GRCh37          7      148508727
#> 3         EZH2              0      .     GRCh37          7      148508727
#> 4         EZH2              0      .     GRCh37          7      148508727
#> 5         EZH2              0      .     GRCh37          7      148508727
#> 6         EZH2              0      .     GRCh37          7      148508727
#> 7         EZH2              0      .     GRCh37          7      148508727
#> 8         EZH2              0      .     GRCh37          7      148508727
#> 9         EZH2              0      .     GRCh37          7      148508727
#> 10        EZH2              0      .     GRCh37          7      148508727
#>    End_Position Strand Variant_Classification Variant_Type HGVSp_Short hot_spot
#> 1     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 2     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 3     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 4     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 5     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 6     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 7     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 8     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 9     148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE
#> 10    148508727      +      Missense_Mutation          SNP     p.Y646F     TRUE

if (FALSE) { # \dontrun{
#This example will raise an error due to the user supplying an unsupported genome build:
more_coding_ssm = get_coding_ssm(
                                these_samples_metadata = my_metadata,
                                projection = "hg38",
                                this_seq_type = "capture") %>% 
                  dplyr::filter(Hugo_Symbol %in% c("EZH2","MEF2B","MYD88","KMT2D")) %>%
                  dplyr::arrange(Hugo_Symbol)
# peek at the data
dplyr::select(more_coding_ssm,1:10,37) %>% head()

more_hot_ssms = annotate_hotspots(more_coding_ssm)
more_hot_ssms %>% 
   dplyr::filter(!is.na(hot_spot)) %>% 
   dplyr::select(1:10,37,hot_spot) 
} # }