Skip to contents

Create a highly customizable oncoplot.

Usage

prettyOncoplot(
  maf_df,
  gene_cnv_df,
  binned_cnv_df,
  genes,
  include_noncoding = NULL,
  keepGeneOrder = FALSE,
  keepSampleOrder = FALSE,
  highlightHotspots = FALSE,
  these_samples_metadata,
  genes_CN_thresh,
  metadataColumns,
  numericMetadataColumns,
  expressionColumns = c(),
  numericMetadataMax,
  sortByColumns,
  sortByGenes,
  arrange_descending = FALSE,
  removeNonMutated = FALSE,
  minMutationPercent = 0,
  mutAlpha = 1,
  recycleOncomatrix = FALSE,
  splitColumnName,
  splitGeneGroups,
  showTumorSampleBarcode = FALSE,
  groupNames,
  hide_annotations,
  hide_annotations_tracks = FALSE,
  annotate_specific_genes = FALSE,
  this_forest_object = NULL,
  custom_colours = NULL,
  hideTopBarplot = TRUE,
  tally_all_mutations = FALSE,
  tally_all_mutations_max = 1000,
  hideSideBarplot = FALSE,
  box_col = NA,
  annoAlpha = 1,
  legend_direction = "horizontal",
  ylim = NULL,
  legend_position = "bottom",
  legend_row = 3,
  legend_col = 3,
  metadataBarHeight = 1.5,
  metadataBarFontsize = 5,
  metadataSide = "bottom",
  legendFontSize = 10,
  fontSizeGene = 6,
  annotation_row = 2,
  annotation_col = 1,
  verbose = FALSE,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  clustering_distance_rows = "binary",
  clustering_distance_cols = "binary",
  split_rows_kmeans,
  split_columns_kmeans,
  dry_run = FALSE,
  simplify_annotation = FALSE,
  simplify_bg_colour = NA,
  stacked = FALSE,
  return_inputs = FALSE,
  gap = 0,
  use_raster = NULL,
  plot_width = NULL,
  plot_height = NULL,
  show_any_legend = TRUE,
  pct_side = "left",
  pctFontSize = 6,
  row_names_side = "right",
  show_pct = TRUE,
  hide_annotation_name = FALSE,
  cnv_df
)

Arguments

maf_df

A maf as data frame containing the mutations you want to plot.

gene_cnv_df

An optional data frame of CN status for genes you want included (rows = sample_id, columns = Hugo_Symbol). See [GAMBLR.results::get_cnv_and_ssm_status] for more information.

binned_cnv_df

An optional data frame with the genome-wide CN status of your samples in genomic bins. See [GAMBLR.utils::segmented_data_to_cn_matrix] for more information.

genes

An optional vector of genes to restrict your plot to.

include_noncoding

List of non-coding regions to be included, default is NULL. Specify like this: include_noncoding=list("NFKBIZ" = c("3'UTR"), "HNRNPH1" = "Splice_Region")

keepGeneOrder

Set to TRUE if you want to preserve the gene order specified.

keepSampleOrder

Set to TRUE if you want to preserve the sample order specified. The default value is FALSE and respects all of the specified ordering.

highlightHotspots

Set to TRUE to highlight hot spots. Default is FALSE.

these_samples_metadata

Data frame containing metadata for your samples.

genes_CN_thresh

A named vector specifying the genes whose copy number status should be incorporated. The names must be the gene symbols and the values should be integers that indicate the maximum or minimum CN states to consider for that gene. For example: 'REL'=4 would show CN 4 or higher; 'TP53'=1 would show heterozygous and homozygous deletions; 'BCL2'=3 would show single-copy gains or higher.

metadataColumns

A vector containing the categorical column names you want to plot below.

numericMetadataColumns

A vector containing the numeric columns you want to plot below.

expressionColumns

Optional variable for retrieving expression values for specific gene(s).

numericMetadataMax

A numeric vector of cutoffs to apply to numeric columns above.

sortByColumns

A vector containing the column names you want to sort columns (patients) on.

arrange_descending

A Boolean parameter. Set to TRUE to sort metadata in descending fashion. Default is FALSE.

removeNonMutated

Set to TRUE to drop unmutated cases.

minMutationPercent

Only genes mutated in more than minMutationPercent % of patients will be included.

mutAlpha

Optional alpha to apply to mutation colours.

recycleOncomatrix

Set to TRUE most of the time to reuse the oncomatrix saved by maftools.

splitColumnName

Optional argument to indicate which metadata column to split on.

splitGeneGroups

Split genes into groups for better separation (between different gene-groups) in prettyOncoplot.

showTumorSampleBarcode

Optional argument for showing tumor barcode. Default is FALSE.

groupNames

optional vector of group names to be displayed above heatmap. Should be the same length as the number of groups that will be shown. Default is NULL (no labels).

hide_annotations

Hide annotations for specific ashms. This argument takes a list with annotations.

hide_annotations_tracks

When hide_annotations is supplied with a list of columns, this parameter can optionally also not display those columns as the annotation track. Accepts TRUE and FALSE (default).

annotate_specific_genes

Optional argument, specifying whether the features should be labelled according to their significance in one of the pathologies. Default is FALSE (no annotation).

this_forest_object

If annotate_specific_genes is specified, this argument takes the output of GAMBLR::prettyForestPlot directly to determine the annotations.

custom_colours

Provide a named vector (or named list of vectors) containing custom annotation colours if you do not want to use the standardized palette.

hideTopBarplot

Optional argument for removing top bar plot. Default value is TRUE.

tally_all_mutations

Optional argument. Set to TRUE to tally all mutations. Default is FALSE.

tally_all_mutations_max

Optional argument. Default is 1000.

hideSideBarplot

Optional argument for removing side bar plot. Default value is FALSE.

box_col

Colour of boxes for outlining mutations (can be problematic with larger oncoprints).

annoAlpha

Optional alpha to apply to annotation colours.

legend_direction

Direction of legend, default is "horizontal".

ylim

Limit for y-axis.

legend_position

Position of legend, default is "bottom".

legend_row

Fiddle with these to widen or narrow your legend.

legend_col

Fiddle with these to widen or narrow your legend.

metadataBarHeight

Optional argument to adjust the height of bar with annotations. The default is 1.5.

metadataBarFontsize

Optional argument to control for the font size of metadata annotations. The default is 5.

legendFontSize

Font size for legend, default is 10.

fontSizeGene

Font size for gene labels (default 6).

annotation_row

Row for annotations, default is 2.

annotation_col

Column for annotations, default is 1.

verbose

Set to TRUE to enable verbose mode (debugging messages).

cluster_rows

Force clustering of genes with correlated mutation patterns

cluster_cols

Force clustering of patients with correlated mutation patterns

clustering_distance_rows

Distance metric used for clustering when cluster_rows = TRUE

clustering_distance_cols

Distance metric used for clustering when cluster_cols = TRUE

split_rows_kmeans

K value for k-means clustering on rows

split_columns_kmeans

K value for k-means clustering on columns

dry_run

Set to TRUE to more efficiently view the clustering result while debugging cluster_rows/clustering_distance_rows

simplify_annotation

Collapse/group the variant effect categories to only 3 options. This is a much faster option for when many patients/genes are included.

simplify_bg_colour

When simplify_annotation is set to TRUE, adjust the colour of the background by passing a value to this argument. Default is NA.

stacked

Deprecated. See [GAMBLR.viz::prettyStackedOncoplot] for this functionality.

return_inputs

Optional flag to return the plot and various other internal objects such as the underlying mutation matrix.

gap

Size of gap between columns represented as a proportion of the full width of the column. Default 0 (no gap).

use_raster

Whether to rasterize image

show_pct

TRUE by default. Set to FALSE to hide percentage.

hide_annotation_name

Default: FALSE

cluster_numeric_rows

Deprecated. See [GAMBLR.viz::prettyStackedOncoplot] for this functionality.

cluster_numeric_cols

Deprecated. See [GAMBLR.viz::prettyStackedOncoplot] for this functionality.

numeric_heatmap_type

Deprecated. See [GAMBLR.viz::prettyStackedOncoplot] for this functionality.

numeric_heatmap_location

Deprecated. See [GAMBLR.viz::prettyStackedOncoplot] for this functionality.

Value

By default, nothing unless return_inputs is specified, in which case it returns a named list that contains different things depending on how the function was run. At the very least, it will contain the Heatmap object and a logical matrix indicating the mutation status of each gene and patient shown in the output.

Details

Generates an oncoplot (a.k.a. oncoprint) that is pretty using ComplexHeatmap. The metadata is expected to follow the structure and column naming used in GAMBL. If you provide your own non-GAMBL samples and metadata, you must include at least the following columns with these names: sample_id, pathology. The first one (sample_id) should match the Tumor_Sample_Barcode in the MAF object or onco_matrix you provide.

Examples


suppressMessages(
  suppressWarnings({
    # Attach the packages used throughout these examples
    library(grid)
    library(dplyr)
    library(GAMBLR.open)

    # Genome metadata from GAMBLR.open, restricted to the FL and DLBCL
    # samples belonging to the FL_Dreval study
    maf_metadata <- dplyr::filter(
      GAMBLR.open::get_gambl_metadata(seq_type_filter = "genome"),
      pathology %in% c("FL", "DLBCL"),
      study == "FL_Dreval"
    )

    # Retrieve the MAF rows for exactly those samples via get_coding_ssm
    maf_data <- get_coding_ssm(these_samples_metadata = maf_metadata)
  })
)

# Assemble the genes of interest: the Tier 1 FL genes first, followed by
# the Tier 1 DLBCL genes that are not already part of the FL set
fl_genes <- pull(
  dplyr::filter(GAMBLR.data::lymphoma_genes, FL_Tier == 1),
  Gene
)

dlbcl_genes <- pull(
  dplyr::filter(
    GAMBLR.data::lymphoma_genes,
    DLBCL_Tier == 1,
    !Gene %in% fl_genes
  ),
  Gene
)

genes <- c(fl_genes, dlbcl_genes)

# Named vector (names = gene symbols, values = gene set label) used to
# split the oncoplot rows into gene sets
split_genes <- setNames(
  rep(c("FL", "DLBCL"), times = c(length(fl_genes), length(dlbcl_genes))),
  genes
)

suppressMessages(
  suppressWarnings({
    # Oncoplot of the selected genes, rows split by gene set, with a
    # minMutationPercent of 5
    prettyOncoplot(
      maf_df = maf_data,
      genes = genes,
      these_samples_metadata = maf_metadata,
      splitGeneGroups = split_genes,
      minMutationPercent = 5
    )
  })
)


# Was that too slow for you? Turn on the simplify_annotation
# parameter to get a quicker result.

suppressMessages(
  suppressWarnings({
    # Same call as before, with the simplified annotation enabled
    prettyOncoplot(
      maf_df = maf_data,
      genes = genes,
      these_samples_metadata = maf_metadata,
      splitGeneGroups = split_genes,
      minMutationPercent = 5,
      simplify_annotation = TRUE
    )
  })
)


# Want to include copy number? You have two options.
# Option 1:
# Incorporate the CN status of specific genes into your oncoplot
# along with mutations.
# There are two ways to go about this.
# The original way involves using the helper function get_cnv_and_ssm_status

# Copy number threshold for each gene, keyed by gene symbol
cn_thresh_by_gene <- c(
  "REL" = 4, "CDKN2A" = 1,
  "MIR17HG" = 4, "TP53" = 1, "ATM" = 1, "FAS" = 1, "SMARCA4" = 1,
  "B2M" = 1, "TNFRSF14" = 1,
  "TMEM30A" = 1, "TNFAIP3" = 1, "BCL2" = 3
)

# unname() keeps the gene symbols from becoming the data frame's row names
gene_regions <- data.frame(
  gene_id = names(cn_thresh_by_gene),
  cn_thresh = unname(cn_thresh_by_gene)
)

# this data frame specifies the threshold and directionality for
# each gene's copy number state to display on the oncoplot.
# Amplifications will be shown for REL and MIR17HG, gains
# for BCL2, deletions for the rest
print(gene_regions)
#>     gene_id cn_thresh
#> 1       REL         4
#> 2    CDKN2A         1
#> 3   MIR17HG         4
#> 4      TP53         1
#> 5       ATM         1
#> 6       FAS         1
#> 7   SMARCA4         1
#> 8       B2M         1
#> 9  TNFRSF14         1
#> 10  TMEM30A         1
#> 11  TNFAIP3         1
#> 12     BCL2         3

suppressMessages(
  suppressWarnings({
    # Build the per-gene CN status table (see
    # GAMBLR.results::get_cnv_and_ssm_status) using the thresholds
    # defined in gene_regions
    gene_cnv <- GAMBLR.results::get_cnv_and_ssm_status(
      only_cnv = "all",
      these_samples_metadata = get_gambl_metadata(),
      genes_and_cn_threshs = gene_regions
    )

    # Pass the CN table through gene_cnv_df, the argument documented for
    # the output of get_cnv_and_ssm_status, rather than the undocumented
    # trailing cnv_df argument.
    prettyOncoplot(
      maf_df = maf_data,
      genes = c(
        "CREBBP", "EZH2", "MYD88",
        "TCF3", "BCL2", "BCL7A",
        "MEF2B", "POU2F2", "POU2AF1",
        "ID3", "MYC",
        "RRAGC", "TCL1A", "KMT2D",
        "PIM1", "CD79B", "TMSB4X",
        "TMEM30A", "TNFAIP3"
      ),
      these_samples_metadata = maf_metadata,
      cluster_rows = TRUE,
      metadataColumns = c(
        "pathology",
        "lymphgen",
        "seq_type",
        "ffpe_or_frozen"
      ),
      cluster_cols = FALSE,
      simplify_annotation = TRUE,
      gene_cnv_df = gene_cnv,
      sortByColumns = c("pathology", "lymphgen")
    )
  })
)



# Option 2:
# The second way to incorporate copy number relies
# instead on a binned copy number matrix.
# If you already have one on hand, this is clearly the preferred approach!
# First let's make one with the help of segmented_data_to_cn_matrix
if (FALSE) { # \dontrun{
  all_segments <- get_cn_segments(these_samples_metadata = maf_metadata)

  # Note: adjust_for_ploidy = TRUE ensures the relative
  # CN status is used for high-ploidy cases
  all_states_binned <- segmented_data_to_cn_matrix(
    seg_data = all_segments,
    strategy = "auto_split",
    n_bins_split = 1000,
    fill_missing_with = "avg_ploidy",
    adjust_for_ploidy = TRUE,
    these_samples_metadata = maf_metadata
  )

  # As before, we need to specify which genes we want
  # CN events shown for and in what direction (gain or loss).
  # This is done a bit more easily with the genes_CN_thresh option.
  CN_thresh <- c(
    "REL" = 4,
    "CDKN2A" = 1,
    "MIR17HG" = 4,
    "TP53" = 1,
    "TNFRSF14" = 1,
    "TNFAIP3" = 1
  )

  # Oncoplot of the first 25 genes, with CN events taken from the binned matrix
  prettyOncoplot(
    maf_df = maf_data,
    binned_cnv_df = all_states_binned,
    genes_CN_thresh = CN_thresh,
    genes = head(genes, 25),
    these_samples_metadata = maf_metadata,
    cluster_rows = TRUE,
    metadataColumns = c(
      "pathology",
      "genetic_subgroup",
      "seq_type",
      "ffpe_or_frozen"
    ),
    cluster_cols = FALSE,
    simplify_annotation = TRUE,
    sortByColumns = c("pathology", "genetic_subgroup"),
    minMutationPercent = 0
  )
} # }