Skip to contents

Retrieve all copy number segments from the GAMBL outputs

Usage

get_cn_segments(
  these_samples_metadata,
  projection = "grch37",
  flavour = "combined",
  this_seq_type,
  fill_missing_with = "nothing",
  verbose = FALSE
)

Arguments

these_samples_metadata

A metadata table (required) that restricts the returned data to the samples it contains. The metadata also ensures proper handling of sample_id values that are duplicated across seq_types, and ensures that the seq_type in the metadata faithfully represents the seq_type of the data.

projection

Desired genome coordinate system for returned CN segments. Default is "grch37".

flavour

Specify what pipeline or source of data to use. Available options are "combined" or "battenberg". Battenberg outputs are incomplete.

this_seq_type

Deprecated.

fill_missing_with

Specify how to fill values in dummy segments that were created to satisfy GISTIC. The default is "nothing", which causes these dummy segments to be dropped so that empty regions can be handled in subsequent processing steps. When creating a GISTIC input, you would typically want to set this to "avg_ploidy"; GAMBLR.utils::prepare_gistic_inputs takes care of this for you.

verbose

Set to TRUE for a chattier experience

Value

A data frame with CN segments for the samples in the supplied metadata, in the requested projection.

Details

This function merely loads and returns all the seg_data available for a projection (genome build). It can also assign a single value to dummy segments if they are present/identified in the source file.

Examples

# Example for just exome/capture samples:
# Get metadata for just a few capture samples
capture_metadata <- suppressMessages(get_gambl_metadata()) %>%
  dplyr::filter(seq_type == "capture") %>%
  head()

# Load the copy number segments for capture samples using hg38 projection
capture_segments_hg38 <- get_cn_segments(
  these_samples_metadata = capture_metadata,
  projection = "hg38"
)
#> dummy segments are not annotated in the inputs
#> fill_missing_with parameter will be ignored
print(capture_segments_hg38)
#> SEG Data Object
#> Genome Build: hg38 
#> Showing first 10 rows:
#>                 ID chrom    start      end LOH_flag log.ratio seg_seq_type
#> 1  00-22011_tumorB  chr1    10001    69372       NA    0.0000      capture
#> 2  00-22011_tumorB  chr1    69373 10335564       NA    1.0000      capture
#> 3  00-22011_tumorB  chr1 10335564 12723042       NA    0.0000      capture
#> 4  00-22011_tumorB  chr1 12723043 13184564       NA    0.0000      capture
#> 5  00-22011_tumorB  chr1 13184565 33302847       NA    0.0000      capture
#> 6  00-22011_tumorB  chr1 33302847 33309697       NA    2.5453      capture
#> 7  00-22011_tumorB  chr1 33309697 40850005       NA    0.0000      capture
#> 8  00-22011_tumorB  chr1 40850005 40922631       NA    2.8912      capture
#> 9  00-22011_tumorB  chr1 40922631 47396477       NA    0.0000      capture
#> 10 00-22011_tumorB  chr1 47396477 47439416       NA    2.0000      capture
#>          CN
#> 1   2.00000
#> 2   4.00000
#> 3   2.00000
#> 4   2.00000
#> 5   2.00000
#> 6  11.67459
#> 7   2.00000
#> 8  14.83775
#> 9   2.00000
#> 10  8.00000

genome_metadata <- suppressMessages(get_gambl_metadata()) %>%
  dplyr::filter(seq_type == "genome") %>%
  head()
# Create a metadata table with a mix of seq_types
mixed_seq_type_meta <- dplyr::bind_rows(capture_metadata, genome_metadata)
## We can load the copy number segments for all samples across seq_types
capture_segments_default <- get_cn_segments(
  these_samples_metadata = mixed_seq_type_meta
)
#> dummy segments are not annotated in the inputs
#> fill_missing_with parameter will be ignored
dplyr::group_by(capture_segments_default, ID) %>%
  dplyr::summarize(n = dplyr::n())
#> # A tibble: 12 × 2
#>    ID                    n
#>    <chr>             <int>
#>  1 00-12637_CLC02086   116
#>  2 00-14595_tumorB     144
#>  3 00-14595_tumorC     166
#>  4 00-14595_tumorD     142
#>  5 00-22011_tumorB     598
#>  6 FL1011T1          22548
#>  7 FL1011T2            134
#>  8 PA003               225
#>  9 PA007               297
#> 10 PA065              3903
#> 11 PA081               200
#> 12 PTCL002_tumour      195
# Note the default projection is "grch37"
print(capture_segments_default)
#> SEG Data Object
#> Genome Build: grch37 
#> Showing first 10 rows:
#>                 ID chrom    start      end LOH_flag log.ratio seg_seq_type
#> 1  00-22011_tumorB     1    10001    69372       NA    0.0000      capture
#> 2  00-22011_tumorB     1    69373 10395622       NA    1.0000      capture
#> 3  00-22011_tumorB     1 10395622 12783034       NA    0.0000      capture
#> 4  00-22011_tumorB     1 12783034 13292951       NA   -2.0000      capture
#> 5  00-22011_tumorB     1 13292951 33768448       NA    0.0000      capture
#> 6  00-22011_tumorB     1 33768448 33775298       NA    2.5453      capture
#> 7  00-22011_tumorB     1 33775298 41315677       NA    0.0000      capture
#> 8  00-22011_tumorB     1 41315677 41388303       NA    2.8912      capture
#> 9  00-22011_tumorB     1 41388303 47862149       NA    0.0000      capture
#> 10 00-22011_tumorB     1 47862149 47905088       NA    2.0000      capture
#>          CN
#> 1   2.00000
#> 2   4.00000
#> 3   2.00000
#> 4   0.50000
#> 5   2.00000
#> 6  11.67459
#> 7   2.00000
#> 8  14.83775
#> 9   2.00000
#> 10  8.00000