Skip to contents

Retrieve Manta SVs for one or many samples

Usage

get_manta_sv(
  these_samples_metadata = NULL,
  projection = "grch37",
  region,
  min_vaf = 0.1,
  min_score = 40,
  pass_filters = TRUE,
  verbose = FALSE,
  chromosome,
  qstart,
  qend,
  pairing_status,
  these_sample_ids = NULL,
  ...
)

Arguments

these_samples_metadata

A metadata data frame to limit the result to sample_ids within it

projection

The projection genome build. Default is grch37.

region

Specify a single region to fetch SVs anchored within using the format "chrom:start-end"

min_vaf

The minimum tumour VAF for a SV to be returned. Default is 0.1.

min_score

The lowest Manta somatic score for a SV to be returned. Default is 40.

pass_filters

If TRUE (default), only return SVs that are annotated with PASS in the FILTER column. Set to FALSE to keep all variants, regardless of whether they pass the filters.

verbose

Set to FALSE to minimize the output to console. Default is FALSE. This parameter also controls the verbosity of any helper functions called internally by the main function.

chromosome

DEPRECATED. Use region instead.

qstart

DEPRECATED. Use region instead.

qend

DEPRECATED. Use region instead.

pairing_status

DEPRECATED.

these_sample_ids

DEPRECATED. Subset your metadata and supply `these_samples_metadata` instead.

...

Any additional parameters.

Details

Retrieve Manta SVs with additional VCF information to allow for filtering of high-confidence variants. To get SV calls for multiple samples, supply a metadata table via these_samples_metadata that has been subset to only those samples. The results will be restricted to the sample_ids within that data frame. This function can also restrict the returned breakpoints to a genomic region specified via region (in chrom:start-end format). Useful filtering parameters are also available: use min_vaf to set the minimum tumour VAF for a SV to be returned and min_score to set the lowest Manta somatic score for a SV to be returned. In addition, the user can choose to return all variants, even the ones not passing the filter criteria. To do so, set pass_filters = FALSE (defaults to TRUE).

Examples

# lazily get every SV in the table with default quality filters
all_sv <- get_manta_sv()
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(all_sv)
#> genomic_data Object
#> Genome Build: grch37 
#> Showing first 10 rows:
#>   CHROM_A   START_A     END_A CHROM_B   START_B     END_B
#> 1       1 161658631 161658631       3  16509907  16509907
#> 2       1 161663959 161663959       9  37363320  37363320
#> 3       1 161663959 161663959       9  37363320  37363320
#> 4      11  65267283  65267283      14 106110907 106110907
#> 5      11  65267422  65267422      14 106110905 106110905
#> 6      13  91976545  91976545      14 106211857 106211857
#>                      manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1      MantaBND:21171:0:1:0:0:0   133        +        +         FL2002T1
#> 2     MantaBND:206628:0:1:0:0:0   122        +        +  09-15842_tumorA
#> 3     MantaBND:195941:0:1:0:0:0   151        +        +  09-15842_tumorB
#> 4   MantaBND:152220:0:1:0:0:0:0    88        +        -        15-38154T
#> 5   MantaBND:152220:0:1:0:0:0:0   135        -        +        15-38154T
#> 6 MantaBND:18:59794:59817:0:1:0    90        -        +        15-31924T
#>   normal_sample_id VAF_tumour  DP pair_status FILTER
#> 1          FL2002N      0.331 127     matched   PASS
#> 2  09-15842_normal      0.281 196     matched   PASS
#> 3  09-15842_normal      0.364 187     matched   PASS
#> 4        15-38154N      0.150 167     matched   PASS
#> 5        15-38154N      0.290 169     matched   PASS
#> 6        15-31924N      0.365  85     matched   PASS

# get all SVs for just one cohort
cohort_meta = suppressMessages(get_gambl_metadata()) %>% 
              dplyr::filter(cohort == "DLBCL_cell_lines")

some_sv <- get_manta_sv(these_samples_metadata = cohort_meta, verbose=FALSE)
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(some_sv)
#> genomic_data Object
#> Genome Build: grch37 
#> Showing first 10 rows:
#>   CHROM_A   START_A     END_A CHROM_B  START_B    END_B
#> 1      14 106329462 106329462      18 60774579 60774579
#> 2      14 106329465 106329465      18 60793497 60793497
#> 3      14 106330466 106330466      18 60793914 60793914
#> 4      14 106349765 106349765      18 60793914 60793914
#> 5      14 106379091 106379091      18 60793492 60793492
#> 6      14 106380227 106380227      18 60774578 60774578
#>                  manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:220769:1:2:0:0:0   134        +        -        SU-DHL-10
#> 2 MantaBND:194451:1:2:0:0:0   103        +        -           DOHH-2
#> 3 MantaBND:217561:1:2:0:0:0   182        +        -         SU-DHL-4
#> 4 MantaBND:217561:0:1:0:0:0   198        -        +         SU-DHL-4
#> 5 MantaBND:194451:0:1:0:0:0    91        -        +           DOHH-2
#> 6 MantaBND:220769:0:1:0:0:0   169        -        +        SU-DHL-10
#>   normal_sample_id VAF_tumour DP pair_status FILTER
#> 1        14-11247N      0.318 66   unmatched   PASS
#> 2        14-11247N      0.290 69   unmatched   PASS
#> 3        14-11247N      0.474 57   unmatched   PASS
#> 4        14-11247N      0.500 62   unmatched   PASS
#> 5        14-11247N      0.300 60   unmatched   PASS
#> 6        14-11247N      0.578 45   unmatched   PASS
nrow(some_sv)
#> [1] 13

# get the SVs in a region around MYC
# WARNING: This is not the best way to find MYC SVs.
# Use annotate_sv on the full SV set instead.
myc_region_hg38 = "chr8:127710883-127761821"
myc_region_grch37 = "8:128723128-128774067"

hg38_myc_locus_sv <- get_manta_sv(region = myc_region_hg38,
                                projection = "hg38",
                                verbose = FALSE)
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(hg38_myc_locus_sv)
#> genomic_data Object
#> Genome Build: hg38 
#> Showing first 10 rows:
#>   CHROM_A   START_A     END_A CHROM_B   START_B     END_B
#> 1    chr2  88860304  88860306    chr8 127751936 127751938
#> 2    chr2  88860417  88860417    chr8 127751955 127751955
#> 3    chr2  88861500  88861500    chr8 127748752 127748752
#> 4    chr3 187811601 187811601    chr8 127745649 127745649
#> 5    chr8 127741233 127741234   chr12  25049104  25049105
#> 6    chr8 127713694 127713694   chr14 105857950 105857950
#>                     manta_name SCORE STRAND_A STRAND_B
#> 1  MantaBND:194837:0:1:0:0:0:0   102        +        +
#> 2  MantaBND:194837:0:1:0:0:0:0    73        -        -
#> 3   MantaBND:1102030:0:1:0:0:0    89        +        +
#> 4     MantaBND:48510:0:2:0:0:0   106        -        +
#> 5    MantaBND:174836:0:1:0:0:0   219        +        +
#> 6 MantaBND:1:10030:23823:0:0:0   109        -        +
#>            tumour_sample_id          normal_sample_id VAF_tumour  DP
#> 1 BLGSP-71-27-00414-01A-01E BLGSP-71-27-00414-10A-01D      0.171 280
#> 2 BLGSP-71-27-00414-01A-01E BLGSP-71-27-00414-10A-01D      0.117 230
#> 3 BLGSP-71-30-00647-01A-01E BLGSP-71-06-00286-99A-01D      0.283  46
#> 4                  FL1008T2                   FL1008N      0.171 245
#> 5                  FL1018T2                   FL1018N      0.323 288
#> 6 BLGSP-71-06-00280-01A-01D BLGSP-71-06-00280-99A-01D      0.272 235
#>   pair_status FILTER
#> 1     matched   PASS
#> 2     matched   PASS
#> 3   unmatched   PASS
#> 4     matched   PASS
#> 5     matched   PASS
#> 6     matched   PASS
nrow(hg38_myc_locus_sv)
#> [1] 260

incorrect_myc_locus_sv <- get_manta_sv(region = myc_region_grch37,
                                projection = "hg38",
                                verbose = FALSE)
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(incorrect_myc_locus_sv)
#> genomic_data Object
#> Genome Build: hg38 
#> Showing first 10 rows:
#>  [1] CHROM_A          START_A          END_A            CHROM_B         
#>  [5] START_B          END_B            manta_name       SCORE           
#>  [9] STRAND_A         STRAND_B         tumour_sample_id normal_sample_id
#> [13] VAF_tumour       DP               pair_status      FILTER          
#> <0 rows> (or 0-length row.names)
nrow(incorrect_myc_locus_sv)
#> [1] 0
# The effect of specifying coordinates from the wrong genome build
# (a grch37 region with projection = "hg38") is evident: no SVs are returned

# Despite potentially being incomplete, we can nonetheless
# annotate these directly for more details
annotated_myc_hg38 = suppressMessages(
         annotate_sv(hg38_myc_locus_sv, genome_build = "hg38")
)
head(annotated_myc_hg38)
#>   chrom1    start1      end1 chrom2    start2      end2 name score strand1
#> 1      8 127741233 127741234     12  25049104  25049105    .   219       +
#> 2      8 127713694 127713694     14 105857950 105857950    .   109       -
#> 3      8 127716025 127716934     14 105862581 105863164    .   112       +
#> 4      8 127716523 127716523     14 105862757 105862757    .   173       -
#> 5      8 127718148 127718148     14 105860256 105860256    .   152       +
#> 6      8 127718150 127718150     14 105860564 105860564    .   163       -
#>   strand2          tumour_sample_id gene partner   fusion
#> 1       +                  FL1018T2  MYC    LRMP LRMP-MYC
#> 2       + BLGSP-71-06-00280-01A-01D  MYC     IGH  IGH-MYC
#> 3       - BLGSP-71-06-00084-01A-01D  MYC     IGH  IGH-MYC
#> 4       + BLGSP-71-06-00084-01A-01D  MYC     IGH  IGH-MYC
#> 5       - BLGSP-71-08-00036-01A-01D  MYC     IGH  IGH-MYC
#> 6       + BLGSP-71-08-00036-01A-01D  MYC     IGH  IGH-MYC
table(annotated_myc_hg38$partner)
#> 
#> BCL6  DMD  IGH  IGK  IGL LRMP PAX5 
#>    1    2  247    3    2    1    4 
# The usual MYC partners are seen here