Get Manta SVs
get_manta_sv.Rd
Retrieve Manta SVs for one or many samples
Usage
get_manta_sv(
these_samples_metadata = NULL,
projection = "grch37",
region,
min_vaf = 0.1,
min_score = 40,
pass_filters = TRUE,
verbose = FALSE,
chromosome,
qstart,
qend,
pairing_status,
these_sample_ids = NULL,
...
)
Arguments
- these_samples_metadata
A metadata data frame to limit the result to sample_ids within it
- projection
The projection genome build. Default is grch37.
- region
Specify a single region to fetch SVs anchored within using the format "chrom:start-end"
- min_vaf
The minimum tumour VAF for a SV to be returned. Default is 0.1.
- min_score
The lowest Manta somatic score for a SV to be returned. Default is 40.
- pass_filters
If TRUE (default), only return SVs that are annotated with PASS in the FILTER column. Set to FALSE to keep all variants, regardless of whether they pass the filters.
- verbose
Set to FALSE to minimize the output to console. Default is FALSE. This parameter also dictates the verbosity of any helper function internally called inside the main function.
- chromosome
DEPRECATED. Use
region
instead.
- qstart
DEPRECATED. Use
region
instead.
- qend
DEPRECATED. Use
region
instead.
- pairing_status
DEPRECATED.
- these_sample_ids
DEPRECATED. Subset your metadata and supply `these_samples_metadata` instead.
- ...
Any additional parameters.
Details
Retrieve Manta SVs with additional VCF information to allow for
filtering of high-confidence variants.
To get SV calls for multiple samples, supply a metadata table via
these_samples_metadata
that has been subset to only those samples.
The results will be restricted to the sample_ids within that data frame.
This function can also restrict the returned breakpoints within a genomic
region specified via region
(in chr:start-end format).
Useful filtering parameters are also available, use min_vaf
to set the
minimum tumour VAF for a SV to be returned and min_score
to set the lowest Manta somatic score for a SV to be returned.
In addition, the user can choose to return all variants, even
the ones not passing the filter criteria. To do so,
set pass_filters = FALSE
(defaults to TRUE).
Examples
# lazily get every SV in the table with default quality filters
all_sv <- get_manta_sv()
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(all_sv)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 1 161658631 161658631 3 16509907 16509907
#> 2 1 161663959 161663959 9 37363320 37363320
#> 3 1 161663959 161663959 9 37363320 37363320
#> 4 11 65267283 65267283 14 106110907 106110907
#> 5 11 65267422 65267422 14 106110905 106110905
#> 6 13 91976545 91976545 14 106211857 106211857
#> manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:21171:0:1:0:0:0 133 + + FL2002T1
#> 2 MantaBND:206628:0:1:0:0:0 122 + + 09-15842_tumorA
#> 3 MantaBND:195941:0:1:0:0:0 151 + + 09-15842_tumorB
#> 4 MantaBND:152220:0:1:0:0:0:0 88 + - 15-38154T
#> 5 MantaBND:152220:0:1:0:0:0:0 135 - + 15-38154T
#> 6 MantaBND:18:59794:59817:0:1:0 90 - + 15-31924T
#> normal_sample_id VAF_tumour DP pair_status FILTER
#> 1 FL2002N 0.331 127 matched PASS
#> 2 09-15842_normal 0.281 196 matched PASS
#> 3 09-15842_normal 0.364 187 matched PASS
#> 4 15-38154N 0.150 167 matched PASS
#> 5 15-38154N 0.290 169 matched PASS
#> 6 15-31924N 0.365 85 matched PASS
# get all SVs for just one cohort
cohort_meta = suppressMessages(get_gambl_metadata()) %>%
dplyr::filter(cohort == "DLBCL_cell_lines")
some_sv <- get_manta_sv(these_samples_metadata = cohort_meta, verbose=FALSE)
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(some_sv)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 14 106329462 106329462 18 60774579 60774579
#> 2 14 106329465 106329465 18 60793497 60793497
#> 3 14 106330466 106330466 18 60793914 60793914
#> 4 14 106349765 106349765 18 60793914 60793914
#> 5 14 106379091 106379091 18 60793492 60793492
#> 6 14 106380227 106380227 18 60774578 60774578
#> manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:220769:1:2:0:0:0 134 + - SU-DHL-10
#> 2 MantaBND:194451:1:2:0:0:0 103 + - DOHH-2
#> 3 MantaBND:217561:1:2:0:0:0 182 + - SU-DHL-4
#> 4 MantaBND:217561:0:1:0:0:0 198 - + SU-DHL-4
#> 5 MantaBND:194451:0:1:0:0:0 91 - + DOHH-2
#> 6 MantaBND:220769:0:1:0:0:0 169 - + SU-DHL-10
#> normal_sample_id VAF_tumour DP pair_status FILTER
#> 1 14-11247N 0.318 66 unmatched PASS
#> 2 14-11247N 0.290 69 unmatched PASS
#> 3 14-11247N 0.474 57 unmatched PASS
#> 4 14-11247N 0.500 62 unmatched PASS
#> 5 14-11247N 0.300 60 unmatched PASS
#> 6 14-11247N 0.578 45 unmatched PASS
nrow(some_sv)
#> [1] 13
# get the SVs in a region around MYC
# WARNING: This is not the best way to find MYC SVs.
# Use annotate_sv on the full SV set instead.
myc_region_hg38 = "chr8:127710883-127761821"
myc_region_grch37 = "8:128723128-128774067"
hg38_myc_locus_sv <- get_manta_sv(region = myc_region_hg38,
projection = "hg38",
verbose = FALSE)
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(hg38_myc_locus_sv)
#> genomic_data Object
#> Genome Build: hg38
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 chr2 88860304 88860306 chr8 127751936 127751938
#> 2 chr2 88860417 88860417 chr8 127751955 127751955
#> 3 chr2 88861500 88861500 chr8 127748752 127748752
#> 4 chr3 187811601 187811601 chr8 127745649 127745649
#> 5 chr8 127741233 127741234 chr12 25049104 25049105
#> 6 chr8 127713694 127713694 chr14 105857950 105857950
#> manta_name SCORE STRAND_A STRAND_B
#> 1 MantaBND:194837:0:1:0:0:0:0 102 + +
#> 2 MantaBND:194837:0:1:0:0:0:0 73 - -
#> 3 MantaBND:1102030:0:1:0:0:0 89 + +
#> 4 MantaBND:48510:0:2:0:0:0 106 - +
#> 5 MantaBND:174836:0:1:0:0:0 219 + +
#> 6 MantaBND:1:10030:23823:0:0:0 109 - +
#> tumour_sample_id normal_sample_id VAF_tumour DP
#> 1 BLGSP-71-27-00414-01A-01E BLGSP-71-27-00414-10A-01D 0.171 280
#> 2 BLGSP-71-27-00414-01A-01E BLGSP-71-27-00414-10A-01D 0.117 230
#> 3 BLGSP-71-30-00647-01A-01E BLGSP-71-06-00286-99A-01D 0.283 46
#> 4 FL1008T2 FL1008N 0.171 245
#> 5 FL1018T2 FL1018N 0.323 288
#> 6 BLGSP-71-06-00280-01A-01D BLGSP-71-06-00280-99A-01D 0.272 235
#> pair_status FILTER
#> 1 matched PASS
#> 2 matched PASS
#> 3 unmatched PASS
#> 4 matched PASS
#> 5 matched PASS
#> 6 matched PASS
nrow(hg38_myc_locus_sv)
#> [1] 260
incorrect_myc_locus_sv <- get_manta_sv(region = myc_region_grch37,
projection = "hg38",
verbose = FALSE)
#> Using the bundled metadata in GAMBLR.data...
#> Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...
head(incorrect_myc_locus_sv)
#> genomic_data Object
#> Genome Build: hg38
#> Showing first 10 rows:
#> [1] CHROM_A START_A END_A CHROM_B
#> [5] START_B END_B manta_name SCORE
#> [9] STRAND_A STRAND_B tumour_sample_id normal_sample_id
#> [13] VAF_tumour DP pair_status FILTER
#> <0 rows> (or 0-length row.names)
nrow(incorrect_myc_locus_sv)
#> [1] 0
# The effect of specifying the wrong coordinate is evident
# Despite potentially being incomplete, we can nonetheless
# annotate these directly for more details
annotated_myc_hg38 = suppressMessages(
annotate_sv(hg38_myc_locus_sv, genome_build = "hg38")
)
head(annotated_myc_hg38)
#> chrom1 start1 end1 chrom2 start2 end2 name score strand1
#> 1 8 127741233 127741234 12 25049104 25049105 . 219 +
#> 2 8 127713694 127713694 14 105857950 105857950 . 109 -
#> 3 8 127716025 127716934 14 105862581 105863164 . 112 +
#> 4 8 127716523 127716523 14 105862757 105862757 . 173 -
#> 5 8 127718148 127718148 14 105860256 105860256 . 152 +
#> 6 8 127718150 127718150 14 105860564 105860564 . 163 -
#> strand2 tumour_sample_id gene partner fusion
#> 1 + FL1018T2 MYC LRMP LRMP-MYC
#> 2 + BLGSP-71-06-00280-01A-01D MYC IGH IGH-MYC
#> 3 - BLGSP-71-06-00084-01A-01D MYC IGH IGH-MYC
#> 4 + BLGSP-71-06-00084-01A-01D MYC IGH IGH-MYC
#> 5 - BLGSP-71-08-00036-01A-01D MYC IGH IGH-MYC
#> 6 + BLGSP-71-08-00036-01A-01D MYC IGH IGH-MYC
table(annotated_myc_hg38$partner)
#>
#> BCL6 DMD IGH IGK IGL LRMP PAX5
#> 1 2 247 3 2 1 4
# The usual MYC partners are seen here