Get Combined SV.
get_combined_sv.Rd
Retrieve Combined Manta and GRIDSS-derived SVs.
Usage
get_combined_sv(
these_samples_metadata,
projection = "grch37",
oncogenes,
region,
min_vaf = 0,
these_sample_ids
)
Arguments
- these_samples_metadata
A GAMBL metadata data frame containing only the samples you want returned.
- projection
The projection genome build. Default is "grch37".
- oncogenes
A character vector of genes commonly involved in translocations.
- region
Optional, region formatted like chrX:1234-5678 (chromosome can be prefixed or not prefixed) instead of specifying chromosome, start and end separately.
- min_vaf
The minimum tumour VAF for an SV to be returned. Recommended: 0 (default: 0).
- these_sample_ids
DEPRECATED
Note on the oncogenes argument: you may want to include one or more genes, e.g. c("CCND1", "BCL2", "MYC"). NOTE: If you are looking for SVs affecting oncogenes, you are more likely going to want to pass the full output to GAMBLR.utils::annotate_sv.
Value
A data frame in a bedpe-like format with additional columns that allow filtering of high-confidence SVs.
Details
The bedpe files this function loads were pre-filtered for a minimum VAF of 0.05, and SVs affecting common translocation regions (BCL2, BCL6, MYC, CCND1) were whitelisted (i.e. no VAF filter was applied to them). Therefore, if you wish to post-filter the SVs, we recommend doing so as needed. Further, the input bedpe file is annotated with oncogenes and superenhancers from naive and germinal centre B-cells. You can subset to events affecting certain loci using the "oncogenes" argument. See also the similar function get_manta_sv.
Examples
# lazily get every SV in the table with default quality filters
all_sv <- get_combined_sv()
dplyr::select(all_sv,1:14)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 1 9894 10014 16 69889 69998
#> 2 1 10173 10984 12 94748 95533
#> 3 1 10176 10711 11 134946192 134946783
#> 4 1 10269 10794 16 59726 60283
#> 5 1 10285 10286 8 146301390 146301391
#> 6 1 10287 10688 15 102521012 102521550
#> 7 1 10308 10837 12 95037 95505
#> 8 1 10347 10631 15 102520227 102520677
#> 9 1 10437 10438 8 146301390 146301391
#> 10 1 10437 10438 8 146301390 146301391
#> manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:10:1901:1907:0:0:0:1 NA - + 00-14595_tumorD
#> 2 MantaBND:1:5198:6695:0:0:0:1 NA + + CLC03470
#> 3 MantaBND:23:3075:4071:0:0:0:1 NA + + 14-14094T
#> 4 MantaBND:3:3538:3557:0:0:0:1 NA + - 07-13339T
#> 5 MantaBND:5:1923:1927:0:0:0:1 NA + + 09-41114T
#> 6 MantaBND:13192:0:1:0:0:0:0 NA + + 835-02-03TD
#> 7 MantaBND:1:6049:6050:1:0:0:0 NA + + 4687-03-01BD
#> 8 MantaBND:11:3940:4135:0:0:0:1 NA - - 12-34927T
#> 9 MantaBND:2:7221:7224:0:1:0:1 NA + + 102-01-01TD
#> 10 MantaBND:2:1723:1728:0:0:0:0 NA + + 102-0202-1DVT
#> normal_sample_id VAF_tumour DP
#> 1 00-14595_normal 0.400 10
#> 2 14-11247N 0.136 44
#> 3 14-11247N 0.186 59
#> 4 14-11247N 0.175 40
#> 5 14-11247N 0.118 110
#> 6 14-11247Normal 0.312 32
#> 7 14-11247Normal 0.250 52
#> 8 14-11247N 0.135 104
#> 9 14-11247Normal 0.520 25
#> 10 14-11247Normal 0.630 27
# get all SVs for just one cohort
cohort_meta = suppressMessages(get_gambl_metadata()) %>%
dplyr::filter(cohort == "DLBCL_cell_lines")
some_sv <- get_combined_sv(these_samples_metadata = cohort_meta)
dplyr::select(some_sv,1:14)
#> genomic_data Object
#> Genome Build: grch37
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 1 1346152 1346155 1 111802594 111802597
#> 2 1 1739478 1739484 1 1770671 1770677
#> 3 1 1826839 1826841 1 1833712 1833714
#> 4 1 1827338 1827339 1 8741628 8741629
#> 5 1 2317295 2317296 1 16648172 16648173
#> 6 1 4472986 4472989 1 6530057 6530060
#> 7 1 6290197 6290198 1 16693372 16693373
#> 8 1 6438168 6438173 1 6445895 6445900
#> 9 1 6438168 6438173 1 6445895 6445900
#> 10 1 10005435 10005437 4 28617083 28617085
#> manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:14352:0:1:0:0:0:1 NA + + HBL-1
#> 2 MantaDUP:TANDEM:14103:0:1:0:0:0_bp1 NA - + OCI-Ly10
#> 3 MantaBND:14359:0:1:0:0:0:1 NA + + SU-DHL-4
#> 4 MantaBND:14359:0:2:0:0:0:0 NA - - SU-DHL-4
#> 5 MantaBND:12526:0:1:0:0:0:1 NA + + SU-DHL-6
#> 6 MantaDEL:15336:0:1:0:1:0_bp1 NA + - SU-DHL-5
#> 7 MantaBND:12875:0:1:0:0:0:0 NA - - SU-DHL-6
#> 8 MantaDEL:16045:0:1:0:0:0_bp1 NA + - HT
#> 9 MantaDEL:15520:0:1:0:0:0_bp1 NA + - SU-DHL-5
#> 10 MantaBND:15925:0:1:0:0:0:0 NA - - WSU-NHL
#> normal_sample_id VAF_tumour DP
#> 1 14-11247N 0.189 95
#> 2 14-11247N 0.193 119
#> 3 14-11247N 0.213 94
#> 4 14-11247N 0.300 80
#> 5 14-11247N 0.323 62
#> 6 14-11247N 0.345 113
#> 7 14-11247N 0.234 77
#> 8 14-11247N 0.247 81
#> 9 14-11247N 1.000 30
#> 10 14-11247N 0.176 91
nrow(some_sv)
#> [1] 3519
# get the SVs in a region around MYC
# WARNING: This is not the best way to find MYC SVs.
# Use annotate_sv on the full SV set instead.
myc_region_hg38 = "chr8:127710883-127761821"
myc_region_grch37 = "8:128723128-128774067"
hg38_myc_locus_sv <- get_combined_sv(region = myc_region_hg38,
projection = "hg38")
dplyr::select(hg38_myc_locus_sv,1:14)
#> genomic_data Object
#> Genome Build: hg38
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 chr1 6803016 6803030 chr8 127731317 127731331
#> 2 chr1 8253139 8253144 chr8 127756470 127756475
#> 3 chr1 50392925 50392927 chr8 127747754 127747756
#> 4 chr1 100041646 100041649 chr8 127753200 127753203
#> 5 chr1 149968016 149968030 chr8 127720533 127720547
#> 6 chr1 180261380 180261390 chr8 127747224 127747234
#> 7 chr1 182119529 182119530 chr8 127747267 127747268
#> 8 chr1 202928006 202928020 chr8 127747224 127747238
#> 9 chr1 207726976 207727002 chr8 127724887 127724913
#> 10 chr1 209800781 209800785 chr8 127753804 127753808
#> manta_name SCORE STRAND_A STRAND_B
#> 1 <NA> 208.20 - +
#> 2 <NA> 308.00 - +
#> 3 <NA> 253.09 - +
#> 4 <NA> 211.34 - +
#> 5 <NA> 335.95 - +
#> 6 MantaBND:2:133568:133570:0:2:0:0 NA - +
#> 7 <NA> 226.75 + -
#> 8 <NA> 419.03 + -
#> 9 <NA> 204.27 - +
#> 10 MantaBND:0:549563:738680:0:1:0:0 NA + -
#> tumour_sample_id normal_sample_id VAF_tumour DP
#> 1 01-12047_tumorA 01-12047_normal 0.10396040 202
#> 2 BLGSP-71-30-00665-01A-01E BLGSP-71-30-00665-10A-01D 0.08870968 124
#> 3 BLGSP-71-30-00678-01A-01E BLGSP-71-06-00286-99A-01D 0.05128205 195
#> 4 BLGSP-71-30-00656-01A-01E BLGSP-71-06-00286-99A-01D 0.05241935 248
#> 5 13-38657_tumorB 13-38657_normal 0.10800000 250
#> 6 11-12873_tumorC 11-12873_normal 0.05900000 102
#> 7 BLGSP-71-30-00655-01A-01E BLGSP-71-06-00286-99A-01D 0.05555556 198
#> 8 14-11777_tumorB 14-11777N 0.03921569 459
#> 9 BLGSP-71-30-00661-01A-01E BLGSP-71-06-00286-99A-01D 0.07253886 193
#> 10 BLGSP-71-30-00647-01A-01E BLGSP-71-06-00286-99A-01D 0.13200000 38
nrow(hg38_myc_locus_sv)
#> [1] 683
incorrect_myc_locus_sv <- get_combined_sv(region = myc_region_grch37,
projection = "hg38")
dplyr::select(incorrect_myc_locus_sv,1:14)
#> genomic_data Object
#> Genome Build: hg38
#> Showing first 10 rows:
#> CHROM_A START_A END_A CHROM_B START_B END_B
#> 1 chr1 182655283 182655290 chr8 128767396 128767403
#> 2 chr1 233914485 233914492 chr8 128773743 128773750
#> 3 chr11 73459284 73459290 chr8 128758908 128758914
#> 4 chr11 93629111 93629307 chr8 128726629 128726825
#> 5 chr11 93629111 93629647 chr8 128726343 128727380
#> 6 chr11 93629111 93629641 chr8 128726295 128727325
#> 7 chr11 93629111 93629613 chr8 128726323 128727499
#> 8 chr11 93629111 93629581 chr8 128726380 128727086
#> 9 chr11 93629111 93629568 chr8 128726370 128727275
#> 10 chr11 93629111 93629568 chr8 128726368 128727133
#> manta_name SCORE STRAND_A STRAND_B tumour_sample_id
#> 1 MantaBND:203817:0:1:0:0:0:0 NA - - 12-23835_tumorA
#> 2 MantaBND:306088:0:1:0:0:0:0 NA + - 05-16093_tumorA
#> 3 <NA> 259.43 - + 01-23117_tumorB
#> 4 MantaBND:202456:0:1:0:0:0:1 NA + - 00-12637_CLC02086
#> 5 MantaBND:28037:1:9:0:0:0:1 NA + - 01-20774T
#> 6 MantaBND:0:278162:278290:0:0:0:1 NA + - CLC03336
#> 7 MantaBND:170141:0:1:0:0:0:1 NA + - CLC03338
#> 8 MantaBND:204178:0:1:0:0:0:1 NA + - CLC03454
#> 9 MantaBND:157518:0:3:0:0:0:0 NA + - CLC03455
#> 10 MantaBND:191300:0:1:0:0:0:1 NA + - CLC03456
#> normal_sample_id VAF_tumour DP
#> 1 12-23835_normal 0.0530000 114
#> 2 05-16093_normal 0.0780000 77
#> 3 01-23117_normal 0.1383929 224
#> 4 FL1011N 0.0980000 61
#> 5 14-11247N 0.2800000 25
#> 6 14-11247N 0.2570000 179
#> 7 14-11247N 0.1150000 139
#> 8 14-11247N 0.2710000 140
#> 9 14-11247N 0.2680000 97
#> 10 14-11247N 0.1850000 151
nrow(incorrect_myc_locus_sv)
#> [1] 51
# Despite potentially being incomplete, we can nonetheless
# annotate these directly for more details
annotated_myc_hg38 = suppressMessages(
GAMBLR.utils::annotate_sv(hg38_myc_locus_sv, genome_build = "hg38")
)
head(annotated_myc_hg38)
#> chrom1 start1 end1 chrom2 start2 end2 name score strand1
#> 1 18 63123493 63123497 8 127728389 127728393 . 1742.35 +
#> 2 18 63195263 63195266 8 127744561 127744564 . NA -
#> 3 2 28983232 28983241 8 127711263 127711272 . NA -
#> 4 3 70834666 70834671 8 127750318 127750323 . 249.44 -
#> 5 3 101756651 101756656 8 127724888 127724893 . 261.80 +
#> 6 4 1746418 1746422 8 127723482 127723486 . NA -
#> strand2 tumour_sample_id gene partner fusion
#> 1 - SU-DHL-10 BCL2 <NA> NA-BCL2
#> 2 - SP194216 BCL2 <NA> NA-BCL2
#> 3 - 02-14764_tumorB ALK <NA> NA-ALK
#> 4 + BLGSP-71-30-00661-01A-01E FOXP1 <NA> NA-FOXP1
#> 5 - BLGSP-71-30-00676-01A-01E NFKBIZ <NA> NA-NFKBIZ
#> 6 - 09-41114T WHSC1 <NA> NA-WHSC1
table(annotated_myc_hg38$partner)
#>
#> BCL6 CCNL1 DMD IGH IGK IGL LRMP PAX5 RFTN1 ZEB2
#> 6 1 2 377 5 7 3 11 1 1
# The usual MYC partners are seen here
annotated_myc_incorrect = suppressMessages(
GAMBLR.utils::annotate_sv(incorrect_myc_locus_sv, genome_build = "hg38")
)
head(annotated_myc_incorrect)
#> chrom1 start1 end1 chrom2 start2 end2 name score strand1
#> 1 8 127313080 127313571 8 128746007 128746587 . NA +
#> 2 8 128738977 128738983 8 128752583 128752589 . NA +
#> 3 8 128738977 128738983 8 128752583 128752589 . NA +
#> 4 8 128738979 128738981 8 128752583 128752585 . NA +
#> 5 8 128738979 128738981 8 128752583 128752585 . NA +
#> 6 8 128738979 128738981 8 128752583 128752585 . NA +
#> strand2 tumour_sample_id gene partner fusion
#> 1 - PD26401c MYC <NA> NA-MYC
#> 2 - 04-14093_tumorA MYC <NA> NA-MYC
#> 3 - 04-14093_tumorB MYC <NA> NA-MYC
#> 4 - 05-24065T MYC <NA> NA-MYC
#> 5 - 10-27119T MYC <NA> NA-MYC
#> 6 - SU-DHL-6 MYC <NA> NA-MYC
table(annotated_myc_incorrect$partner)
#> < table of extent 0 >
# The effect of specifying coordinates from the wrong genome build is evident