Thank you very much for the training materials. It helped a lot in creating my first nextflow pipeline. I built a pipeline for germline SNV variant calling for short read illumina. I run into problem with GATK Variant Recalibrator - error Argument resource is missing. Apparently, the resource files are in the Nextflow work directory. The input files for Variant recalibrator seem to be ok. If I run separately the GATk Variant Recalibrator command it works. Any ideas would be much appreciated. Thanks!
script for main.nf
nextflow.enable.dsl=2
//Channel and parameters
//reads
params.r1 = ‘/home/juls/nextflow_module/na12878_r1.fq’
params.r2 = ‘/home/juls/nextflow_module/na12878_r2.fq’
//reference genome
params.reference = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta’
params.reference_fai = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.fai’
params.reference_dict = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.dict’
params.reference_ann = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.ann’
params.reference_bwt = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.bwt’
params.reference_pac = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.pac’
params.reference_amb = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.amb’
params.reference_sa = ‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta.sa’
//outdir
params.outdir = ‘/home/juls/nextflow_module/results’
//picard
params.picard = ‘/home/juls/picard/build/libs/picard.jar’
//intervals list
params.intervals = ‘/home/juls/nextflow_test/intervals.list’
//resources for variant recalibrator
params.resources_1000Gomni = ‘/home/juls/resources/1000G_omni2.5.hg38.vcf.gz’
params.resources_1000Gomni_tbi = ‘/home/juls/resources/1000G_omni2.5.hg38.vcf.gz.tbi’
params.resources_1000Gphase = ‘/home/juls/resources/1000G_phase1.snps.high_confidence.hg38.vcf.gz’
params.resources_1000Gphase_tbi = ‘/home/juls/resources/1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi’
params.resources_hapmap = ‘/home/juls/resources/hapmap_3.3.hg38.vcf.gz’
params.resources_hapmap_tbi = ‘/home/juls/resources/hapmap_3.3.hg38.vcf.gz.tbi’
params.resources_dbsnp = ‘/home/juls/resources/Homo_sapiens_assembly38.dbsnp138.vcf.gz’
params.resources_dbsnp_tbi = ‘/home/juls/resources/Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi’
// Module INCLUDE statements
include { bwamem } from ‘./modules/bwamem.nf’
include { sortsam } from ‘./modules/SortSam_picard.nf’
include { addReadGroups} from ‘./modules/AddOrReplaceReadGroups.nf’
include { markDuplicates } from ‘./modules/MarkDuplicates_picard.nf’
include { buildBamIndex } from ‘./modules/BuildBamIndex.nf’
include { haplotypeCaller } from ‘./modules/HaplotypeCaller.nf’
include { indexgVCF } from ‘./modules/tabix.nf’
include { genomicsDBImport } from ‘./modules/genomicsDBImport.nf’
include { genotypeGVCFs } from ‘./modules/genotypeGVCFs.nf’
include { unzipVCF } from ‘./modules/unzipVCF.nf’
include { variantRecalibrator } from ‘./modules/VariantRecalibrator.nf’
workflow{
//Create input channels
reference_channel = Channel.fromPath(‘/home/juls/reference/Homo_sapiens.GRCh38.dna.primary_assembly.fasta’)
ref_tuple = Channel.value(tuple(params.reference, params.reference_fai, params.reference_dict, params.reference_amb, params.reference_ann, params.reference_bwt, params.reference_pac, params.reference_sa))
r1_ch = Channel.value(params.r1)
r2_ch = Channel.value(params.r2)
hapmap_tuple = Channel.value(tuple(params.resources_hapmap, params.resources_hapmap_tbi))
omni_tuple = Channel.value(tuple(params.resources_1000Gomni, params.resources_1000Gomni_tbi))
phase_tuple = Channel.value(tuple(params.resources_1000Gphase, params.resources_1000Gphase_tbi))
dbsnp_tuple = Channel.value(tuple(params.resources_dbsnp, params.resources_dbsnp_tbi))
bwamem (ref_tuple, r1_ch, r2_ch)
sortsam (bwamem.out)
addReadGroups (sortsam.out)
markDuplicates (addReadGroups.out)
buildBamIndex (markDuplicates.out)
haplotypeCaller (markDuplicates.out, buildBamIndex.out, ref_tuple)
indexgVCF(haplotypeCaller.out)
genomicsDBImport (haplotypeCaller.out, indexgVCF.out, params.intervals)
genotypeGVCFs (ref_tuple, genomicsDBImport.out)
unzipVCF (genotypeGVCFs.out)
variantRecalibrator (unzipVCF.out, ref_tuple, hapmap_tuple, omni_tuple, phase_tuple, dbsnp_tuple)
}
script for module VariantRecalibrator
process variantRecalibrator{
container “community.wave.seqera.io/library/gatk4:4.6.1.0--e3124bcb2431f4a9”
publishDir ‘results/vcf’, mode: ‘copy’
input:
path vcf
tuple path(reference), path(reference_fai), path(reference_dict), path(reference_amb), path(reference_ann), path(reference_bwt), path(reference_pac), path(reference_sa)
tuple path(resources_hapmap), path(resources_hapmap_tbi)
tuple path(resources_1000Gomni), path(resources_1000Gomni_tbi)
tuple path(resources_1000Gphase), path(resources_1000Gphase_tbi)
tuple path(resources_dbsnp), path(resources_dbsnp_tbi)
output:
path "var_recal.recal", emit:recal
path "var_recal.tranches'", emit:tranches
script:
"""
echo "Reference: $reference"
echo "VCF: $vcf"
echo "Hapmap: $resources_hapmap"
echo "Omni: $resources_1000Gomni"
echo "1000G: $resources_1000Gphase"
echo "dbSNP: $resources_dbsnp"
ls -lh
gatk VariantRecalibrator
-R $reference \
-V $vcf \
--resource:hapmap,known=false,training=true,truth=true,prior=15.0 ${resources_hapmap}\
--resource:omni,known=false,training=true,truth=false,prior=12.0 ${resources_1000Gomni}\
--resource:1000G,known=false,training=true,truth=false,prior=10.0 ${resources_1000Gphase}\
--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 ${resources_dbsnp}\
-an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR \
-mode SNP \
-O var_recal.recal \
--tranches-file var_recal.tranches
"""
}