Hi,
My current workflow generates a single MultiQC report covering all the samples in the input list. For my use case, I would instead like to generate a separate MultiQC report for each sample.
Below are my current workflow and the multiqc process; the complete workflow (main.nf) is attached at the end.
I would appreciate any help.
Current workflow and module producing a single MultiQC report (this works fine):
input.yaml
samples:
-
biosample_id: NA12878-chr14-AKT1
aln: NA12878-chr14-AKT1.bam
-
biosample_id: NA12878-chr14-AKT2
aln: NA12878-chr14-AKT2.bam
main.nf
// Gather the QC outputs of every tool into one flat list of files and run
// MultiQC once over all of them.
Channel
    .empty()
    .mix( mosdepth_bam.out.dists, mosdepth_bam.out.summary )
    .mix( mosdepth_cram.out.dists, mosdepth_cram.out.summary )
    .mix( mosdepth_datamash.out.coverage )
    .mix( verifybamid2_bam.out.freemix, verifybamid2_cram.out.freemix )
    .mix( verifybamid2_bam.out.ancestry, verifybamid2_cram.out.ancestry )
    .mix( samtools_stats_bam.out, samtools_stats_cram.out )
    // Each item is destructured as (sample, files); only the files are kept.
    .map { biosample, reports -> reports }
    // Collapse everything into a single list emitted once.
    .collect()
    .set { log_files }

multiqc( log_files )
modules/multiqc/main.nf
// Runs MultiQC a single time over all staged input files and emits the
// resulting HTML report, the multiqc_data directory and its JSON data file.
process multiqc {
// All input files are staged under the data/ sub-directory of the task.
input:
path 'data/*'
// Outputs are expected directly in the task directory (no per-sample folder).
output:
path "multiqc_report.html", emit: report
path "multiqc_data", emit: data
path "multiqc_data/multiqc_data.json", emit: json_data
// The script scans the task directory (`.`), which contains the staged data/ folder.
"""
multiqc \\
--data-format json \\
--enable-npm-plugin \\
.
"""
}
I tried the following changes to the multiqc process and its invocation in order to produce one report per sample:
Try 1. main.nf
// Attempt 1: start from the bare sample ids and mix in every per-tool output.
// NOTE(review): `Channel` on its own line is a separate expression statement;
// the operator chain below starts at `samples`, not at `Channel`.
Channel
samples.map { it.biosample_id }
// .empty()
.mix( mosdepth_bam.out.dists )
.mix( mosdepth_bam.out.summary )
.mix( mosdepth_cram.out.dists )
.mix( mosdepth_cram.out.summary )
.mix( mosdepth_datamash.out.coverage )
.mix( verifybamid2_bam.out.freemix )
.mix( verifybamid2_cram.out.freemix )
.mix( verifybamid2_bam.out.ancestry )
.mix( verifybamid2_cram.out.ancestry )
.mix( samtools_stats_bam.out.stats )
.mix( samtools_stats_cram.out.stats )
// .map { sample, files -> files }
// .map { it.biosample_id }
// NOTE(review): collect() gathers every item from every sample into one list,
// so no per-sample grouping survives this step. Presumably groupTuple() keyed
// on the sample id is what a per-sample report needs -- confirm.
.collect()
// NOTE(review): two names are given to set { } here; the operator is documented
// as binding the channel to a single name, so `sample` is presumably not
// defined by this call -- confirm against the Nextflow operator reference.
.set { sample, log_files }
// Only the collected list is passed; no sample id reaches the process.
multiqc( log_files )
Try 2. main.nf
// Attempt 2: pass the sample ids as one input and all tool outputs as another.
// NOTE(review): `Channel` on its own line is a separate statement; the chain
// starts at `samples`.
Channel
samples.map { it.biosample_id }
.set { sample_ids }
// NOTE(review): the second argument is a single collect()-ed list built from
// the outputs of ALL samples, with no map/groupTuple step to strip or group by
// sample id. Each sample id is therefore presumably paired with the same
// combined list rather than with its own files -- confirm.
multiqc( sample_ids, mosdepth_datamash.out.coverage.mix( samtools_stats_bam.out.stats, samtools_stats_cram.out.stats, mosdepth_bam.out.dists, mosdepth_bam.out.summary, mosdepth_cram.out.dists, mosdepth_cram.out.summary, verifybamid2_bam.out.freemix, verifybamid2_cram.out.freemix, verifybamid2_bam.out.ancestry, verifybamid2_cram.out.ancestry, picard_collect_multiple_metrics_bam.out.insert_size, picard_collect_multiple_metrics_cram.out.insert_size, picard_collect_multiple_metrics_bam.out.quality, picard_collect_multiple_metrics_cram.out.quality ).collect() )
Try module
modules/multiqc/main.nf
// Per-sample variant of the multiqc process: takes a sample id plus a set of
// files and writes the report into a directory named after the sample.
process multiqc {
// Label each task with the sample value it was given.
tag { sample }
// Two separate inputs: the sample id, and the files staged into the task
// directory under their own names.
input:
val(sample)
path("*")
// Every output is looked up under the ${sample}/ directory that the script
// creates through the -o option, and is emitted together with the sample id.
output:
tuple val(sample), path("${sample}/multiqc_report.html"), emit: report
tuple val(sample), path("${sample}/multiqc_data"), emit: data
tuple val(sample), path("${sample}/multiqc_data/multiqc_data.json"), emit: json_data
// The script scans the task directory (`.`) and writes into ${sample}.
"""
multiqc \\
--data-format json \\
--enable-npm-plugin \\
-o ${sample} \\
.
"""
}
Try 3. main.nf
// Attempt 3: feed the mixed outputs straight into a process declaring
// `tuple val(sample), path('*')`.
// NOTE(review): collect() turns all items into one flat list; the "offending
// value" in the warning below is exactly such a list (id, path, id, path, ...
// for both samples), which does not match a two-element tuple. Presumably
// groupTuple() on the sample id, followed by flattening the grouped files, is
// needed instead of collect() -- confirm.
multiqc( mosdepth_datamash.out.coverage.mix( samtools_stats_bam.out.stats, samtools_stats_cram.out.stats, mosdepth_bam.out.dists, mosdepth_bam.out.summary, mosdepth_cram.out.dists, mosdepth_cram.out.summary, verifybamid2_bam.out.freemix, verifybamid2_cram.out.freemix, verifybamid2_bam.out.ancestry, verifybamid2_cram.out.ancestry, picard_collect_multiple_metrics_bam.out.insert_size, picard_collect_multiple_metrics_cram.out.insert_size, picard_collect_multiple_metrics_bam.out.quality, picard_collect_multiple_metrics_cram.out.quality ).collect() )
modules/multiqc/main.nf
process multiqc {
tag { sample }
input:
tuple val(sample), path('*')
....
....
ERROR
WARN: Input tuple does not match input set cardinality declared by process `multiqc`
offending value: [NA12878-chr14-AKT1, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/34/3d9c244d1ba1b1b7e7b019becba6ae/NA12878-chr14-AKT1.insert_size_metrics.txt, NA12878-chr14-AKT1, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/34/3d9c244d1ba1b1b7e7b019becba6ae/NA12878-chr14-AKT1.quality_yield_metrics.txt, NA12878-chr14-AKT1, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/88/90701b0a8c8c6861d2c4af605eeb42/NA12878-chr14-AKT1.stats, NA12878-chr14-AKT1, [/Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/fb/e8935106a74cff24d53e44403f3136/NA12878-chr14-AKT1.mosdepth.global.dist.txt, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/fb/e8935106a74cff24d53e44403f3136/NA12878-chr14-AKT1.mosdepth.region.dist.txt], NA12878-chr14-AKT1, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/fb/e8935106a74cff24d53e44403f3136/NA12878-chr14-AKT1.mosdepth.summary.txt, NA12878-chr14-AKT1, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/69/8a684babdde53d4efc124fd74f50c4/NA12878-chr14-AKT1.mosdepth.csv, NA12878-chr14-AKT2, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/a7/34cd5464d286da93227c868affcfa6/NA12878-chr14-AKT2.stats, NA12878-chr14-AKT2, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/f5/3a02b426d3653b692575b255c7ef03/NA12878-chr14-AKT2.quality_yield_metrics.txt, NA12878-chr14-AKT2, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/f5/3a02b426d3653b692575b255c7ef03/NA12878-chr14-AKT2.insert_size_metrics.txt, NA12878-chr14-AKT2, [/Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/8f/bc1280d1688933bdb678093d09b7e8/NA12878-chr14-AKT2.mosdepth.global.dist.txt, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/8f/bc1280d1688933bdb678093d09b7e8/NA12878-chr14-AKT2.mosdepth.region.dist.txt], NA12878-chr14-AKT2, 
/Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/8f/bc1280d1688933bdb678093d09b7e8/NA12878-chr14-AKT2.mosdepth.summary.txt, NA12878-chr14-AKT2, /Users/tests/NA12878-chr14-AKT1_1000genomes-dragen-3.7.6/work/99/912e93062f1568504322d31ddb2ef1/NA12878-chr14-AKT2.mosdepth.csv]
ERROR ~ Error executing process > 'multiqc (NA12878-chr14-AKT1)'
Caused by:
Missing output file(s) `multiqc_report.html` expected by process `multiqc (NA12878-chr14-AKT1)`
Complete workflow
main.nf
// main
workflow {
    // Reference genome and its index; the index path is the reference path plus ".fai".
    ref_fasta     = file( params.reference )
    ref_fasta_idx = file( params.reference + ".fai" )

    autosomes_non_gap_regions = file( params.autosomes_non_gap_regions )

    // Resource files passed to the verifybamid2 processes.
    vbi2_ud   = file( params.vbi2_ud )
    vbi2_bed  = file( params.vbi2_bed )
    vbi2_mean = file( params.vbi2_mean )

    // Sample records come from the YAML inputs list; when that list yields
    // nothing, a single record is built from the command-line parameters.
    inputs = new YamlSlurper().parse(file(params.inputs_list))

    samples = Channel
        .fromList(inputs['samples'])
        .ifEmpty { ['biosample_id': params.biosample_id, 'aln': params.aln] }

    // Route every record by alignment file extension and attach the matching
    // index file (.bai for BAM, .crai for CRAM).
    aln_inputs = samples.branch { rec ->
        def alignment = rec.aln ? file( rec.aln ) : null
        bam: rec.biosample_id && alignment?.extension == 'bam'
            return tuple( rec.biosample_id, alignment, file( "${rec.aln}.bai" ) )
        cram: rec.biosample_id && alignment?.extension == 'cram'
            return tuple( rec.biosample_id, alignment, file( "${rec.aln}.crai" ) )
    }

    // Per-alignment QC tools; each has a BAM and a CRAM invocation.
    samtools_stats_bam( aln_inputs.bam, [] )
    samtools_stats_cram( aln_inputs.cram, ref_fasta )

    verifybamid2_bam( aln_inputs.bam, ref_fasta, vbi2_ud, vbi2_bed, vbi2_mean )
    verifybamid2_cram( aln_inputs.cram, ref_fasta, vbi2_ud, vbi2_bed, vbi2_mean )

    picard_collect_multiple_metrics_bam( aln_inputs.bam, [], [] )
    picard_collect_multiple_metrics_cram( aln_inputs.cram, ref_fasta, ref_fasta_idx )

    mosdepth_bam( aln_inputs.bam, [] )
    mosdepth_cram( aln_inputs.cram, ref_fasta )

    // Region outputs of both mosdepth invocations feed the datamash step.
    mosdepth_regions = Channel
        .empty()
        .mix( mosdepth_bam.out.regions, mosdepth_cram.out.regions )

    mosdepth_datamash( mosdepth_regions, autosomes_non_gap_regions )
    // mosdepth_datamash( autosomes_non_gap_regions, mosdepth_bam.out.regions.mix( mosdepth_cram.out.regions ) )

    // Every QC output, stripped of its sample id and collapsed into one list,
    // so that MultiQC runs once across all samples.
    log_files = Channel
        .empty()
        .mix( mosdepth_bam.out.dists, mosdepth_bam.out.summary )
        .mix( mosdepth_cram.out.dists, mosdepth_cram.out.summary )
        .mix( mosdepth_datamash.out.coverage )
        .mix( verifybamid2_bam.out.freemix, verifybamid2_cram.out.freemix )
        .mix( verifybamid2_bam.out.ancestry, verifybamid2_cram.out.ancestry )
        .mix( picard_collect_multiple_metrics_bam.out.insert_size, picard_collect_multiple_metrics_cram.out.insert_size )
        .mix( picard_collect_multiple_metrics_bam.out.quality, picard_collect_multiple_metrics_cram.out.quality )
        .mix( samtools_stats_bam.out, samtools_stats_cram.out )
        .map { biosample, reports -> reports }
        .collect()

    multiqc( log_files )

    // Sample ids alongside the MultiQC JSON data go to the metrics compilation step.
    sample_ids = samples.map { rec -> rec.biosample_id }

    compile_metrics ( sample_ids, multiqc.out.json_data )
}