So I wouldn’t sort here. Instead, I would aim for a channel structure where the signal and background have their own items, e.g. something like this:
[ meta, signal_bam, signal_bed, signal_readnum, background_bam, background_bed, background_readnum ]
This gives you clear elements to be used as a process input which can be controlled and tested.
To do this, you will need to split the channel into ‘signal’ and ‘background’ and then re-join them: use the branch operator to split and the join operator to re-join. You will also need a few map operations to make sure the joining key is the same on both sides. Here’s an example based on my answer to the other question:
workflow {
    // Example inputs standing in for the real process outputs.
    // Every channel emits [ meta, file ] and all three share identical meta maps,
    // so the full meta map can be used as the join key below.
    REMOVE_UNMAPPED_READS_out_bam = Channel.of(
        [[id:'test1_replicate1_background', sample:'test1', replicate:'replicate1', type:'background', single_end:false], 'test1_replicate1_background_mapped.bam'],
        [[id:'test1_replicate1_signal', sample:'test1', replicate:'replicate1', type:'signal', single_end:false], 'test1_replicate1_signal_mapped.bam'],
        [[id:'test1_replicate2_background', sample:'test1', replicate:'replicate2', type:'background', single_end:false], 'test1_replicate2_background_mapped.bam'],
        [[id:'test1_replicate2_signal', sample:'test1', replicate:'replicate2', type:'signal', single_end:false], 'test1_replicate2_signal_mapped.bam'],
        [[id:'test2_replicate1_background', sample:'test2', replicate:'replicate1', type:'background', single_end:false], 'test2_replicate1_background_mapped.bam'],
        [[id:'test2_replicate1_signal', sample:'test2', replicate:'replicate1', type:'signal', single_end:false], 'test2_replicate1_signal_mapped.bam']
    )

    // Peak-cluster BED files
    CLIPPER_out_bed = Channel.of(
        [[id:'test1_replicate1_background', sample:'test1', replicate:'replicate1', type:'background', single_end:false], 'test1_replicate1_background.clip.peakClusters.bed'],
        [[id:'test1_replicate1_signal', sample:'test1', replicate:'replicate1', type:'signal', single_end:false], 'test1_replicate1_signal.clip.peakClusters.bed'],
        [[id:'test1_replicate2_background', sample:'test1', replicate:'replicate2', type:'background', single_end:false], 'test1_replicate2_background.clip.peakClusters.bed'],
        [[id:'test1_replicate2_signal', sample:'test1', replicate:'replicate2', type:'signal', single_end:false], 'test1_replicate2_signal.clip.peakClusters.bed'],
        [[id:'test2_replicate1_background', sample:'test2', replicate:'replicate1', type:'background', single_end:false], 'test2_replicate1_background.clip.peakClusters.bed'],
        [[id:'test2_replicate1_signal', sample:'test2', replicate:'replicate1', type:'signal', single_end:false], 'test2_replicate1_signal.clip.peakClusters.bed']
    )

    // Read-number text files
    CREATEREADNUM_out_readnum = Channel.of(
        [[id:'test1_replicate1_background', sample:'test1', replicate:'replicate1', type:'background', single_end:false], 'test1_replicate1_background.readnum.txt'],
        [[id:'test1_replicate1_signal', sample:'test1', replicate:'replicate1', type:'signal', single_end:false], 'test1_replicate1_signal.readnum.txt'],
        [[id:'test1_replicate2_background', sample:'test1', replicate:'replicate2', type:'background', single_end:false], 'test1_replicate2_background.readnum.txt'],
        [[id:'test1_replicate2_signal', sample:'test1', replicate:'replicate2', type:'signal', single_end:false], 'test1_replicate2_signal.readnum.txt'],
        [[id:'test2_replicate1_background', sample:'test2', replicate:'replicate1', type:'background', single_end:false], 'test2_replicate1_background.readnum.txt'],
        [[id:'test2_replicate1_signal', sample:'test2', replicate:'replicate1', type:'signal', single_end:false], 'test2_replicate1_signal.readnum.txt']
    )

    // Combine the three outputs per sample using the shared meta map as the key.
    // The join order determines the element order: [ meta, bam, bed, readnum ]
    joined_ch = REMOVE_UNMAPPED_READS_out_bam
        .join(CLIPPER_out_bed)
        .join(CREATEREADNUM_out_readnum)

    // Split the channel by sample type; anything that is neither 'signal' nor
    // 'background' is routed to 'other' (not used further in this example)
    joined_ch
        .branch { meta, bam, bed, readnum ->
            signal: meta.type == 'signal'
            background: meta.type == 'background'
            other: true
        }
        .set { split_joined_ch }

    // Prepend a joining key built from 'sample' and 'replicate' only (no 'type'),
    // so that a signal entry and its background entry end up with the same key
    split_joined_ch_signal = split_joined_ch.signal.map { meta, bam, bed, readnum ->
        tuple(
            meta.subMap('sample', 'replicate'),
            meta,
            bam,
            bed,
            readnum
        )
    }
    split_joined_ch_background = split_joined_ch.background.map { meta, bam, bed, readnum ->
        tuple(
            meta.subMap('sample', 'replicate'),
            meta,
            bam,
            bed,
            readnum
        )
    }

    // Rejoin the two channels on the first element (the sample/replicate key).
    // Matched pairs are emitted as:
    // [ key, signal_meta, signal_bam, signal_bed, signal_readnum,
    //   background_meta, background_bam, background_bed, background_readnum ]
    // remainder: true also emits entries that have no partner instead of dropping them
    split_joined_ch_signal
        .join(split_joined_ch_background, by: 0, remainder: true)
        .view()
}
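This looks fairly intimidating, but I’ve deliberately made the answer very verbose to explain each step; you can probably do this in a much cleaner manner. Note that the final channel still carries the joining key and both meta maps, i.e. [ key, signal_meta, signal_bam, signal_bed, signal_readnum, background_meta, background_bam, background_bed, background_readnum ], so add one more map at the end if you want exactly the structure shown above.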