Skip to content

Commit

Permalink
Create a "genome file" that bedtools jaccard needs.
Browse files Browse the repository at this point in the history
Otherwise bedtools jaccard crashes if a sequence from B is not in A.

The same computation is obviously repeated in each module here.  This is
not satisfying, but I need a short-term fix.  Hopefully reading the genome
will put its sequence in the node's memory cache, which will be useful
in the final commands.
  • Loading branch information
charles-plessy committed Oct 9, 2024
1 parent 264035e commit 9159900
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
3 changes: 2 additions & 1 deletion modules/local/merge_repeatmasker_all.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ process MERGE_REPM_RESULTS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
awk '/^>/ {if (seqlen){print seqname "\t" seqlen}; split(\$1, a, ">"); seqname=a[2]; seqlen=0; next} {seqlen += length(\$0)} END {print seqname "\t" seqlen}' $genome > genome.genome # thanks, ChatGPT!
run_bedtools_operations() {
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" > "${prefix}_\${3}_jaccard.txt"
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" -g genome.genome > "${prefix}_\${3}_jaccard.txt"
zcat "\$1" "\$2" | sort -k1,1 -k2,2n | bedtools merge | gzip --best --no-name > "${prefix}_\${3}.mask.bed.gz"
}
Expand Down
3 changes: 2 additions & 1 deletion modules/local/merge_repeatmasker_dfam.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ process MERGE_REPM_RESULTS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
awk '/^>/ {if (seqlen){print seqname "\t" seqlen}; split(\$1, a, ">"); seqname=a[2]; seqlen=0; next} {seqlen += length(\$0)} END {print seqname "\t" seqlen}' $genome > genome.genome # thanks, ChatGPT!
run_bedtools_operations() {
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" > "${prefix}_\${3}_jaccard.txt"
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" -g genome.genome > "${prefix}_\${3}_jaccard.txt"
zcat "\$1" "\$2" | sort -k1,1 -k2,2n | bedtools merge | gzip --best --no-name > "${prefix}_\${3}.mask.bed.gz"
}
Expand Down
3 changes: 2 additions & 1 deletion modules/local/merge_repeatmasker_extlib.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ process MERGE_REPM_RESULTS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
awk '/^>/ {if (seqlen){print seqname "\t" seqlen}; split(\$1, a, ">"); seqname=a[2]; seqlen=0; next} {seqlen += length(\$0)} END {print seqname "\t" seqlen}' $genome > genome.genome # thanks, ChatGPT!
run_bedtools_operations() {
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" > "${prefix}_\${3}_jaccard.txt"
bedtools jaccard -nonamecheck -a "\$1" -b "\$2" -g genome.genome > "${prefix}_\${3}_jaccard.txt"
zcat "\$1" "\$2" | sort -k1,1 -k2,2n | bedtools merge | gzip --best --no-name > "${prefix}_\${3}.mask.bed.gz"
}
Expand Down
7 changes: 4 additions & 3 deletions modules/local/mergemasks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ process MERGE_MASKS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
bedtools jaccard -nonamecheck -a $tantan -b $windowmasker > ${prefix}_tantan_windowmasker_jaccard.txt
bedtools jaccard -nonamecheck -a $tantan -b $repeatmasker > ${prefix}_tantan_repeatmasker_jaccard.txt
bedtools jaccard -nonamecheck -a $repeatmasker -b $windowmasker > ${prefix}_repeatmasker_windowmasker_jaccard.txt
awk '/^>/ {if (seqlen){print seqname "\t" seqlen}; split(\$1, a, ">"); seqname=a[2]; seqlen=0; next} {seqlen += length(\$0)} END {print seqname "\t" seqlen}' $genome > genome.genome # thanks, ChatGPT!
bedtools jaccard -nonamecheck -a $tantan -b $windowmasker -g genome.genome > ${prefix}_tantan_windowmasker_jaccard.txt
bedtools jaccard -nonamecheck -a $tantan -b $repeatmasker -g genome.genome > ${prefix}_tantan_repeatmasker_jaccard.txt
bedtools jaccard -nonamecheck -a $repeatmasker -b $windowmasker -g genome.genome > ${prefix}_repeatmasker_windowmasker_jaccard.txt
zcat $tantan $windowmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best --no-name > ${prefix}_tantan_windowmasker.bed.gz
zcat $tantan $repeatmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best --no-name > ${prefix}_tantan_repeatmasker.bed.gz
Expand Down

0 comments on commit 9159900

Please sign in to comment.