Skip to content

Commit fe46ddf

Browse files
cimendes
authored and committed
dengue_typing now retrieves the sequence of the closest reference
1 parent c5e4f04 commit fe46ddf

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

flowcraft/bin/parse_fasta.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python3
2+
3+
4+
import argparse
5+
from itertools import groupby
6+
import os
7+
8+
def _iter_fasta_records(fasta):
    """Yield ``(header, sequence)`` for each record in an iterable of FASTA lines.

    The header is returned without its leading ``>`` and without surrounding
    whitespace; the sequence is the concatenation of the stripped lines that
    follow it. Lines that appear before the first header are ignored, and a
    header with no sequence lines yields an empty sequence.
    """
    header = None
    chunks = []
    for line in fasta:
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if header is not None:
                yield header, "".join(chunks)
            header = line[1:].strip()
            chunks = []
        elif header is not None:
            chunks.append(line.strip())
    # Flush the final record, which has no following header to close it.
    if header is not None:
        yield header, "".join(chunks)


def getSequence(ref, fasta):
    """Write the FASTA record whose header equals ``ref`` to ``<name>.fa``.

    Args:
        ref: Header to look for, without the leading ``>``. It is compared
            against each record header after any ``>`` characters have been
            removed from that header.
        fasta: Iterable of FASTA lines (for example an open text file).

    The output file is created in the current working directory. Its name is
    ``ref`` with ``/`` replaced by ``_`` and truncated at the first ``|``,
    followed by ``.fa``. The file contains the header line and the sequence on
    a single line, each terminated by a newline. If several records match,
    the file is rewritten for each one, so the last match wins. Nothing is
    written when no record matches.
    """
    for header, seq in _iter_fasta_records(fasta):
        if ref != header.replace('>', ''):
            continue
        filename = os.path.join(os.getcwd(), ref.replace('/', '_').split('|')[0])
        # Real newlines are required here: an escaped "\\n" would put the
        # header and the sequence on one line and produce an invalid FASTA.
        with open(filename + '.fa', "w") as output_file:
            output_file.write(">" + header + "\n" + seq + "\n")
21+
22+
def main():
    """Parse the command line and extract the requested FASTA record.

    Options:
        -t: header of the sequence to retrieve (required).
        -f: path to the FASTA file to search (required); argparse opens it
            for reading.
        --version: print the program version and exit.

    The opened FASTA handle and the header are passed to ``getSequence``.
    """
    parser = argparse.ArgumentParser(
        prog='parse_fasta.py',
        description="Parse FASTA files for a specific header",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--version', help='Version information', action='version',
                        version='%(prog)s v0.1')

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-t', type=str, metavar='header of sequence to be retrieved',
                                 help='Header of the sequence to retrieve from the FASTA file',
                                 required=True)
    parser_required.add_argument('-f', type=argparse.FileType('r'), metavar='/path/to/input/file.fasta',
                                 help='Fasta with the sequences', required=True)

    args = parser.parse_args()

    # argparse.FileType opens the file at parse time; close it once we are done.
    with args.f as fasta_handle:
        getSequence(args.t, fasta_handle)


if __name__ == "__main__":
    main()

flowcraft/generator/templates/dengue_typing.nf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ process dengue_typing_{{ pid }} {
1212

1313
output:
1414
file "seq_typing*"
15+
file "*.fa"
1516
{% with task_name="dengue_typing" %}
1617
{%- include "compiler_channels.txt" ignore missing -%}
1718
{% endwith %}
@@ -26,6 +27,9 @@ process dengue_typing_{{ pid }} {
2627
2728
seq_typing.py assembly --org Dengue Virus -f ${assembly} -o ./ -j $task.cpus -t nucl
2829
30+
awk 'NR == 2 { print \$4 }' seq_typing.report_types.tab > reference
31+
parse_fasta.py -t \$(cat reference) -f /NGStools/seq_typing/reference_sequences/dengue_virus/1_GenotypesDENV_14-05-18.fasta
32+
2933
# Add information to dotfiles
3034
json_str="{'tableRow':[{'sample':'${sample_id}','data':[{'header':'seqtyping','value':'\$(cat seq_typing.report.txt)','table':'typing'}]}],'metadata':[{'sample':'${sample_id}','treeData':'\$(cat seq_typing.report.txt)','column':'typing'}]}"
3135
echo \$json_str > .report.json

0 commit comments

Comments
 (0)