Skip to content

Commit a379785

Browse files
added Fast ani component (#130)
* added fast ani component * removed unused params * fixed output name * fixed input name * fixed image tag * added log to stderr * added implementation of multi fasta parsing for fastani * added channel for parameter fragLen * added parser for main fasta * changed docker img tag * fixed file naming * fixed pep8 issue * added join to file name * added documentation and publish dir * updated changelog * fixed file naming * removed output_type * fixed changelog
1 parent be82be0 commit a379785

File tree

7 files changed

+172
-2
lines changed

7 files changed

+172
-2
lines changed

changelog.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
### Components changes
66

77
Updated images for components `mash_dist`, `mash_screen` and
8-
`mapping_patlas`
8+
`mapping_patlas`.
9+
10+
### New components
11+
- Added component `fast_ani`.
12+
913

1014
## 1.3.0
1115

docs/user/available_components.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,13 @@ Distance Estimation
9999
plasmid database and generates a JSON input file for pATLAS. This component
100100
searches for containment of a given sequence in read sequencing data.
101101
However if a different database is provided it can use mash screen for other
102-
purporses.
102+
purposes.
103+
104+
- :doc:`components/fast_ani`: Performs pairwise comparisons between fastas,
105+
given a multifasta as input for fastANI. It will split the multifasta into
106+
single fastas that will then be provided as a matrix. The output will be
107+
all pairwise comparisons that pass the minimum of 50 aligned sequences with a
108+
default length of 200 bp.
103109

104110
- :doc:`components/mash_sketch_fasta`: Performs mash sketch for fasta files.
105111

docs/user/components/fast_ani.rst

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
fast_ani
2+
========
3+
4+
Purpose
5+
-------
6+
7+
This component performs pairwise comparisons between fastas,
8+
given a multifasta as input for fastANI. It will split the multifasta into
9+
single fastas that will then be provided as a matrix. The output will be
10+
all pairwise comparisons that pass the minimum of 50 aligned sequences with a
11+
default length of 200 bp.
12+
13+
Input/Output type
14+
------------------
15+
16+
- Input type: ``fasta``
17+
- Output type: ``None``
18+
19+
20+
Parameters
21+
----------
22+
23+
- ``fragLen``: Sets the minimum size of the fragment to be passed to the
24+
``--fragLen`` argument of fastANI.
25+
26+
27+
Published results
28+
-----------------
29+
30+
- ``results/fast_ani/``: A text file with the extension ``.out``, which has all
31+
the pairwise comparisons between sequences, reporting ANI.
32+
33+
34+
Published reports
35+
-----------------
36+
37+
None.
38+
39+
40+
Default directives
41+
------------------
42+
43+
- ``fastAniMatrix``:
44+
- ``container``: flowcraft/fast_ani
45+
- ``version``: 1.1.0-2
46+
- ``cpus``: 20
47+
- ``memory``: { 30.GB * task.attempt }

flowcraft/generator/components/distance_estimation.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,32 @@ def __init__(self, **kwargs):
200200
self.status_channels = [
201201
"mashSketchFastq",
202202
]
203+
204+
205+
class FastAniMatrix(Process):
    """Process definition for the ``fastAniMatrix`` nextflow process.

    Declares a ``fasta`` input type, a single ``fragLen`` parameter and the
    default container/resource directives used by the ``fastAniMatrix``
    process.
    """

    def __init__(self, **kwargs):

        super().__init__(**kwargs)

        # name shared by the directives entry and the status channel
        process_name = "fastAniMatrix"

        # user facing parameter holding the fragment size
        frag_len_param = {
            "default": 3000,
            "description": "Set size of fragment. Default: 3000."
        }

        # default container and resources requested by the process
        process_directives = {
            "container": "flowcraft/fast_ani",
            "version": "1.1.0-2",
            "cpus": 20,
            "memory": "{ 30.GB * task.attempt }"
        }

        self.input_type = "fasta"
        self.params = {"fragLen": frag_len_param}
        self.directives = {process_name: process_directives}
        self.status_channels = [process_name]

flowcraft/generator/engine.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
"filter_poly": readsqc.FilterPoly,
6969
"integrity_coverage": readsqc.IntegrityCoverage,
7070
"fasterq_dump": downloads.FasterqDump,
71+
"fast_ani": distest.FastAniMatrix,
7172
"kraken": meta.Kraken,
7273
"mafft": alignment.Mafft,
7374
"mapping_patlas": mapping_patlas.PatlasMapping,
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// value channel holding the fragment length given to fastANI (--fragLen)
IN_fragLen_{{ pid }} = Channel.value(params.fragLen{{ param_id }})

// runs fast ani for multiple comparisons (many to many mode): the input
// multifasta is split into single fastas by fasta_spliter.py and the resulting
// list of files is used both as query list (--ql) and reference list (--rl)
process fastAniMatrix_{{ pid }} {

    {% include "post.txt" ignore missing %}

    tag { sample_id }

    // no options follow the path, so the directive must not end with a comma;
    // otherwise the following 'input:' block is parsed as a further argument
    publishDir 'results/fast_ani/fast_ani_{{ pid }}/'

    input:
    set sample_id, file(fasta) from {{ input_channel }}
    val fragLenValue from IN_fragLen_{{ pid }}

    output:
    set sample_id, fasta, file("*.out")
    {% with task_name="fastAniMatrix", sample_id="sample_id" %}
    {%- include "compiler_channels.txt" ignore missing -%}
    {% endwith %}

    // the output file name is sample_id up to (excluding) its last "."
    // followed by "_fastani.out"
    // NOTE(review): when sample_id has no ".", lastIndexOf returns -1 -
    // confirm that the resulting file name is acceptable
    """
    mkdir fasta_store
    fasta_spliter.py ${fasta}
    fastANI --ql files_fastani.txt --rl files_fastani.txt \
    -t ${task.cpus} --fragLen ${fragLenValue} \
    -o ${sample_id.take(sample_id.lastIndexOf("."))}_fastani.out
    """

}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Purpose
5+
-------
6+
7+
This module is intended to split all fastas in a multifasta file into different
8+
fasta files.
9+
10+
Code documentation
11+
------------------
12+
13+
"""
14+
15+
import os
16+
import sys
17+
18+
19+
def main():
    """Split a multifasta file into one fasta file per record.

    The path to the multifasta is read from the first command line argument.
    Each record is written to ``fasta_store/<name>.fas`` inside the current
    working directory, where ``<name>`` is built from the first three ``_``
    separated fields of the header line (without ``>`` and without the
    trailing newline). The path of every file that is created is also
    written, one per line, to ``files_fastani.txt`` in the current working
    directory, in the order the records appear in the input.

    Lines found before the first header are ignored and an input without any
    record produces an empty ``files_fastani.txt``.

    Raises
    ------
    IndexError
        If no input file is provided in the command line.
    OSError
        If the input file cannot be read or an output file cannot be written.
    """

    # single fastas are stored in the fasta store folder inside cwd,
    # respective workdir
    store_dir = os.path.join(os.getcwd(), "fasta_store")
    # handle of the fasta file being written, None until the first header
    out_handle = None

    with open(sys.argv[1]) as input_file, \
            open("files_fastani.txt", "w") as list_files:
        # the folder is expected to exist already, but creating it here
        # allows the script to be run on its own
        os.makedirs(store_dir, exist_ok=True)
        try:
            # iterates by each line of the multifasta file
            for line in input_file:
                if line.startswith(">"):
                    # a new record starts, so the previous file is finished
                    if out_handle is not None:
                        out_handle.close()
                    # strip() prevents the end of line from leaking into
                    # the file name when the header has less than 3 fields
                    # NOTE(review): headers sharing the same first three
                    # fields map to the same file and the later record
                    # overwrites the earlier one
                    record_name = "_".join(
                        line.strip().split("_")[0:3]).replace(">", "")
                    path_to_file = os.path.join(store_dir,
                                                record_name + ".fas")
                    # writes to list of files used by fastANI
                    list_files.write(path_to_file + "\n")
                    out_handle = open(path_to_file, "w")
                # lines before the first header have no file to go to
                if out_handle is not None:
                    out_handle.write(line)
        finally:
            # closes the last fasta file, even if an error occurred
            if out_handle is not None:
                out_handle.close()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)