1+ """
2+ This module provides an implementation of NetMSA algorithm in Julia, which
3+ can be used for multiple sequence alignment.
4+ """
15module NetMSA
26
37using StatsBase
48
59export createPeerMatrix, matrixalignment
610
11+ @doc raw """
12+ Store the position of a given particle. Position ``x_s_i (r)`` of the particle
13+ ``p_s_i`` is defined by using the row ``r`` that contains the symbol ``s_i`` as
14+ well as locations of the symbol ``s_i`` in the different columns (indexes of the
15+ columns that contain ``s_i`` ) in the row ``r``.
16+ """
717mutable struct Position
818 row:: Int64
919 indexes:: Vector{Int64}
1020end
1121
22+ """
23+ A particle that is used for creating swarms.
24+
25+ # Fields
26+ - value::Char : Value of the particle, e.g. 'b' or 'c'
27+ - updated::Int64 : Number of turns till last updated
28+ - pos::Position : The original position of the particle
29+ - best::Position : The best local position of the particle
30+ - bestvalue::Float64 : Best local score
31+ """
1232mutable struct Particle
1333 value:: Char
1434 updated:: Int64
1535 pos:: Position
1636 best:: Position
1737 bestvalue:: Float64
1838
39+ """
40+ Particle(value::Char, pos::Position)
41+
42+ Initializes the Particle, where the best position is intialized
43+ to be the current position.
44+ """
1945 function Particle (value:: Char , pos:: Position )
2046 return new (value, 0 , pos, pos, 0.0 )
2147 end
@@ -30,7 +56,7 @@ matrix by the `missing` keyword.
3056
3157# Examples
3258```jldoctest
33- julia> createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
59+ julia> NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
34608×4 Array{Union{Missing, Char},2}:
3561 'a' 'a' 'a' 'a'
3662 'b' 'c' 'b' 'b'
@@ -61,7 +87,7 @@ represented by `value`, at the `rowindex` in the `matrix`.
6187
6288# Examples
6389```jldoctest
64- julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
90+ julia> M = NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
65918×4 Array{Union{Missing, Char},2}:
6692 'a' 'a' 'a' 'a'
6793 'b' 'c' 'b' 'b'
@@ -89,7 +115,7 @@ with its frequency.
89115
90116# Examples
91117```jldoctest
92- julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
118+ julia> M = NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
931198×4 Array{Union{Missing, Char},2}:
94120 'a' 'a' 'a' 'a'
95121 'b' 'c' 'b' 'b'
@@ -120,7 +146,7 @@ A row is *aligned* if it only contains different occurrences of the same symbol.
120146
121147# Examples
122148```jldoctest
123- julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
149+ julia> M = NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
1241508×4 Array{Union{Missing, Char},2}:
125151 'a' 'a' 'a' 'a'
126152 'b' 'c' 'b' 'b'
@@ -154,7 +180,7 @@ the number of columns in the matrix.
154180
155181# Examples
156182```jldoctest
157- julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
183+ julia> M = NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
1581848×4 Array{Union{Missing, Char},2}:
159185 'a' 'a' 'a' 'a'
160186 'b' 'c' 'b' 'b'
@@ -188,14 +214,14 @@ w(r) = \begin{cases}
188214 w_3; & \t ext{ if r is full} \\
189215\e nd{cases}
190216```
191- where ``n_s`` is the number of occurrences of the symbol s in the aligned row r ,
192- and c is the total number of columns in the row. The value of x is equal to zero
193- if every symbol in the row r occurred at most once, otherwise x is equal to the
194- max number of occurrences (matches) of some symbol in r .
217+ where ``n_s`` is the number of occurrences of the symbol ``s`` in the aligned row ``r`` ,
218+ and ``c`` is the total number of columns in the row. The value of ``x`` is equal to zero
219+ if every symbol in the row ``r`` occurred at most once, otherwise ``x`` is equal to the
220+ max number of occurrences (matches) of some symbol in ``r`` .
195221
196222# Examples
197223```jldoctest
198- julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
224+ julia> M = NetMSA. createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
1992258×4 Array{Union{Missing, Char},2}:
200226 'a' 'a' 'a' 'a'
201227 'b' 'c' 'b' 'b'
@@ -229,14 +255,48 @@ function weight(row; w1=0.25, w2=0.5, w3=1.0)
229255 end
230256end
231257
232- function objective (M, rowindex:: Int ; endindex:: Int = 0 )
258+ @doc raw """
259+ objective(M, rowindex; endindex=0)
260+
261+ Return the objective score of the row, calculated as follows:
262+
263+ ```math
264+ f(x_s(r)) = \f rac{A(r) \t imes C(r)}{1 + Gaps(r)} \t imes \s um_{j=r}^{k} w(j)
265+ ```
266+ where ``A(r)`` is the number of aligned rows in M from ``r`` to the last row,
267+ ``C(r)`` is the maximum number of matched charachters in the current row,
268+ ``Gaps(r)`` is the number of gaps added to the matrix M from row ``r`` to the
269+ last row, and ``w(r)`` is the weight of the row ``r``.
270+
271+
272+ `endindex` is used to reduce the search area for Gaps, and if it is not provided,
273+ it would default to `size(M)[1]`.
274+
275+ # Examples
276+ ```jldoctest
277+ julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
278+ 8×4 Array{Union{Missing, Char},2}:
279+ 'a' 'a' 'a' 'a'
280+ 'b' 'c' 'b' 'b'
281+ 'c' 'b' 'c' 'c'
282+ 'b' 'c' 'h' 'b'
283+ 'c' 'f' 'i' 'c'
284+ 'd' 'g' 'm' 'j'
285+ 'e' missing 'n' 'k'
286+ 'm' missing missing 'm'
287+
288+ juila> NetMSA.objective(M, 2)
289+ 2.625
290+ ```
291+ """
292+ function objective (M, rowindex; endindex= 0 )
233293 weights = sum (weight .(eachrow (M[rowindex: end , :])))
234294 C = mostfrequent (M[rowindex, :])[1 ];
235295 A = sum (aligned .(eachrow (M))[rowindex: end ])
236296
237297 endindex = endindex == 0 ? size (M)[1 ] : endindex;
238298 if endindex > size (M)[1 ]
239- throw (ArgumentError (" endind exceeds the matrix size" ));
299+ throw (ArgumentError (" endindex exceeds the matrix size" ));
240300 end
241301
242302 counts = countmap (M[rowindex: endindex, :]);
@@ -245,11 +305,11 @@ function objective(M, rowindex::Int; endindex::Int=0)
245305 return weights * (A * C) / (1 + Gaps)
246306end
247307
248- function createswarm (rowindex:: Int64 , row )
249- unique = Set (skipmissing (row ))
308+ function createswarm (rowindex:: Int64 , M )
309+ unique = Set (skipmissing (M[rowindex, :] ))
250310 swarm = Vector {Particle} (undef, length (unique))
251311 for (i, c) in enumerate (unique)
252- swarm[i] = Particle (c, getposition (rowindex, row, c ))
312+ swarm[i] = Particle (c, getposition (c, rowindex, M ))
253313 end
254314 return swarm
255315end
0 commit comments