Skip to content

Commit d134ccb

Browse files
committed
docs: update docs
1 parent e5c64f8 commit d134ccb

File tree

1 file changed

+75
-15
lines changed

1 file changed

+75
-15
lines changed

src/NetMSA.jl

Lines changed: 75 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,47 @@
1+
"""
2+
This module provides an implementation of NetMSA algorithm in Julia, which
3+
can be used for multiple sequence alignment.
4+
"""
15
module NetMSA
26

37
using StatsBase
48

59
export createPeerMatrix, matrixalignment
610

11+
@doc raw"""
12+
Store the position of a given particle. Position ``x_s_i (r)`` of the particle
13+
``p_s_i`` is defined by using the row ``r`` that contains the symbol ``s_i`` as
14+
well as locations of the symbol ``s_i`` in the different columns (indexes of the
15+
columns that contain ``s_i`` ) in the row ``r``.
16+
"""
717
mutable struct Position
818
row::Int64
919
indexes::Vector{Int64}
1020
end
1121

22+
"""
23+
A particle that is used for creating swarms.
24+
25+
# Fields
26+
- value::Char : Value of the particle, e.g. 'b' or 'c'
27+
- updated::Int64 : Number of turns till last updated
28+
- pos::Position : The original position of the particle
29+
- best::Position : The best local position of the particle
30+
- bestvalue::Float64 : Best local score
31+
"""
1232
mutable struct Particle
1333
value::Char
1434
updated::Int64
1535
pos::Position
1636
best::Position
1737
bestvalue::Float64
1838

39+
"""
40+
Particle(value::Char, pos::Position)
41+
42+
Initializes the Particle, where the best position is intialized
43+
to be the current position.
44+
"""
1945
function Particle(value::Char, pos::Position)
2046
return new(value, 0, pos, pos, 0.0)
2147
end
@@ -30,7 +56,7 @@ matrix by the `missing` keyword.
3056
3157
# Examples
3258
```jldoctest
33-
julia> createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
59+
julia> NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
3460
8×4 Array{Union{Missing, Char},2}:
3561
'a' 'a' 'a' 'a'
3662
'b' 'c' 'b' 'b'
@@ -61,7 +87,7 @@ represented by `value`, at the `rowindex` in the `matrix`.
6187
6288
# Examples
6389
```jldoctest
64-
julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
90+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
6591
8×4 Array{Union{Missing, Char},2}:
6692
'a' 'a' 'a' 'a'
6793
'b' 'c' 'b' 'b'
@@ -89,7 +115,7 @@ with its frequency.
89115
90116
# Examples
91117
```jldoctest
92-
julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
118+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
93119
8×4 Array{Union{Missing, Char},2}:
94120
'a' 'a' 'a' 'a'
95121
'b' 'c' 'b' 'b'
@@ -120,7 +146,7 @@ A row is *aligned* if it only contains different occurrences of the same symbol.
120146
121147
# Examples
122148
```jldoctest
123-
julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
149+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
124150
8×4 Array{Union{Missing, Char},2}:
125151
'a' 'a' 'a' 'a'
126152
'b' 'c' 'b' 'b'
@@ -154,7 +180,7 @@ the number of columns in the matrix.
154180
155181
# Examples
156182
```jldoctest
157-
julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
183+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
158184
8×4 Array{Union{Missing, Char},2}:
159185
'a' 'a' 'a' 'a'
160186
'b' 'c' 'b' 'b'
@@ -188,14 +214,14 @@ w(r) = \begin{cases}
188214
w_3; & \text{ if r is full} \\
189215
\end{cases}
190216
```
191-
where ``n_s`` is the number of occurrences of the symbol s in the aligned row r,
192-
and c is the total number of columns in the row. The value of x is equal to zero
193-
if every symbol in the row r occurred at most once, otherwise x is equal to the
194-
max number of occurrences (matches) of some symbol in r.
217+
where ``n_s`` is the number of occurrences of the symbol ``s`` in the aligned row ``r``,
218+
and ``c`` is the total number of columns in the row. The value of ``x`` is equal to zero
219+
if every symbol in the row ``r`` occurred at most once, otherwise ``x`` is equal to the
220+
max number of occurrences (matches) of some symbol in ``r``.
195221
196222
# Examples
197223
```jldoctest
198-
julia> M = createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
224+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
199225
8×4 Array{Union{Missing, Char},2}:
200226
'a' 'a' 'a' 'a'
201227
'b' 'c' 'b' 'b'
@@ -229,14 +255,48 @@ function weight(row; w1=0.25, w2=0.5, w3=1.0)
229255
end
230256
end
231257

232-
function objective(M, rowindex::Int; endindex::Int=0)
258+
@doc raw"""
259+
objective(M, rowindex; endindex=0)
260+
261+
Return the objective score of the row, calculated as follows:
262+
263+
```math
264+
f(x_s(r)) = \frac{A(r) \times C(r)}{1 + Gaps(r)} \times \sum_{j=r}^{k} w(j)
265+
```
266+
where ``A(r)`` is the number of aligned rows in M from ``r`` to the last row,
267+
``C(r)`` is the maximum number of matched charachters in the current row,
268+
``Gaps(r)`` is the number of gaps added to the matrix M from row ``r`` to the
269+
last row, and ``w(r)`` is the weight of the row ``r``.
270+
271+
272+
`endindex` is used to reduce the search area for Gaps, and if it is not provided,
273+
it would default to `size(M)[1]`.
274+
275+
# Examples
276+
```jldoctest
277+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
278+
8×4 Array{Union{Missing, Char},2}:
279+
'a' 'a' 'a' 'a'
280+
'b' 'c' 'b' 'b'
281+
'c' 'b' 'c' 'c'
282+
'b' 'c' 'h' 'b'
283+
'c' 'f' 'i' 'c'
284+
'd' 'g' 'm' 'j'
285+
'e' missing 'n' 'k'
286+
'm' missing missing 'm'
287+
288+
juila> NetMSA.objective(M, 2)
289+
2.625
290+
```
291+
"""
292+
function objective(M, rowindex; endindex=0)
233293
weights = sum(weight.(eachrow(M[rowindex:end, :])))
234294
C = mostfrequent(M[rowindex, :])[1];
235295
A = sum(aligned.(eachrow(M))[rowindex:end])
236296

237297
endindex = endindex == 0 ? size(M)[1] : endindex;
238298
if endindex > size(M)[1]
239-
throw(ArgumentError("endind exceeds the matrix size"));
299+
throw(ArgumentError("endindex exceeds the matrix size"));
240300
end
241301

242302
counts = countmap(M[rowindex:endindex, :]);
@@ -245,11 +305,11 @@ function objective(M, rowindex::Int; endindex::Int=0)
245305
return weights * (A * C) / (1 + Gaps)
246306
end
247307

248-
function createswarm(rowindex::Int64, row)
249-
unique = Set(skipmissing(row))
308+
function createswarm(rowindex::Int64, M)
309+
unique = Set(skipmissing(M[rowindex, :]))
250310
swarm = Vector{Particle}(undef, length(unique))
251311
for (i, c) in enumerate(unique)
252-
swarm[i] = Particle(c, getposition(rowindex, row, c))
312+
swarm[i] = Particle(c, getposition(c, rowindex, M))
253313
end
254314
return swarm
255315
end

0 commit comments

Comments
 (0)