Skip to content

Commit 160f255

Browse files
committed
docs: update docs and add test cases
1 parent d134ccb commit 160f255

File tree

2 files changed

+240
-48
lines changed

2 files changed

+240
-48
lines changed

src/NetMSA.jl

Lines changed: 186 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,30 @@ function objective(M, rowindex; endindex=0)
305305
return weights * (A * C) / (1 + Gaps)
306306
end
307307

308+
"""
309+
createswarm(rowindex::Int64, M)
310+
311+
Create a swarm containing unique Particles in the current row.
312+
313+
# Examples
314+
```jldoctest
315+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
316+
8×4 Array{Union{Missing, Char},2}:
317+
'a' 'a' 'a' 'a'
318+
'b' 'c' 'b' 'b'
319+
'c' 'b' 'c' 'c'
320+
'b' 'c' 'h' 'b'
321+
'c' 'f' 'i' 'c'
322+
'd' 'g' 'm' 'j'
323+
'e' missing 'n' 'k'
324+
'm' missing missing 'm'
325+
326+
julia> NetMSA.createswarm(2, M)
327+
2-element Array{NetMSA.Particle,1}:
328+
NetMSA.Particle('c', 0, NetMSA.Position(2, [2]), NetMSA.Position(2, [2]), 0.0)
329+
NetMSA.Particle('b', 0, NetMSA.Position(2, [1, 3, 4]), NetMSA.Position(2, [1, 3, 4]), 0.0)
330+
```
331+
"""
308332
function createswarm(rowindex::Int64, M)
309333
unique = Set(skipmissing(M[rowindex, :]))
310334
swarm = Vector{Particle}(undef, length(unique))
@@ -314,24 +338,50 @@ function createswarm(rowindex::Int64, M)
314338
return swarm
315339
end
316340

317-
function criteria3(p::Particle, M, newindex)
318-
# display(newindex)
319-
# display(M[newindex, :])
320-
# display(length(p.pos.indexes) != length(getposition(newindex, M[newindex, :], p.value).indexes))
321-
return length(p.pos.indexes) != length(getposition(newindex, M[newindex, :], p.value).indexes)
341+
function criteria3(p::Particle, newindex::Int, M)
342+
return length(p.pos.indexes) != length(getposition(p.value, newindex, M).indexes)
322343
end
323344

324-
function criteria2(p::Particle)
325-
return p.updated > 6;
345+
function criteria2(p::Particle; threshold::Int=5)
346+
return p.updated > threshold;
326347
end
327348

328-
function stopcriteria(p::Particle, M, t)
329-
c3 = criteria3(p, M, t);
330-
c2 = criteria2(p);
331-
if c3
332-
display("Terminating cause of criteria 3")
333-
elseif c2
334-
display("Terminating cause of criteria 2")
349+
"""
350+
stopcriteria(p::Particle, newindex, M; threshold::Int=5, debug=false)
351+
352+
Check whether a stopping criterion has been met. Two stopping criteria
353+
are checked in this function:
354+
- Criteria 2: If a particle hasn't updated its best score in the last `threshold` turns.
355+
- Criteria 3: If a particle moves to a new row which already contains the same symbol as that of the particle.
356+
357+
# Examples
358+
```jldoctest
359+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
360+
8×4 Array{Union{Missing, Char},2}:
361+
'a' 'a' 'a' 'a'
362+
'b' 'c' 'b' 'b'
363+
'c' 'b' 'c' 'c'
364+
'b' 'c' 'h' 'b'
365+
'c' 'f' 'i' 'c'
366+
'd' 'g' 'm' 'j'
367+
'e' missing 'n' 'k'
368+
'm' missing missing 'm'
369+
370+
julia> p = NetMSA.Particle('b', NetMSA.getposition('b', 2, M))
371+
NetMSA.Particle('b', 0, Main.NetMSA.Position(2, [1, 3, 4]), Main.NetMSA.Position(2, [1, 3, 4]), 0.0)
372+
373+
julia> NetMSA.stopcriteria(p, 3, M; debug=true)
374+
"Terminating because of criteria 3"
375+
true
376+
```
377+
"""
378+
function stopcriteria(p::Particle, newindex, M; threshold::Int=5, debug=false)
379+
c3 = criteria3(p, newindex, M);
380+
c2 = criteria2(p; threshold=threshold);
381+
if debug && c3
382+
display("Terminating because of criteria 3")
383+
elseif debug && c2
384+
display("Terminating because of criteria 2")
335385
end
336386
return c3 || c2;
337387
end
@@ -340,91 +390,179 @@ function remove_missing_rows(M)
340390
return M[[length(Set(skipmissing(r))) != 0 for r in eachrow(M)], :]
341391
end
342392

393+
"""
394+
flydown(p, M; stride=1)
395+
396+
Fly down the given particle by `stride`.
397+
398+
# Examples
399+
```jldoctest
400+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
401+
8×4 Array{Union{Missing, Char},2}:
402+
'a' 'a' 'a' 'a'
403+
'b' 'c' 'b' 'b'
404+
'c' 'b' 'c' 'c'
405+
'b' 'c' 'h' 'b'
406+
'c' 'f' 'i' 'c'
407+
'd' 'g' 'm' 'j'
408+
'e' missing 'n' 'k'
409+
'm' missing missing 'm'
410+
411+
julia> p = NetMSA.Particle('b', NetMSA.getposition('b', 2, M))
412+
NetMSA.Particle('b', 0, Main.NetMSA.Position(2, [1, 3, 4]), Main.NetMSA.Position(2, [1, 3, 4]), 0.0)
413+
414+
julia> NetMSA.flydown(p, M)
415+
9×4 Array{Union{Missing, Char},2}:
416+
'a' 'a' 'a' 'a'
417+
'-' 'c' '-' '-'
418+
'b' 'b' 'b' 'b'
419+
'c' 'c' 'c' 'c'
420+
'b' 'f' 'h' 'b'
421+
'c' 'g' 'i' 'c'
422+
'd' missing 'm' 'j'
423+
'e' missing 'n' 'k'
424+
'm' missing missing 'm'
425+
426+
julia> NetMSA.flydown(p, M; stride=3)
427+
11×4 Array{Union{Missing, Char},2}:
428+
'a' 'a' 'a' 'a'
429+
'-' 'c' '-' '-'
430+
'-' 'b' '-' '-'
431+
'-' 'c' '-' '-'
432+
'b' 'f' 'b' 'b'
433+
'c' 'g' 'c' 'c'
434+
'b' missing 'h' 'b'
435+
'c' missing 'i' 'c'
436+
'd' missing 'm' 'j'
437+
'e' missing 'n' 'k'
438+
'm' missing missing 'm'
439+
```
440+
"""
343441
function flydown(p, M; stride=1)
344442
notpcols = setdiff(collect(1:size(M, 2)), p.pos.indexes)
345443
colsize = size(M, 2)
346444
pos = p.pos
347445
newrows = fill('-', (stride, colsize))
348446
M = vcat(M[1:pos.row - 1, :], reshape(newrows, stride, colsize), M[pos.row:end, :])
349-
display(M)
350447
for i in collect(pos.row + stride:size(M, 1))
351448
M[i - stride,notpcols] = M[i, notpcols]
352449
M[i, notpcols] .= missing
353450
end
354-
display(M)
355451
M = remove_missing_rows(M)
356-
return M
357452
end
358453

359-
function rowalignment(r, M)
360-
row = M[r, :];
361-
# display(row);
362-
# println(aligned(row))
454+
"""
455+
rowalignment(rowindex, M)
456+
457+
Return the Particle with the best position that aligns (maximizes the objective score)
458+
the given row in the matrix.
459+
460+
# Examples
461+
```jldoctest
462+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
463+
8×4 Array{Union{Missing, Char},2}:
464+
'a' 'a' 'a' 'a'
465+
'b' 'c' 'b' 'b'
466+
'c' 'b' 'c' 'c'
467+
'b' 'c' 'h' 'b'
468+
'c' 'f' 'i' 'c'
469+
'd' 'g' 'm' 'j'
470+
'e' missing 'n' 'k'
471+
'm' missing missing 'm'
472+
473+
julia> p = NetMSA.Particle('b', NetMSA.getposition('b', 2, M))
474+
NetMSA.Particle('b', 0, Main.NetMSA.Position(2, [1, 3, 4]), Main.NetMSA.Position(2, [1, 3, 4]), 0.0)
475+
476+
julia> NetMSA.rowalignment(2, M)
477+
NetMSA.Particle('c', 0, Main.NetMSA.Position(2, [2]), Main.NetMSA.Position(3, [1]), 9.0)
478+
```
479+
"""
480+
function rowalignment(rowindex, M)
481+
row = M[rowindex, :];
363482
if aligned(row)
364-
# println("aligned");
365483
return nothing;
366-
end
484+
end
367485

368-
swarm = createswarm(r, row);
486+
swarm = createswarm(rowindex, M);
369487

370488
gₒ = g = swarm[1];
371-
gₒvalue = gvalue = objective(M, r, endindex=r);
489+
gₒvalue = gvalue = objective(M, rowindex, endindex=rowindex);
372490
cols = size(row, 1)
373491

374492
for p in swarm
375493

376-
t = r;
494+
t = rowindex;
377495
N = copy(M);
378496

379-
p.bestvalue = objective(M, r, endindex=t)
380-
# display("Aligning $p");
381-
# display(N)
382-
# display(p)
497+
p.bestvalue = objective(M, rowindex, endindex=t)
383498

384499
missingp = setdiff(collect(1:size(N, 2)), p.pos.indexes)
385500
maxlen = maximum([length(collect(skipmissing(col))) for col in eachcol(N[:, missingp])])
386501
criteria1 = maxlen;
387502

388-
# display(criteria1)
389503

390-
while stopcriteria(p, N, t) != true && t < criteria1
391-
# display(stopcriteria(p, N, t) != true)
504+
while stopcriteria(p, t, N) != true && t < criteria1
392505
t += 1;
393506
p.updated += 1;
394-
507+
395508
N = flydown(p, N);
396-
# display(N)
397-
display(p)
398-
score = objective(N, r);
399-
display(score);
400-
# display(p.bestvalue);
509+
score = objective(N, rowindex);
401510
if score > p.bestvalue
402511
p.bestvalue = score;
403512
p.updated = 0;
404513
end
405-
514+
406515
if score > gvalue
407-
gvalue = score;
516+
gvalue = score;
408517
g = deepcopy(p);
409-
g.best = getposition(t, N[t, :], p.value);
518+
g.best = getposition(p.value, t, N);
410519
g.bestvalue = score;
411-
end
412-
413-
414-
# display(p)
415-
416520
end
521+
417522
end
523+
end
418524

419525
if gvalue == gₒvalue
420526
return nothing;
421527
end
528+
422529
return g;
423530
end
424531

532+
533+
"""
534+
matrixalignment(M)
535+
536+
Align the matrix using NetMSA algorithm.
537+
538+
# Examples
539+
```jldoctest
540+
julia> M = NetMSA.createPeerMatrix(["abcbcdem", "acbcfg", "abchimn", "abcbcjkm"])
541+
8×4 Array{Union{Missing, Char},2}:
542+
'a' 'a' 'a' 'a'
543+
'b' 'c' 'b' 'b'
544+
'c' 'b' 'c' 'c'
545+
'b' 'c' 'h' 'b'
546+
'c' 'f' 'i' 'c'
547+
'd' 'g' 'm' 'j'
548+
'e' missing 'n' 'k'
549+
'm' missing missing 'm'
550+
551+
julia> NetMSA.matrixalignment(M)
552+
9×4 Array{Union{Missing, Char},2}:
553+
'a' 'a' 'a' 'a'
554+
'b' '-' 'b' 'b'
555+
'c' 'c' 'c' 'c'
556+
'b' 'b' '-' 'b'
557+
'c' 'c' '-' 'c'
558+
'd' 'f' 'h' 'j'
559+
'e' 'g' 'i' 'k'
560+
'm' '-' 'm' 'm'
561+
'-' '-' 'n' '-'
562+
```
563+
"""
425564
function matrixalignment(M)
426565
for (index, row) in enumerate(eachrow(M))
427-
# println("$index: $row")
428566
g = rowalignment(index, M)
429567
if !isnothing(g)
430568
M = flydown(g, M, stride=g.best.row - g.pos.row)

test/runtests.jl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,58 @@ using Test
5050

5151
@test_throws ArgumentError NetMSA.objective(M, 2, endindex=9)
5252

53+
@test length(NetMSA.createswarm(2, M)) == 2;
54+
55+
@test length(NetMSA.createswarm(8, M)) == 1;
56+
57+
p = NetMSA.Particle('b', NetMSA.getposition('b', 2, M));
58+
59+
@test NetMSA.stopcriteria(p, 3, M) == true;
60+
61+
@test NetMSA.criteria3(p, 3, M) == true;
62+
63+
@test NetMSA.criteria2(p) == false;
64+
65+
p.updated = 6;
66+
67+
@test NetMSA.criteria2(p) == true;
68+
69+
@test NetMSA.stopcriteria(p, 2, M) == true;
70+
71+
newrow = [missing missing missing missing];
72+
73+
@test isequal(NetMSA.remove_missing_rows(vcat(M, newrow)), M);
74+
75+
N = [
76+
['a' 'a' 'a' 'a']
77+
['-' 'c' '-' '-']
78+
['b' 'b' 'b' 'b']
79+
['c' 'c' 'c' 'c']
80+
['b' 'f' 'h' 'b']
81+
['c' 'g' 'i' 'c']
82+
['d' missing 'm' 'j']
83+
['e' missing 'n' 'k']
84+
['m' missing missing 'm']
85+
]
86+
87+
@test isequal(NetMSA.flydown(p, M), N);
88+
89+
@test NetMSA.rowalignment(2, M).bestvalue == 9.0;
90+
91+
@test NetMSA.rowalignment(2, M).best.row == 3;
92+
93+
aligned_M = [
94+
['a' 'a' 'a' 'a']
95+
['b' '-' 'b' 'b']
96+
['c' 'c' 'c' 'c']
97+
['b' 'b' '-' 'b']
98+
['c' 'c' '-' 'c']
99+
['d' 'f' 'h' 'j']
100+
['e' 'g' 'i' 'k']
101+
['m' '-' 'm' 'm']
102+
['-' '-' 'n' '-']
103+
]
104+
105+
@test isequal(NetMSA.matrixalignment(M), aligned_M);
106+
53107
end

0 commit comments

Comments
 (0)