Skip to content

Commit f12d64b

Browse files
authored
Allow join to work with BioSymbol (#234)
Previously, it was only possible to join BioSequences. Now, one can call join(LongRNA{2}, (RNA_U, rna"AGCA")) and have it work. Also skip unneeded calls to resize! when joining.
1 parent fe5aabd commit f12d64b

File tree

4 files changed

+37
-10
lines changed

4 files changed

+37
-10
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
55
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
## [UNRELEASED]
8+
9+
### Added
10+
* It is now possible to `join` BioSymbols into a BioSequence.
11+
712
## [3.0.1]
813
### Removed
914

src/biosequence/biosequence.jl

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,13 @@ end
100100

101101
# Fast path for iterables we know are stateless
102102
function join!(seq::BioSequence, it::Union{Vector, Tuple, Set})
103-
_join!(resize!(seq, sum(length, it, init=0)), it, Val(true))
103+
_join!(resize!(seq, sum(joinlen, it, init=0)), it, Val(true))
104104
end
105105

106106
"""
107107
join!(seq::BioSequence, iter)
108108
109-
Concatenate all biosequences in `iter` into `seq`, resizing it to fit.
109+
Concatenate all biosequences/biosymbols in `iter` into `seq`, resizing it to fit.
110110
111111
# Examples
112112
```
@@ -122,19 +122,27 @@ join!(seq::BioSequence, it) = _join!(seq, it, Val(false))
122122
# B is whether the size of the destination seq is already
123123
# known to be the final size
124124
function _join!(seq::BioSequence, it, ::Val{B}) where B
125-
index = 1
125+
len = 0
126+
oldlen = length(seq)
126127
for i in it
127-
B || resize!(seq, length(seq) + length(i))
128-
copyto!(seq, index, i, 1, length(i))
129-
index += length(i)
128+
pluslen = joinlen(i)
129+
if !B && oldlen < (len + pluslen)
130+
resize!(seq, len + pluslen)
131+
end
132+
if i isa BioSymbol
133+
seq[len + 1] = i
134+
else
135+
copyto!(seq, len + 1, i, 1, length(i))
136+
end
137+
len += pluslen
130138
end
131139
seq
132140
end
133141

134142
"""
135143
join(::Type{T <: BioSequence}, seqs)
136144
137-
Concatenate all the `seqs` to a biosequence of type `T`.
145+
Concatenate all the biosequences/biosymbols in `seqs` to a biosequence of type `T`.
138146
139147
# Examples
140148
```
@@ -146,9 +154,12 @@ TAGAAC
146154
see also [`join!`](@ref)
147155
"""
148156
function Base.join(::Type{T}, it::Union{Vector, Tuple, Set}) where {T <: BioSequence}
149-
_join!(T(undef, sum(length, it, init=0)), it, Val(true))
157+
_join!(T(undef, sum(joinlen, it, init=0)), it, Val(true))
150158
end
151159

160+
# length is intentionally not implemented for BioSymbol
161+
# Number of elements an item contributes to a joined sequence:
# a single biosymbol counts as one, a biosequence counts as its length.
joinlen(::BioSymbol) = 1
joinlen(x::BioSequence) = length(x)
162+
152163
function Base.join(::Type{T}, it) where {T <: BioSequence}
153164
_join!(empty(T), it, Val(false))
154165
end

test/biosequences/biosequence.jl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ random_simple(len::Integer) = SimpleSeq(rand([RNA_A, RNA_C, RNA_G, RNA_U], len))
4949

5050
seq2 = SimpleSeq([RNA_U, RNA_C, RNA_U])
5151
gen = (i for i in [seq, seq2])
52-
@test join!(SimpleSeq([]), [seq, seq2]) == SimpleSeq([RNA(i) for i in "CGUUCU"])
53-
@test join!(SimpleSeq([]), gen) == SimpleSeq([RNA(i) for i in "CGUUCU"])
52+
newseq = SimpleSeq([])
53+
join!(newseq, [seq, seq2])
54+
@test newseq == SimpleSeq([RNA(i) for i in "CGUUCU"])
55+
join!(newseq, gen)
56+
@test newseq == SimpleSeq([RNA(i) for i in "CGUUCU"])
57+
join!(newseq, [RNA_U, RNA_C, SimpleSeq([RNA_G, RNA_C])])
58+
@test newseq == SimpleSeq([RNA(i) for i in "UCGC"])
5459
@test join(SimpleSeq, [seq, seq2]) == join!(SimpleSeq([]), [seq, seq2])
5560
@test join(SimpleSeq, gen) == join!(SimpleSeq([]), gen)
61+
@test join(SimpleSeq, [RNA_U, RNA_G, seq, RNA_U]) == SimpleSeq([RNA(i) for i in "UGCGUU"])
5662

5763
@test copy!(SimpleSeq([]), seq) == seq
5864
seq3 = copy(seq2)

test/longsequences/basics.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,11 @@ end
263263

264264
base_aa_seq = aa"KMAEEHPAIYWLMN"
265265
test_join(LongAA, (aa"KMVLE", aa"", (@view base_aa_seq[3:6])), aa"KMVLEAEEH")
266+
267+
# Joining seqs and symbols
268+
test_join(LongAA, [AA_G, AA_P, AA_L, aa"MNVWEED", AA_K], aa"GPLMNVWEEDK")
269+
test_join(LongRNA{4}, [RNA_M, RNA_U, RNA_S, rna"AGCGSK"], rna"MUSAGCGSK")
270+
test_join(LongDNA{2}, [dna"ATGCTTA", DNA_G, DNA_G, DNA_A, DNA_A, DNA_A], dna"ATGCTTAGGAAA")
266271
end
267272

268273
@testset "Length" begin

0 commit comments

Comments
 (0)