Skip to content

Commit 2865850

Browse files
author
Ben J. Ward
committed
Implement Base.broadcastable for BioSequence types.
1 parent 6f8c351 commit 2865850

File tree

3 files changed

+281
-0
lines changed

3 files changed

+281
-0
lines changed

src/biosequence/biosequence.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,4 @@ include("printing.jl")
157157
include("transformations.jl")
158158
include("counting.jl")
159159
include("copying.jl")
160+
include("seqbroadcast.jl")

src/biosequence/seqbroadcast.jl

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
###
2+
### Broadcasting with BioSequences.
3+
###
4+
### Customising the broadcasting machinery for BioSequences.
5+
###
6+
### This file is a part of BioJulia.
7+
### License is MIT: https://github.com/BioJulia/BioSequences.jl/blob/master/LICENSE.md
8+
9+
10+
# First we ensure that BioSequence types are actually passed to the broadcast
11+
# machinery.
12+
#
13+
# By default, this would otherwise `collect` the sequence into a `Vector` of
14+
# `DNA` or `RNA` etc. This wastes time and memory, when broadcasting should
15+
# just be able to use the BioSequence itself as if it were an array with a
16+
# a single dimension, akin to Vector.
17+
Base.broadcastable(x::BioSequence) = x
18+
19+
20+
21+
#=
22+
23+
Let's have a look at the example of broadcasting addition with a vector.
24+
25+
@less broadcast(+, [1,2,3], 1)
26+
===
27+
broadcast(f::Tf, As...) where {Tf} = materialize(broadcasted(f, As...))
28+
29+
30+
@less Base.broadcasted(+, [1,2,3], 1)
31+
===
32+
@inline function broadcasted(f, arg1, arg2, args...)
33+
arg1′ = broadcastable(arg1)
34+
arg2′ = broadcastable(arg2)
35+
args′ = map(broadcastable, args)
36+
broadcasted(combine_styles(arg1′, arg2′, args′...), f, arg1′, arg2′, args′...)
37+
end
38+
@inline broadcasted(::S, f, args...) where S<:BroadcastStyle = Broadcasted{S}(f, args)
39+
40+
bc = Base.broadcasted(Base.Broadcast.combine_styles(arg1, arg2), +, arg1, arg2)
41+
Base.Broadcast.Broadcasted(+, ([1, 2, 3], 1))
42+
43+
typeof(bc)
44+
Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Vector{Int64}, Int64}}
45+
46+
47+
@less Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}}(+, (arg1, arg2))
48+
===
49+
function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Args<:Tuple}
50+
# using Core.Typeof rather than F preserves inferrability when f is a type
51+
Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes)
52+
end
53+
54+
55+
@less materialize(bc)
56+
===
57+
@inline materialize(bc::Broadcasted) = copy(instantiate(bc))
58+
materialize(x) = x
59+
60+
61+
@less Base.Broadcast.instantiate(bc)
62+
===
63+
@inline function instantiate(bc::Broadcasted{Style}) where {Style}
64+
if bc.axes isa Nothing # Not done via dispatch to make it easier to extend instantiate(::Broadcasted{Style})
65+
axes = combine_axes(bc.args...)
66+
else
67+
axes = bc.axes
68+
check_broadcast_axes(axes, bc.args...)
69+
end
70+
return Broadcasted{Style}(bc.f, bc.args, axes)
71+
end
72+
73+
74+
@less Base.Broadcast.combine_axes(bc.args...)
75+
===
76+
@inline combine_axes(A, B...) = broadcast_shape(axes(A), combine_axes(B...))
77+
@inline combine_axes(A, B) = broadcast_shape(axes(A), axes(B))
78+
79+
80+
@less @less Base.Broadcast.broadcast_shape(axes([1,2,3]), axes(1))
81+
===
82+
broadcast_shape(shape::Tuple) = shape
83+
broadcast_shape(shape::Tuple, shape1::Tuple, shapes::Tuple...) = broadcast_shape(_bcs(shape, shape1), shapes...)
84+
85+
86+
@less Base.Broadcast._bcs(axes([1,2,3]), axes(1))
87+
===
88+
_bcs(shape::Tuple, ::Tuple{}) = (shape[1], _bcs(tail(shape), ())...)
89+
90+
_bcs consolidates two shapes into a single output shape
91+
92+
shape = axes([1,2,3])
93+
@less Base.Broadcast._bcs(Base.tail(shape), ())
94+
===
95+
_bcs(::Tuple{}, ::Tuple{}) = ()
96+
()
97+
98+
(shape[1], _bcs(tail(shape), ())...)
99+
100+
101+
@less copy(Base.Broadcast.instantiate(bc))
102+
===
103+
@inline function copy(bc::Broadcasted{Style}) where {Style}
104+
ElType = combine_eltypes(bc.f, bc.args)
105+
if Base.isconcretetype(ElType)
106+
# We can trust it and defer to the simpler `copyto!`
107+
return copyto!(similar(bc, ElType), bc)
108+
end
109+
# When ElType is not concrete, use narrowing. Use the first output
110+
# value to determine the starting output eltype; copyto_nonleaf!
111+
# will widen `dest` as needed to accommodate later values.
112+
bc′ = preprocess(nothing, bc)
113+
iter = eachindex(bc′)
114+
y = iterate(iter)
115+
if y === nothing
116+
# if empty, take the ElType at face value
117+
return similar(bc′, ElType)
118+
end
119+
# Initialize using the first value
120+
I, state = y
121+
@inbounds val = bc′[I]
122+
dest = similar(bc′, typeof(val))
123+
@inbounds dest[I] = val
124+
# Now handle the remaining values
125+
# The typeassert gives inference a helping hand on the element type and dimensionality
126+
# (work-around for #28382)
127+
ElType′ = ElType === Union{} ? Any : ElType <: Type ? Type : ElType
128+
RT = dest isa AbstractArray ? AbstractArray{<:ElType′, ndims(dest)} : Any
129+
return copyto_nonleaf!(dest, bc′, iter, state, 1)::RT
130+
end
131+
132+
133+
@less Base.Broadcast.combine_eltypes(bc.f, bc.args)
134+
===
135+
combine_eltypes(f, args::Tuple) = promote_typejoin_union(Base._return_type(f, eltypes(args)))
136+
137+
@less Base.Broadcast.eltypes(bc.args)
138+
139+
@less Base._return_type(+, Base.Broadcast.eltypes(bc.args))
140+
141+
@less Base.Broadcast.promote_typejoin_union(Base._return_type(+, Base.Broadcast.eltypes(bc.args)))
142+
===
143+
function promote_typejoin_union(::Type{T}) where T
144+
if T === Union{}
145+
return Union{}
146+
elseif T isa UnionAll
147+
return Any # TODO: compute more precise bounds
148+
elseif T isa Union
149+
return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
150+
elseif T <: Tuple
151+
return typejoin_union_tuple(T)
152+
else
153+
return T
154+
end
155+
end
156+
157+
158+
@less similar(bc, Base.Broadcast.combine_eltypes(bc.f, bc.args))
159+
===
160+
Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc))
161+
Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} = similar(Array{ElType}, dims)
162+
Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N = similar(BitArray, dims)
163+
164+
@less copyto!(similar(bc, Base.Broadcast.combine_eltypes(bc.f, bc.args)), bc)
165+
===
166+
## general `copyto!` methods
167+
# The most general method falls back to a method that replaces Style->Nothing
168+
# This permits specialization on typeof(dest) without introducing ambiguities
169+
@inline copyto!(dest::AbstractArray, bc::Broadcasted) = copyto!(dest, convert(Broadcasted{Nothing}, bc))
170+
171+
172+
@less copyto!(dest, convert(Base.Broadcast.Broadcasted{Nothing}, bc))
173+
===
174+
@inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
175+
axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
176+
# Performance optimization: broadcast!(identity, dest, A) is equivalent to copyto!(dest, A) if indices match
177+
if bc.f === identity && bc.args isa Tuple{AbstractArray} # only a single input argument to broadcast!
178+
A = bc.args[1]
179+
if axes(dest) == axes(A)
180+
return copyto!(dest, A)
181+
end
182+
end
183+
bc′ = preprocess(dest, bc)
184+
# Performance may vary depending on whether `@inbounds` is placed outside the
185+
# for loop or not. (cf. https://github.com/JuliaLang/julia/issues/38086)
186+
@inbounds @simd for I in eachindex(bc′)
187+
dest[I] = bc′[I]
188+
end
189+
return dest
190+
end
191+
192+
193+
@less Base.Broadcast.preprocess(dest, bc)
194+
===
195+
# Preprocessing a `Broadcasted` does two things:
196+
# * unaliases any arguments from `dest`
197+
# * "extrudes" the arguments where it is advantageous to pre-compute the broadcasted indices
198+
@inline preprocess(dest, bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, preprocess_args(dest, bc.args), bc.axes)
199+
preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
200+
201+
@inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...)
202+
preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
203+
preprocess_args(dest, args::Tuple{}) = ()
204+
205+
206+
Base.Broadcast.preprocess(dest, bc.args[1])
207+
@less Base.Broadcast.broadcast_unalias(dest, bc.args[1])
208+
===
209+
# For broadcasted assignments like `broadcast!(f, A, ..., A, ...)`, where `A`
210+
# appears on both the LHS and the RHS of the `.=`, then we know we're only
211+
# going to make one pass through the array, and even though `A` is aliasing
212+
# against itself, the mutations won't affect the result as the indices on the
213+
# LHS and RHS will always match. This is not true in general, but with the `.op=`
214+
# syntax it's fairly common for an argument to be `===` a source.
215+
broadcast_unalias(dest, src) = dest === src ? src : unalias(dest, src)
216+
broadcast_unalias(::Nothing, src) = src
217+
218+
219+
@less Base.Broadcast.unalias(dest, bc.args[1])
220+
===
221+
## rudimentary aliasing detection ##
222+
"""
223+
Base.unalias(dest, A)
224+
225+
Return either `A` or a copy of `A` in a rough effort to prevent modifications to `dest` from
226+
affecting the returned object. No guarantees are provided.
227+
228+
Custom arrays that wrap or use fields containing arrays that might alias against other
229+
external objects should provide a [`Base.dataids`](@ref) implementation.
230+
231+
This function must return an object of exactly the same type as `A` for performance and type
232+
stability. Mutable custom arrays for which [`copy(A)`](@ref) is not `typeof(A)` should
233+
provide a [`Base.unaliascopy`](@ref) implementation.
234+
235+
See also [`Base.mightalias`](@ref).
236+
"""
237+
unalias(dest, A::AbstractArray) = mightalias(dest, A) ? unaliascopy(A) : A
238+
unalias(dest, A::AbstractRange) = A
239+
unalias(dest, A) = A
240+
241+
"""
242+
Base.mightalias(A::AbstractArray, B::AbstractArray)
243+
244+
Perform a conservative test to check if arrays `A` and `B` might share the same memory.
245+
246+
By default, this simply checks if either of the arrays reference the same memory
247+
regions, as identified by their [`Base.dataids`](@ref).
248+
"""
249+
mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !_isdisjoint(dataids(A)
250+
, dataids(B))
251+
mightalias(x, y) = false
252+
253+
"""
254+
Base.dataids(A::AbstractArray)
255+
256+
Return a tuple of `UInt`s that represent the mutable data segments of an array.
257+
258+
Custom arrays that would like to opt-in to aliasing detection of their component
259+
parts can specialize this method to return the concatenation of the `dataids` of
260+
their component parts. A typical definition for an array that wraps a parent is
261+
`Base.dataids(C::CustomArray) = dataids(C.parent)`.
262+
"""
263+
dataids(A::AbstractArray) = (UInt(objectid(A)),)
264+
dataids(A::Array) = (UInt(pointer(A)),)
265+
dataids(::AbstractRange) = ()
266+
dataids(x) = ()
267+
268+
=#
269+

src/longsequences/indexing.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,14 @@ end
6969

7070
# Broadcasting
7171

72+
#Base.axes(x::BioSymbol) = ()
73+
#Base.ndims(::Type{<:BioSymbol}) = 0
74+
#Base.ndims(x::BioSymbol) = 0
75+
#Base.broadcastable(x::BioSymbol) = x
76+
77+
#Base.broadcastable(x::LongSequence) = x
78+
#Base.getindex(x::LongSequence, i::Base.CartesianIndex{1}) = x[first(i.I)]
79+
80+
#Base.BroadcastStyle(::Type{T}) where {T<:LongSequence} = Broadcast.Style{T}()
81+
82+
#Base.BroadcastStyle(::Broadcast.Style{<:LongSequence}, ::Broadcast.DefaultArrayStyle{N}) where {N} = Broadcast.DefaultArrayStyle{N}()

0 commit comments

Comments
 (0)