diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 591bb3e2d..982cf5f83 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -125,7 +125,7 @@ jobs: - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v2 with: - version: '1.10' + version: '1.11' - name: Develop packages run: | julia -e " diff --git a/docs/Project.toml b/docs/Project.toml index afd6fcd38..022ce7094 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,13 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" +JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" [compat] Documenter = "1.8" + +[sources] +GPUArrays = {path = ".."} +GPUArraysCore = {path = "../lib/GPUArraysCore"} +JLArrays = {path = "../lib/JLArrays"} diff --git a/docs/make.jl b/docs/make.jl index a37b0cd9b..3b0d6d4f7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,8 +1,8 @@ -using Documenter, GPUArrays +using Documenter, GPUArrays, GPUArraysCore, JLArrays function main() makedocs( - modules = [GPUArrays], + modules = [GPUArrays, GPUArraysCore, JLArrays], format = Documenter.HTML( # Use clean URLs on CI prettyurls = get(ENV, "CI", nothing) == "true", @@ -11,19 +11,15 @@ function main() ), sitename = "GPUArrays.jl", pages = [ - "Home" => "index.md", - "Interface" => "interface.md", - "Functionality" => [ - "functionality/host.md", - "functionality/device.md", - ], - "Test suite" => "testsuite.md", + "Home" => "index.md", + "interface.md", + "api.md", ], doctest = true, warnonly = [:missing_docs], ) - deploydocs( + return deploydocs( repo = "github.com/JuliaGPU/GPUArrays.jl.git" ) end diff --git a/docs/src/api.md b/docs/src/api.md new file mode 100644 index 000000000..77a911a7b --- /dev/null +++ b/docs/src/api.md @@ -0,0 +1,49 @@ +# API Reference + +## GPUArrays + +### Public + +```@autodocs +Modules = [GPUArrays] +Private = false +``` + +### Internals + +```@autodocs +Modules = [GPUArrays] +Public = false +``` + +## GPUArraysCore + +### Public + +```@autodocs +Modules = [GPUArraysCore] +Private = false +``` + +### Internals + +```@autodocs +Modules = [GPUArraysCore] +Public = false +``` + +## JLArrays + +### Public + +```@autodocs +Modules = [JLArrays] +Private = false +``` + +### Internals + +```@autodocs +Modules = [JLArrays] +Public = false +``` diff --git a/docs/src/functionality/device.md b/docs/src/functionality/device.md deleted file mode 100644 index 364aeedf1..000000000 --- a/docs/src/functionality/device.md +++ /dev/null @@ -1,3 +0,0 @@ -# `AbstractDeviceArray` - -TODO: describe functionality diff --git a/docs/src/functionality/host.md b/docs/src/functionality/host.md deleted file mode 100644 index dc450f323..000000000 --- a/docs/src/functionality/host.md +++ /dev/null @@ -1,3 +0,0 @@ -# `AbstractGPUArray` - -TODO: describe functionality diff --git a/docs/src/interface.md b/docs/src/interface.md index 9e4864ada..cef80e308 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -1,6 +1,6 @@ # Interface -To extend the above functionality to a new array type, you should use the types and +To extend the GPUArrays functionality to a new array type, you should use the types and implement the interfaces listed on this page. GPUArrays is designed around having two different array types to represent a GPU array: one that exists only on the host, and one that actually can be instantiated on the device (i.e. in kernels).
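As a minimal sketch of the host-side half of that design — using placeholder names `CustomArray` and `CustomBackend`, which are not a real API — a downstream package might start out like this (the `get_backend` definition is exactly the hook shown in the next hunk):

```julia
using GPUArrays, KernelAbstractions

# Placeholder backend; a real package defines its own (compare JLBackend below).
struct CustomBackend <: KernelAbstractions.GPU end

# Host-side array type: subtype AbstractGPUArray and carry the storage.
mutable struct CustomArray{T, N} <: AbstractGPUArray{T, N}
    data::Array{T, N}   # stand-in for actual device memory
    dims::Dims{N}
end

Base.size(a::CustomArray) = a.dims

# Route kernel launches for this type to the backend.
KernelAbstractions.get_backend(::CustomArray) = CustomBackend()
```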
@@ -31,9 +31,45 @@ KernelAbstractions.get_backend(a::CA) where CA <: CustomArray = CustomBackend() There are numerous examples of potential interfaces for GPUArrays, such as with [JLArrays](https://github.com/JuliaGPU/GPUArrays.jl/blob/master/lib/JLArrays/src/JLArrays.jl), [CuArrays](https://github.com/JuliaGPU/CUDA.jl/blob/master/src/gpuarrays.jl), and [ROCArrays](https://github.com/JuliaGPU/AMDGPU.jl/blob/master/src/gpuarrays.jl). -## Caching Allocator +## Device abstractions -```@docs -GPUArrays.@cached -GPUArrays.@uncached +!!! warning + Work in progress. + +## Test suite + +GPUArrays provides an extensive test suite that covers all of the functionality that should +be available after implementing the required interfaces. This test suite is part of this +package, but for dependency reasons it is not available when importing the package. Instead, +you should include the code from your `runtests.jl` as follows: + +```julia +import GPUArrays +gpuarrays = pathof(GPUArrays) +gpuarrays_root = dirname(dirname(gpuarrays)) +include(joinpath(gpuarrays_root, "test", "testsuite.jl")) +``` + +With this set-up, you can run the test suite like this: + +```julia +TestSuite.test(MyGPUArrayType) +``` + +If you don't want to run the whole suite, you can also run parts of it: + +```julia +T = JLArray +GPUArrays.allowscalar(false) # fail tests when slow indexing path into Array type is used. + +TestSuite.test_gpuinterface(T) # interface functions like gpu_call, threadidx, etc +TestSuite.test_base(T) # basic functionality like launching a kernel on the GPU and Base operations +TestSuite.test_blas(T) # tests the blas interface +TestSuite.test_broadcasting(T) # tests the broadcasting implementation +TestSuite.test_construction(T) # tests all kinds of different ways of constructing the array +TestSuite.test_linalg(T) # linalg function tests +TestSuite.test_mapreduce(T) # mapreduce sum, etc +TestSuite.test_indexing(T) # indexing tests +TestSuite.test_random(T) # randomly constructed arrays +TestSuite.test_io(T) ``` diff --git a/docs/src/testsuite.md b/docs/src/testsuite.md deleted file mode 100644 index c953eff05..000000000 --- a/docs/src/testsuite.md +++ /dev/null @@ -1,37 +0,0 @@ -# Test suite - -GPUArrays provides an extensive test suite that covers all of the functionality that should -be available after implementing the required interfaces. This test suite is part of this -package, but for dependency reasons it is not available when importing the package. Instead, -you should include the code from your `runtests.jl` as follows: - -```julia -import GPUArrays -gpuarrays = pathof(GPUArrays) -gpuarrays_root = dirname(dirname(gpuarrays)) -include(joinpath(gpuarrays_root, "test", "testsuite.jl")) -``` - -With this set-up, you can run the test suite like this: - -```julia -TestSuite.test(MyGPUArrayType) -``` -If you don't want to run the whole suite, you can also run parts of it: - - -```julia -T = JLArray -GPUArrays.allowscalar(false) # fail tests when slow indexing path into Array type is used. 
- -TestSuite.test_gpuinterface(T) # interface functions like gpu_call, threadidx, etc -TestSuite.test_base(T) # basic functionality like launching a kernel on the GPU and Base operations -TestSuite.test_blas(T) # tests the blas interface -TestSuite.test_broadcasting(T) # tests the broadcasting implementation -TestSuite.test_construction(T) # tests all kinds of different ways of constructing the array -TestSuite.test_linalg(T) # linalg function tests -TestSuite.test_mapreduce(T) # mapreduce sum, etc -TestSuite.test_indexing(T) # indexing tests -TestSuite.test_random(T) # randomly constructed arrays -TestSuite.test_io(T) -``` diff --git a/lib/GPUArraysCore/src/GPUArraysCore.jl b/lib/GPUArraysCore/src/GPUArraysCore.jl index bcf24e601..3dc17ac09 100644 --- a/lib/GPUArraysCore/src/GPUArraysCore.jl +++ b/lib/GPUArraysCore/src/GPUArraysCore.jl @@ -18,21 +18,64 @@ for device-side objects. """ abstract type AbstractGPUArray{T, N} <: DenseArray{T, N} end +""" + AbstractGPUVector{T} + +Shortcut for `AbstractGPUArray{T, 1}`. +""" const AbstractGPUVector{T} = AbstractGPUArray{T, 1} + +""" + AbstractGPUMatrix{T} + +Shortcut for `AbstractGPUArray{T, 2}`. +""" const AbstractGPUMatrix{T} = AbstractGPUArray{T, 2} + +""" + AbstractGPUVecOrMat{T} + +Shortcut for `Union{AbstractGPUArray{T, 1}, AbstractGPUArray{T, 2}}`. +""" const AbstractGPUVecOrMat{T} = Union{AbstractGPUArray{T, 1}, AbstractGPUArray{T, 2}} # convenience aliases for working with wrapped arrays + +""" + WrappedGPUArray{T, N} + +Convenience alias for working with wrapped arrays from [Adapt.jl](https://github.com/JuliaGPU/Adapt.jl). +""" const WrappedGPUArray{T,N} = WrappedArray{T,N,AbstractGPUArray,AbstractGPUArray{T,N}} + +""" + AnyGPUArray{T, N} + +Shortcut for `Union{AbstractGPUArray{T,N}, WrappedGPUArray{T,N}}`. +""" const AnyGPUArray{T,N} = Union{AbstractGPUArray{T,N}, WrappedGPUArray{T,N}} + +""" + AnyGPUVector{T} + +Shortcut for `AnyGPUArray{T, 1}`. +""" const AnyGPUVector{T} = AnyGPUArray{T, 1} + +""" + AnyGPUMatrix{T} + +Shortcut for `AnyGPUArray{T, 2}`. +""" const AnyGPUMatrix{T} = AnyGPUArray{T, 2} ## broadcasting """ -Abstract supertype for GPU array styles. The `N` parameter is the dimensionality. + AbstractGPUArrayStyle{N} <: Base.Broadcast.AbstractArrayStyle{N} + +Abstract supertype for GPU array broadcasting styles. The `N` parameter is the dimensionality. Downstream implementations should provide a concrete array style type that inherits from this supertype. diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 097f01530..0d0d851ae 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -34,6 +34,15 @@ end const MAXTHREADS = 256 +""" + JLBackend <: KernelAbstractions.GPU + +Backend object associated with JLArrays for [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl). + +# Fields + +- `static::Bool=false` +""" struct JLBackend <: KernelAbstractions.GPU static::Bool JLBackend(;static::Bool=false) = new(static) end @@ -89,6 +98,13 @@ function check_eltype(T) end end +""" + JLArray{T, N} + +CPU-located array type that emulates the behavior of GPU arrays. + +Useful for testing GPU-oriented code when no actual GPU is available. +""" mutable struct JLArray{T, N} <: AbstractGPUArray{T, N} data::DataRef{Vector{UInt8}} @@ -123,6 +139,18 @@ mutable struct JLArray{T, N} <: AbstractGPUArray{T, N} end end
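Since `JLArray` exists precisely to exercise GPU code paths on the CPU, a typical round-trip looks like the following sketch (the array values are arbitrary):

```julia
using JLArrays, GPUArrays

GPUArrays.allowscalar(false)        # error on scalar iteration, as on a real GPU

a = JLArray(rand(Float32, 3, 3))    # "upload" a CPU array
b = a .+ 1f0                        # broadcasting runs through the GPU code paths
Array(b)                            # "download" back to a CPU Array
```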
+""" + JLSparseVector{Tv, Ti} + +Sparse vector backed by `JLVector`s, similar to `SparseArrays.SparseVector`. + +# Fields + +- `iPtr::JLArray{Ti, 1}`: indices of non-zero coefficients +- `nzVal::JLArray{Tv, 1}`: values of non-zero coefficients +- `len::Int`: size of the vector +- `nnz::Ti`: number of non-zero coefficients +""" mutable struct JLSparseVector{Tv, Ti} <: GPUArrays.AbstractGPUSparseVector{Tv, Ti} iPtr::JLArray{Ti, 1} nzVal::JLArray{Tv, 1} @@ -138,6 +166,19 @@ SparseArrays.nnz(x::JLSparseVector) = x.nnz SparseArrays.nonzeroinds(x::JLSparseVector) = x.iPtr SparseArrays.nonzeros(x::JLSparseVector) = x.nzVal +""" + JLSparseMatrixCSC{Tv, Ti} + +Sparse matrix in Compressed Sparse Column format, backed by `JLVector`s (similar to `SparseArrays.SparseMatrixCSC`). + +# Fields + +- `colPtr::JLArray{Ti, 1}`: column `j` maps to indices `colPtr[j]:(colPtr[j+1]-1)` in `rowVal` and `nzVal` +- `rowVal::JLArray{Ti, 1}`: row indices for non-zero coefficients +- `nzVal::JLArray{Tv, 1}`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `nnz::Ti`: number of non-zero coefficients +""" mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC{Tv, Ti} colPtr::JLArray{Ti, 1} rowVal::JLArray{Ti, 1} nzVal::JLArray{Tv, 1} @@ -166,6 +207,19 @@ function Base.getindex(A::JLSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) whe ((r1 > r2) || (A.rowVal[r1] != i)) ? zero(Tv) : A.nzVal[r1] end +""" + JLSparseMatrixCSR{Tv, Ti} + +Sparse matrix in Compressed Sparse Row format, backed by `JLVector`s (similar to the transpose of a `SparseArrays.SparseMatrixCSC`). + +# Fields + +- `rowPtr::JLArray{Ti, 1}`: row `i` maps to indices `rowPtr[i]:(rowPtr[i+1]-1)` in `colVal` and `nzVal` +- `colVal::JLArray{Ti, 1}`: column indices for non-zero coefficients +- `nzVal::JLArray{Tv, 1}`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `nnz::Ti`: number of non-zero coefficients +""" mutable struct JLSparseMatrixCSR{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSR{Tv, Ti} rowPtr::JLArray{Ti, 1} colVal::JLArray{Ti, 1}
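Because the `JLSparseMatrixCSC` fields mirror `SparseArrays.SparseMatrixCSC`, a small worked example of the CSC layout may help; it is shown here with the stock SparseArrays type, whose `colptr`/`rowval`/`nzval` fields carry the same contents:

```julia
using SparseArrays

# A = [10  0  30;
#       0  20   0]
A = sparse([1, 2, 1], [1, 2, 3], [10, 20, 30], 2, 3)

A.colptr   # [1, 2, 3, 4]: column j occupies indices colptr[j]:(colptr[j+1]-1)
A.rowval   # [1, 2, 1]:    row index of each stored value
A.nzval    # [10, 20, 30]: the stored values, in column-major order
```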
@@ -273,8 +327,25 @@ end ## convenience constructors +""" + JLVector{T} + +Shortcut for `JLArray{T,1}`. +""" const JLVector{T} = JLArray{T,1} + +""" + JLMatrix{T} + +Shortcut for `JLArray{T,2}`. +""" const JLMatrix{T} = JLArray{T,2} + +""" + JLVecOrMat{T} + +Shortcut for `Union{JLVector{T},JLMatrix{T}}`. +""" const JLVecOrMat{T} = Union{JLVector{T},JLMatrix{T}} # type and dimensionality specified @@ -309,18 +380,65 @@ export DenseJLArray, DenseJLVector, DenseJLMatrix, DenseJLVecOrMat, AnyJLArray, AnyJLVector, AnyJLMatrix, AnyJLVecOrMat # dense arrays: stored contiguously in memory +""" + DenseJLArray{T,N} + +Alias for `JLArray`, whose data is always stored contiguously in memory. +""" DenseJLArray{T,N} = JLArray{T,N} + +""" + DenseJLVector{T} + +Shortcut for `DenseJLArray{T,1}`. +""" DenseJLVector{T} = DenseJLArray{T,1} + +""" + DenseJLMatrix{T} + +Shortcut for `DenseJLArray{T,2}`. +""" DenseJLMatrix{T} = DenseJLArray{T,2} + +""" + DenseJLVecOrMat{T} + +Shortcut for `Union{DenseJLVector{T}, DenseJLMatrix{T}}`. +""" DenseJLVecOrMat{T} = Union{DenseJLVector{T}, DenseJLMatrix{T}} # strided arrays StridedSubJLArray{T,N,I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange, Base.AbstractCartesianIndex}}}} = SubArray{T,N,<:JLArray,I} + +""" + StridedJLArray{T,N} + +Union of `JLArray`s and views of them that preserve a strided memory layout. +""" StridedJLArray{T,N} = Union{JLArray{T,N}, StridedSubJLArray{T,N}} + +""" + StridedJLVector{T} + +Shortcut for `StridedJLArray{T,1}`. +""" StridedJLVector{T} = StridedJLArray{T,1} + +""" + StridedJLMatrix{T} + +Shortcut for `StridedJLArray{T,2}`. +""" StridedJLMatrix{T} = StridedJLArray{T,2} + +""" + StridedJLVecOrMat{T} + +Shortcut for `Union{StridedJLVector{T}, StridedJLMatrix{T}}`. +""" StridedJLVecOrMat{T} = Union{StridedJLVector{T}, StridedJLMatrix{T}} Base.pointer(x::StridedJLArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, x) @@ -328,10 +446,32 @@ Base.pointer(x::StridedJLArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, x) Base.unsafe_convert(Ptr{T}, x) + Base._memory_offset(x, i) end -# anything that's (secretly) backed by a JLArray +""" + AnyJLArray{T,N} + +Union matching anything that is (secretly) backed by a `JLArray`. +""" AnyJLArray{T,N} = Union{JLArray{T,N}, WrappedArray{T,N,JLArray,JLArray{T,N}}} + +""" + AnyJLVector{T} + +Shortcut for `AnyJLArray{T,1}`. +""" AnyJLVector{T} = AnyJLArray{T,1} + +""" + AnyJLMatrix{T} + +Shortcut for `AnyJLArray{T,2}`. +""" AnyJLMatrix{T} = AnyJLArray{T,2} + +""" + AnyJLVecOrMat{T} + +Shortcut for `Union{AnyJLVector{T}, AnyJLMatrix{T}}`. +""" AnyJLVecOrMat{T} = Union{AnyJLVector{T}, AnyJLMatrix{T}} @@ -436,6 +576,12 @@ end JLArray{T,N}(xs::JLArray{T,N}) where {T,N} = xs # adapt for the GPU + +""" + jl(x) + +Adapt an object `x` to the `JLArray` backend. +""" jl(xs) = adapt(JLArray, xs) ## don't convert isbits types since they are already considered GPU-compatible Adapt.adapt_storage(::Type{JLArray}, xs::AbstractArray) =
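As a quick illustration of `jl` (a sketch; the NamedTuple is arbitrary): because it goes through Adapt, it recurses into wrapper structures, so whole collections of arrays can be moved at once:

```julia
using JLArrays

params = (; weight = rand(Float32, 2, 2), bias = rand(Float32, 2))
gpu_params = jl(params)          # adapts every array inside the NamedTuple
typeof(gpu_params.weight)        # JLArray{Float32, 2}, i.e. a JLMatrix{Float32}
```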
diff --git a/src/device/sparse.jl b/src/device/sparse.jl index b8346eafe..db7fc3d4f 100644 --- a/src/device/sparse.jl +++ b/src/device/sparse.jl @@ -12,9 +12,25 @@ using SparseArrays export GPUSparseDeviceVector, GPUSparseDeviceMatrixCSC, GPUSparseDeviceMatrixCSR, GPUSparseDeviceMatrixBSR, GPUSparseDeviceMatrixCOO +""" + AbstractGPUSparseDeviceMatrix{Tv, Ti} + +Supertype for GPU sparse matrices with value type `Tv` and index type `Ti`. +""" abstract type AbstractGPUSparseDeviceMatrix{Tv, Ti} <: AbstractSparseMatrix{Tv, Ti} end +""" + GPUSparseDeviceVector{Tv,Ti,Vi,Vv} + +Sparse vector with generic backing, similar to `SparseArrays.SparseVector`. + +# Fields + +- `iPtr::Vi`: indices of non-zero coefficients +- `nzVal::Vv`: values of non-zero coefficients +- `len::Int`: size of the vector +- `nnz::Ti`: number of non-zero coefficients +""" struct GPUSparseDeviceVector{Tv,Ti,Vi,Vv, A} <: AbstractSparseVector{Tv,Ti} iPtr::Vi nzVal::Vv @@ -28,6 +44,19 @@ SparseArrays.nnz(g::GPUSparseDeviceVector) = g.nnz SparseArrays.nonzeroinds(g::GPUSparseDeviceVector) = g.iPtr SparseArrays.nonzeros(g::GPUSparseDeviceVector) = g.nzVal +""" + GPUSparseDeviceMatrixCSC{Tv,Ti,Vi,Vv} + +Sparse matrix in Compressed Sparse Column format with generic backing. + +# Fields + +- `colPtr::Vi`: column `j` maps to indices `colPtr[j]:(colPtr[j+1]-1)` in `rowVal` and `nzVal` +- `rowVal::Vi`: row indices for non-zero coefficients +- `nzVal::Vv`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `nnz::Ti`: number of non-zero coefficients +""" struct GPUSparseDeviceMatrixCSC{Tv,Ti,Vi,Vv,A} <: AbstractGPUSparseDeviceMatrix{Tv, Ti} colPtr::Vi rowVal::Vi nzVal::Vv @@ -66,6 +95,19 @@ function SparseArrays.nnz(x::GPUSparseDeviceColumnView) return length(SparseArrays.nzrange(A, colidx)) end +""" + GPUSparseDeviceMatrixCSR{Tv,Ti,Vi,Vv} + +Sparse matrix in Compressed Sparse Row format with generic backing. + +# Fields + +- `rowPtr::Vi`: row `i` maps to indices `rowPtr[i]:(rowPtr[i+1]-1)` in `colVal` and `nzVal` +- `colVal::Vi`: column indices for non-zero coefficients +- `nzVal::Vv`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `nnz::Ti`: number of non-zero coefficients +""" struct GPUSparseDeviceMatrixCSR{Tv,Ti,Vi,Vv,A} <: AbstractGPUSparseDeviceMatrix{Tv,Ti} rowPtr::Vi colVal::Vi nzVal::Vv @@ -84,16 +126,44 @@ end end end +""" + GPUSparseDeviceMatrixBSR{Tv,Ti,Vi,Vv} + +Sparse matrix in Block Compressed Sparse Row format with generic backing. + +# Fields + +- `rowPtr::Vi`: row `i` maps to indices `rowPtr[i]:(rowPtr[i+1]-1)` in `colVal` and `nzVal` +- `colVal::Vi`: column indices for the top-left corners of the blocks +- `nzVal::Vv`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `blockDim::Ti`: number of rows = number of columns in a block +- `dir::Char`: storage order within each block, typically `'R'` (row-major) or `'C'` (column-major) +- `nnz::Ti`: number of non-zero coefficients +""" struct GPUSparseDeviceMatrixBSR{Tv,Ti,Vi,Vv,A} <: AbstractGPUSparseDeviceMatrix{Tv,Ti} rowPtr::Vi colVal::Vi nzVal::Vv dims::NTuple{2,Int} - blockDim::Ti - dir::Char + blockDim::Ti # TODO: rectangular blocks? + dir::Char # TODO: document nnz::Ti end +""" + GPUSparseDeviceMatrixCOO{Tv,Ti,Vi,Vv} + +Sparse matrix in COOrdinate format with generic backing. + +# Fields + +- `rowInd::Vi`: row indices for non-zero coefficients +- `colInd::Vi`: column indices for non-zero coefficients +- `nzVal::Vv`: values of non-zero coefficients +- `dims::NTuple{2,Int}`: size of the matrix +- `nnz::Ti`: number of non-zero coefficients +""" struct GPUSparseDeviceMatrixCOO{Tv,Ti,Vi,Vv, A} <: AbstractGPUSparseDeviceMatrix{Tv,Ti} rowInd::Vi colInd::Vi diff --git a/src/host/abstractarray.jl b/src/host/abstractarray.jl index 0080935e9..bab7ee54c 100644 --- a/src/host/abstractarray.jl +++ b/src/host/abstractarray.jl @@ -54,6 +54,27 @@ end # per-object state, with a flag to indicate whether the object has been freed. # this is to support multiple calls to `unsafe_free!` on the same object, # while only lowering the reference count of the underlying data once. + +""" + DataRef + +A helper type to manage the storage of an array. + +There are multiple reasons we don't just put the data directly in a `GPUArray` struct: +- to share data between multiple arrays, e.g., to create views; +- to be able to free data early, relieving GC pressure. + +To support this, wrap the data in a `DataRef` instead, and use it with the following methods: +- `ref[]`: get the data; +- `copy(ref)`: create a new reference, increasing the reference count; +- `unsafe_free!(ref)`: decrease the reference count, and free the data if it reaches 0. + +The contained `RefCounted` struct should not be used directly. + +The `freed` flag indicates whether the object has been freed. This supports multiple +calls to `unsafe_free!` on the same object, while only lowering the reference count +of the underlying data once. +""" mutable struct DataRef{D} rc::RefCounted{D} freed::Bool
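A sketch of the intended usage pattern; `make_ref` is a hypothetical stand-in for whatever `DataRef` constructor a backend uses, which this hunk does not show:

```julia
ref = make_ref()          # hypothetical helper returning a DataRef
buf = ref[]               # access the underlying storage
ref2 = copy(ref)          # share the data, e.g. for a view (refcount goes up)
unsafe_free!(ref2)        # drop one reference; data stays alive through `ref`
unsafe_free!(ref)         # last reference released, the storage is freed
unsafe_free!(ref)         # safe no-op, thanks to the `freed` flag
```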
diff --git a/src/host/construction.jl b/src/host/construction.jl index 1a4d8f7b5..829626808 100644 --- a/src/host/construction.jl +++ b/src/host/construction.jl @@ -83,7 +83,11 @@ function hasfieldcount(@nospecialize(dt)) return true end -# for finding specific element types, e.g., when Float64 is unsupported +""" + contains_eltype(T, typ) + +Check whether the specific element type `T` occurs inside `typ`, e.g., when `Float64` is unsupported. +""" function contains_eltype(T, typ) if T === typ return true @@ -99,15 +103,19 @@ function contains_eltype(T, typ) return false end -# Types that are allocated inline include: -# 1. plain bitstypes (`Int`, `(Float16, Float32)`, plain immutable structs, etc). -# these are simply stored contiguously in the buffer. -# 2. structs of unions (`struct Foo; x::Union{Int, Float32}; end`) -# these are stored with a selector at the end (handled by Julia). -# 3. bitstype unions (`Union{Int, Float32}`, etc) -# these are stored contiguously and require a selector array (handled by us) -# -# This function explains why a type is not allocated inline. +""" + explain_allocatedinline(T[, depth; maxdepth]) + +Explain why the type `T` is not allocated inline. + +Types that are allocated inline include: +1. plain bitstypes (`Int`, `(Float16, Float32)`, plain immutable structs, etc.). + These are simply stored contiguously in the buffer. +2. structs of unions (`struct Foo; x::Union{Int, Float32}; end`). + These are stored with a selector at the end (handled by Julia). +3. bitstype unions (`Union{Int, Float32}`, etc.). + These are stored contiguously and require a selector array (handled by us). +""" function explain_allocatedinline(@nospecialize(T), depth=0; maxdepth=10) depth > maxdepth && return ""
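The three inline-allocated categories can be checked directly with `Base.allocatedinline`, the Base predicate these rules correspond to (an illustration, not part of this diff):

```julia
struct Foo; x::Union{Int, Float32}; end     # a struct-of-union

Base.allocatedinline(Int)                   # true: plain bitstype
Base.allocatedinline(Foo)                   # true: selector stored at the end
Base.allocatedinline(Union{Int, Float32})   # true: needs a separate selector array
Base.allocatedinline(String)                # false: stored as a reference
```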