Skip to content

Commit 9256383

Browse files
authored
Add randn2! to fill vectors with normally distributed numbers (#57)
1 parent 3642b2a commit 9256383

File tree

4 files changed

+30
-4
lines changed

4 files changed

+30
-4
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "IPUToolkit"
22
uuid = "92e0b95a-4011-435a-96f4-10064551ddbe"
33
authors = ["Emily Dietrich <jakibaki@live.com>", "Luk Burchard <luk.burchard@gmail.com>", "Mosè Giordano <mose@gnu.org>"]
4-
version = "1.4.2"
4+
version = "1.5.0"
55

66
[deps]
77
Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"

docs/src/compiler.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,17 @@ Inside codelets defined with [`@codelet`](@ref) all calls to random functions
3939
result in calls to the corresponding IPU builtins for [random number generation](https://docs.graphcore.ai/projects/poplar-api/en/latest/ipu_intrinsics/ipu_builtins.html#random-number-generation).
4040
The uniformly distributed numbers follow the general semantics of the Julia function `rand` (floating point numbers are uniformly distributed in the $[0, 1)$ range), while the normally distributed numbers have the properties described in the Poplar SDK documentation (numbers are in the range $[-93/16, 93/16]$).
4141

42+
!!! note
43+
44+
The IPU builtins for random numbers return pairs of numbers, but the Julia functions `randn(Float16)` and `randn(Float32)` return only a single number, discarding the second number of the pair.
45+
If you have a vector of even length that you want to fill in-place with normally distributed numbers, you can use the [`randn2!`](@ref) function to do that efficiently, without discarding any number.
46+
4247
Additionally, you can use the [IPU builtins](https://docs.graphcore.ai/projects/poplar-api/en/latest/ipu_intrinsics/ipu_builtins.html) listed below.
4348

4449
```@docs
4550
get_scount_l
4651
get_tile_id
52+
randn2!
4753
```
4854

4955
## Printing

src/compiler/compiler.jl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# based on GPUCompiler example https://github.com/JuliaGPU/GPUCompiler.jl/blob/master/examples/kernel.jl
22
module IPUCompiler
33

4-
export @codelet, @ipuprogram, VertexVector, VertexScalar, In, Out, InOut, get_scount_l, get_tile_id, add_vertex
4+
export @codelet, @ipuprogram, VertexVector, VertexScalar, In, Out, InOut, get_scount_l, get_tile_id, randn2!, add_vertex
55

66
include("output.jl")
77

@@ -80,8 +80,17 @@ Call the [`__builtin_ipu_get_tile_id()`](https://docs.graphcore.ai/projects/popl
8080
"""
8181
function get_tile_id end
8282

83-
include("runtime.jl")
83+
"""
84+
randn2!(v::VertexVector) -> v
85+
86+
Fill the vector `v` with normally-distributed (mean 0, standard deviation 1) random numbers.
87+
The vector *must* have even length.
88+
This function takes advantage of [IPU builtins for random number generation](https://docs.graphcore.ai/projects/poplar-api/en/latest/ipu_intrinsics/ipu_builtins.html#random-number-generation), which return pairs of numbers at a time.
89+
"""
90+
function randn2! end
91+
8492
include("vertices.jl")
93+
include("runtime.jl")
8594

8695
GPUCompiler.runtime_module(::CompilerJob{<:Any,IPUCompilerParams}) = IPURuntime
8796
# `GPUCompiler.isintrinsic` specifies functions which are to be considered intrinsics for

src/compiler/runtime.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module IPURuntime
22

3-
import ..IPUCompiler: @device_override, @ipuprintf, @ipuprintln, get_scount_l, get_tile_id
3+
import ..IPUCompiler: @device_override, @ipuprintf, @ipuprintln, get_scount_l, get_tile_id, randn2!, VertexVector, Out, InOut
44
using GPUCompiler: reset_runtime
55
import LinearAlgebra
66

@@ -54,6 +54,17 @@ get_tile_id() = ccall("extern _llvm_colossus_get_tile_id", llvmcall, Cuint, ())
5454
# same semantic.
5555
@device_override Base.randn(T::Type{Float16}) = @inbounds ccall("extern _llvm_colossus_f16v2grand", llvmcall, NTuple{2, VecElement{Float16}}, ())[1].value
5656
@device_override Base.randn(T::Type{Float32}) = @inbounds ccall("extern _llvm_colossus_f32v2grand", llvmcall, NTuple{2, VecElement{Float32}}, ())[1].value
57+
# Draw one pair of normally distributed numbers from the IPU builtin matching the requested
# element type.  Selecting the builtin by dispatch keeps the caller free of type branches;
# only `Float32` and `Float16` have a method because those are the only two builtins used
# here.
@inline _grand_pair(::Type{Float32}) =
    ccall("extern _llvm_colossus_f32v2grand", llvmcall, NTuple{2, VecElement{Float32}}, ())
@inline _grand_pair(::Type{Float16}) =
    ccall("extern _llvm_colossus_f16v2grand", llvmcall, NTuple{2, VecElement{Float16}}, ())

# Device implementation of `randn2!`: fill `v` in-place, two elements per builtin call, and
# return `v`.
#
# Each builtin call yields a pair of numbers which are stored in consecutive elements, so no
# generated number is discarded when `length(v)` is even.  If the length is odd, the last
# element is filled with the first number of one extra pair (the second one is dropped)
# instead of writing one element past the end of the vector.
function randn2!(v::VertexVector{T}) where {T}
    len = UInt32(length(v))
    # Largest even index not exceeding `len`: every `idx` below then has a valid `idx + 1`,
    # which is what makes the `@inbounds` stores safe.
    paired = len - (len & UInt32(1))
    for idx in UInt32(1):UInt32(2):paired
        rnd = _grand_pair(T)
        @inbounds v[idx] = rnd[1].value
        @inbounds v[idx + 1] = rnd[2].value
    end
    if paired != len
        # Odd length: one trailing element is still unfilled.
        @inbounds v[len] = _grand_pair(T)[1].value
    end
    return v
end
5768

5869
## Math functions.
5970
# There are different reasons why we prefer LLVM intrinsics on the IPU: implementations in

0 commit comments

Comments
 (0)