From 1e6e9d30ea1498dbcecf211b1cbcaebc40205e18 Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Tue, 14 May 2024 14:34:53 +0200 Subject: [PATCH 01/16] itertools.product & unvectorized fw index --- basalt/autograd/ops/mlops.mojo | 78 +++++++++++++++++++++++++++++++++- basalt/autograd/ops/ops.mojo | 9 +++- basalt/utils/itertools.mojo | 47 ++++++++++++++++++++ tests/mojo/test_mlops.mojo | 76 +++++++++++++++++++++++---------- 4 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 basalt/utils/itertools.mojo diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 0869919..0f9bb1f 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -4,6 +4,7 @@ from math.limit import min_finite, max_finite from basalt import Tensor, TensorShape from basalt.utils.tensorutils import elwise_transform +from basalt.utils.itertools import product from basalt.autograd.attributes import Attribute, AttributeVector @@ -491,4 +492,79 @@ struct SLICE: Self.slice_kernel[ug_shape, t1_shape, steps, starts, ends, True](res_grad, ug) - return res_grad ^ \ No newline at end of file + return res_grad ^ + + +struct INDEX: + @staticmethod + fn adjust_boundary(slice: Int, dim_size: Int) -> Int: + # Adjust negative indices & ensure they are within bounds. + var s = slice if slice >= 0 else dim_size + slice + return max(min(s, dim_size), 0) + + @staticmethod + fn to_indeces(shape: TensorShape, attrs: AttributeVector) -> List[List[Int]]: + var SLICE_LITERALS = List[StringLiteral]("dim_0s", "dim_1s", "dim_2s", "dim_3s", "dim_4s", "dim_5s", "dim_6s", "dim_7s") + var INDEX_LITERALS = List[StringLiteral]("dim_0i", "dim_1i", "dim_2i", "dim_3i", "dim_4i", "dim_5i", "dim_6i", "dim_7i") + + var indeces = List[List[Int]]() + for dim in range(shape.rank()): + var temp = List[Int]() + + # Option 1: Slice + if attrs[SLICE_LITERALS[dim]]: + var slice = attrs[SLICE_LITERALS[dim]].value().to_shape() + var step = slice[2] if slice.rank() == 3 else 1 + for i in range( + start=Self.adjust_boundary(slice[0], shape[dim]), + end=Self.adjust_boundary(slice[1], shape[dim]), + step=step + ): + temp.append(i) + + # Option 2: Indeces + elif attrs[INDEX_LITERALS[dim]]: + var indeces = attrs[INDEX_LITERALS[dim]].value().to_shape() + for i in range(indeces.rank()): + temp.append(indeces[i]) + + # All indeces + else: + for i in range(shape[dim]): + temp.append(i) + + indeces.append(temp) + + return indeces ^ + + @staticmethod + fn result_shape(shape: TensorShape, attrs: AttributeVector) -> TensorShape: + var indeces = Self.to_indeces(shape, attrs) + var new_shape = List[Int]() + for i in range(shape.rank()): + new_shape.append(len(indeces[i])) + return TensorShape(new_shape) + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + alias indeces = Self.to_indeces(t1_shape, attributes) + alias strides = t1_shape.strides() + + var j = 0 + for comb in product(indeces): + var flat_index = 0 + for dim in range(t1_shape.rank()): + flat_index += comb[dim] * strides[dim] + res[j] = t1[flat_index] + j += 1 + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector = AttributeVector(), + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + return Tensor[dtype]() \ No newline at end of file diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 7198270..c737821 100644 --- a/basalt/autograd/ops/ops.mojo +++ 
b/basalt/autograd/ops/ops.mojo
@@ -15,7 +15,7 @@ from .basics import (
     TRANSPOSE,
     FMA,
 )
-from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE
+from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE, INDEX
 from .dynamics import CONCAT, SPLIT
 from .conv import CONV2D
 from .pool import MAXPOOL2D
@@ -61,6 +61,7 @@ struct OP(Stringable):
     alias CONCAT = OP(23, "CONCAT", dynamic=True)
     alias SPLIT = OP(24, "SPLIT", dynamic=True)
     alias SLICE = OP(25, "SLICE")
+    alias INDEX = OP(26, "INDEX")
 
     var id: UInt8
     var name: Bytes[16]
@@ -135,6 +136,8 @@ fn static_result_shape(
         return UNSQUEEZE.result_shape(t1_shape, attributes)
     elif op == OP.SLICE:
         return SLICE.result_shape(t1_shape, attributes)
+    elif op == OP.INDEX:
+        return INDEX.result_shape(t1_shape, attributes)
     else:
         print("[ERROR] Operator not found.")
         return TensorShape(-1)
@@ -249,6 +252,8 @@ fn forward_op[
         UNSQUEEZE.forward[t1_shape, attributes](res, t1)
     elif op == OP.SLICE:
         SLICE.forward[t1_shape, attributes](res, t1)
+    elif op == OP.INDEX:
+        INDEX.forward[t1_shape, attributes](res, t1)
     else:
         print("[ERROR] Operator not found.")
@@ -361,6 +366,8 @@ fn backward_op[
         res_grad = UNSQUEEZE.backward[ug_shape, t1_shape](ug, t1)
     elif op == OP.SLICE:
         res_grad = SLICE.backward[ug_shape, t1_shape, attributes](ug, t1)
+    elif op == OP.INDEX:
+        res_grad = INDEX.backward[ug_shape, t1_shape, attributes](ug, t1)
     else:
         print("[ERROR] Operator not found.")
         res_grad = Tensor[dtype](-1)
diff --git a/basalt/utils/itertools.mojo b/basalt/utils/itertools.mojo
new file mode 100644
index 0000000..aceda31
--- /dev/null
+++ b/basalt/utils/itertools.mojo
@@ -0,0 +1,47 @@
+
+@value
+struct _ProductIterator(Sized):
+    var lists: List[List[Int]]
+    var indeces: List[Int]
+    var _iters: Int
+
+    @always_inline("nodebug")
+    fn __init__(inout self, lists: List[List[Int]]):
+        self.lists = lists
+        self.indeces = List[Int]()
+        for i in range(len(lists)):
+            self.indeces.append(0)
+
+        self._iters = 1
+        for lst in self.lists:
+            self._iters *= len(lst[])
+
+    @always_inline("nodebug")
+    fn __len__(self) -> Int:
+        return self._iters
+
+    @always_inline("nodebug")
+    fn __iter__(self) -> Self:
+        return self
+
+    @always_inline("nodebug")
+    fn __next__(inout self) -> List[Int]:
+        var res = List[Int]()
+        for i in range(len(self.lists)):
+            res.append(self.lists[i][self.indeces[i]])
+        self._increment_indeces()
+        self._iters -= 1
+        return res ^
+
+    @always_inline("nodebug")
+    fn _increment_indeces(inout self):
+        for i in reversed(range(len(self.indeces))):
+            self.indeces[i] += 1
+            if self.indeces[i] < len(self.lists[i]):
+                break
+            self.indeces[i] = 0
+
+
+@always_inline("nodebug")
+fn product(lists: List[List[Int]]) -> _ProductIterator:
+    return _ProductIterator(lists)
\ No newline at end of file
diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo
index 2ba723e..4d87bb1 100644
--- a/tests/mojo/test_mlops.mojo
+++ b/tests/mojo/test_mlops.mojo
@@ -620,33 +620,63 @@ fn test_backward_SLICE_multiple_axes() raises:
     ](t1, ug, expected_ug)
 
 
+from basalt.autograd.ops.mlops import INDEX
+
+fn test_INDEX() raises:
+    alias t1_shape = TensorShape(2, 3, 5)
+    var t = Tensor[dtype](t1_shape)
+    for i in range(t.num_elements()):
+        t[i] = i
+
+    # t[:, [0, 0], 0:5:2]
+    # TODO: needs a list attribute; this can only specify up to MAX_RANK indices
+    alias attr_1 = Attribute("dim_1i", TensorShape(0, 0))
+    alias attr_2 = Attribute("dim_2s", TensorShape(0, 5, 2))
+
+    var expected = Tensor[dtype](2, 2, 3)
+    for i in range(2):
+        for j in range(2):
+            for k in range(3):
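+                # Each element of t equals its flat index, so for t[:, [0, 0], 0:5:2]
+                # the value picked at output position (i, j, k) lives at flat index
+                # i*3*5 + 0*5 + k*2; j never appears because both selected indices
+                # along dim 1 are 0.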
+ expected[i*2*3 + j*3 + k] = i * 3 * 5 + k * 2 + + test_unary_op[ + OP.INDEX, t1_shape, AttributeVector( + attr_1, + attr_2, + ) + ](t, expected) + + print(expected) + + fn main(): try: - test_SIGMOID() - test_RELU() - test_TANH() - test_CLIP() - test_SQUEEZE() - test_UNSQUEEZE() - test_SLICE() - test_SLICE_step() - test_SLICE_neg() - test_SLICE_multiple_axes() + # test_SIGMOID() + # test_RELU() + # test_TANH() + # test_CLIP() + # test_SQUEEZE() + # test_UNSQUEEZE() + # test_SLICE() + # test_SLICE_step() + # test_SLICE_neg() + # test_SLICE_multiple_axes() + test_INDEX() except e: print("[ERROR] Error in forward mlops") print(e) return - try: - test_backward_SIGMOID() - test_backward_RELU() - test_backward_TANH() - test_backward_CLIP() - test_backward_SQUEEZE() - test_backward_UNSQUEEZE() - test_backward_SLICE() - test_backward_SLICE_multiple_axes() - except e: - print("[ERROR] Error in backward mlops") - print(e) - return + # try: + # test_backward_SIGMOID() + # test_backward_RELU() + # test_backward_TANH() + # test_backward_CLIP() + # test_backward_SQUEEZE() + # test_backward_UNSQUEEZE() + # test_backward_SLICE() + # test_backward_SLICE_multiple_axes() + # except e: + # print("[ERROR] Error in backward mlops") + # print(e) + # return From 60e510844596ede4fe51a22ed1c54fe27fc50292 Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Tue, 14 May 2024 14:56:27 +0200 Subject: [PATCH 02/16] unoptimized index bw --- basalt/autograd/ops/mlops.mojo | 15 ++++++++- tests/mojo/test_mlops.mojo | 57 ++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 0f9bb1f..5aa2d8b 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -567,4 +567,17 @@ struct INDEX: t1_shape: TensorShape, attributes: AttributeVector = AttributeVector(), ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - return Tensor[dtype]() \ No newline at end of file + alias indeces = Self.to_indeces(t1_shape, attributes) + alias strides = t1_shape.strides() + + var res_grad = Tensor[dtype](t1_shape) + + var j = 0 + for comb in product(indeces): + var flat_index = 0 + for dim in range(t1_shape.rank()): + flat_index += comb[dim] * strides[dim] + res_grad[flat_index] += ug[j] + j += 1 + + return res_grad^ \ No newline at end of file diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 4d87bb1..964e134 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -649,6 +649,36 @@ fn test_INDEX() raises: print(expected) +fn test_INDEX_backward() raises: + alias t1_shape = TensorShape(2, 3, 5) + var t = Tensor[dtype](t1_shape) + for i in range(t.num_elements()): + t[i] = i + + alias attr_1 = Attribute("dim_1i", TensorShape(0, 0)) + alias attr_2 = Attribute("dim_2s", TensorShape(0, 5, 2)) + + alias ug_shape = TensorShape(2, 2, 3) + var ug = Tensor[dtype](ug_shape) + fill(ug, 1.0) + + var expected = Tensor[dtype](t1_shape) + for i in range(2): + for j in range(2): + for k in range(3): + # NOTE: `+=` because selected indeces [0, 0] can repeat + expected[i * 3 * 5 + k * 2] += 1.0 + + test_unary_op_backward[ + OP.INDEX, t1_shape, ug_shape, AttributeVector( + attr_1, + attr_2, + ) + ](t, ug, expected) + + print(expected) + + fn main(): try: # test_SIGMOID() @@ -667,16 +697,17 @@ fn main(): print(e) return - # try: - # test_backward_SIGMOID() - # test_backward_RELU() - # test_backward_TANH() - # test_backward_CLIP() - # test_backward_SQUEEZE() - # 
test_backward_UNSQUEEZE() - # test_backward_SLICE() - # test_backward_SLICE_multiple_axes() - # except e: - # print("[ERROR] Error in backward mlops") - # print(e) - # return + try: + # test_backward_SIGMOID() + # test_backward_RELU() + # test_backward_TANH() + # test_backward_CLIP() + # test_backward_SQUEEZE() + # test_backward_UNSQUEEZE() + # test_backward_SLICE() + # test_backward_SLICE_multiple_axes() + test_INDEX_backward() + except e: + print("[ERROR] Error in backward mlops") + print(e) + return From 8d90c09d852356ac5f30c8328d5549d785e9c827 Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Tue, 14 May 2024 18:10:38 +0200 Subject: [PATCH 03/16] getindex to product & vectorized fw --- basalt/autograd/ops/mlops.mojo | 55 +++++++++++++++++++++++++++++----- basalt/utils/itertools.mojo | 34 +++++++++++---------- 2 files changed, 65 insertions(+), 24 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 5aa2d8b..fd871fd 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -545,6 +545,26 @@ struct INDEX: new_shape.append(len(indeces[i])) return TensorShape(new_shape) + @staticmethod + fn map_indeces[ + nelts: Int, + strides: TensorShape, + indeces: List[List[Int]], + ](idx: Int) -> SIMD[DType.int64, nelts]: + alias indeces_product = product(indeces) + + var temp = SIMD[DType.int64, nelts]() + for i in range(idx, idx + nelts): + var comb = indeces_product[i] + var flat_index = 0 + + for dim in range(len(comb)): + flat_index += comb[dim] * strides[dim] + + temp[i % nelts] = flat_index + + return temp + @staticmethod fn forward[ t1_shape: TensorShape, @@ -552,14 +572,17 @@ struct INDEX: ](inout res: Tensor[dtype], t1: Tensor[dtype]): alias indeces = Self.to_indeces(t1_shape, attributes) alias strides = t1_shape.strides() + alias total_length = len(product(indeces)) + + @parameter + fn vec_index[nelts: Int](i: Int): + + res.store[nelts](i, + t1.data().gather(Self.map_indeces[nelts, strides, indeces](i)) + ) + + vectorize[vec_index, nelts](total_length) - var j = 0 - for comb in product(indeces): - var flat_index = 0 - for dim in range(t1_shape.rank()): - flat_index += comb[dim] * strides[dim] - res[j] = t1[flat_index] - j += 1 @staticmethod fn backward[ @@ -569,9 +592,25 @@ struct INDEX: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: alias indeces = Self.to_indeces(t1_shape, attributes) alias strides = t1_shape.strides() + # alias total_length = len(product(indeces)) var res_grad = Tensor[dtype](t1_shape) + # @parameter + # fn vec_index[nelts: Int](i: Int): + + # var offset = Self.map_indeces[nelts, strides, indeces](i) + # res_grad.data().scatter( + # offset, + # res_grad.data().gather(offset) + ug.load[nelts](i), + # ) + + # vectorize[vec_index, nelts](total_length) + + # BUG: Edge case in vectorization: + # When the offset = [0, 2, 4, 0] and ug = [1, 1, 1, 1] + # It doesn't scatter to index 0 twice as it should be: res_grad[0] += 1 + 1 + var j = 0 for comb in product(indeces): var flat_index = 0 @@ -579,5 +618,5 @@ struct INDEX: flat_index += comb[dim] * strides[dim] res_grad[flat_index] += ug[j] j += 1 - + return res_grad^ \ No newline at end of file diff --git a/basalt/utils/itertools.mojo b/basalt/utils/itertools.mojo index aceda31..fd7a6ce 100644 --- a/basalt/utils/itertools.mojo +++ b/basalt/utils/itertools.mojo @@ -2,16 +2,14 @@ @value struct _ProductIterator(Sized): var lists: List[List[Int]] - var indeces: List[Int] + var _current: Int var _iters: Int @always_inline("nodebug") fn __init__(inout 
self, lists: List[List[Int]]): self.lists = lists - self.indeces = List[Int]() - for i in range(len(lists)): - self.indeces.append(0) - + self._current = 0 + self._iters = 1 for lst in self.lists: self._iters *= len(lst[]) @@ -26,20 +24,24 @@ struct _ProductIterator(Sized): @always_inline("nodebug") fn __next__(inout self) -> List[Int]: - var res = List[Int]() - for i in range(len(self.lists)): - res.append(self.lists[i][self.indeces[i]]) - self._increment_indeces() + self._current += 1 self._iters -= 1 - return res ^ + return self._get_combination(self._current - 1) + + @always_inline("nodebug") + fn _get_combination(self, current: Int) -> List[Int]: + var combination = List[Int]() + var count = current + for i in reversed(range(len(self.lists))): + var index = count % len(self.lists[i]) + combination.append(self.lists[i][index]) + count //= len(self.lists[i]) + combination._reverse() + return combination ^ @always_inline("nodebug") - fn _increment_indeces(inout self): - for i in reversed(range(len(self.indeces))): - self.indeces[i] += 1 - if self.indeces[i] < len(self.lists[i]): - break - self.indeces[i] = 0 + fn __getitem__(self, index: Int) -> List[Int]: + return self._get_combination(index) @always_inline("nodebug") From 113b5aeabbf5479638addbf3b9afe24c189c73aa Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Tue, 14 May 2024 19:19:02 +0200 Subject: [PATCH 04/16] something inbetween --- basalt/autograd/ops/mlops.mojo | 39 ++++++++++++++++------------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index fd871fd..6e38aaa 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -592,31 +592,28 @@ struct INDEX: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: alias indeces = Self.to_indeces(t1_shape, attributes) alias strides = t1_shape.strides() - # alias total_length = len(product(indeces)) + alias total_length = len(product(indeces)) var res_grad = Tensor[dtype](t1_shape) - # @parameter - # fn vec_index[nelts: Int](i: Int): + @parameter + fn vec_index[nelts: Int](i: Int): - # var offset = Self.map_indeces[nelts, strides, indeces](i) - # res_grad.data().scatter( - # offset, - # res_grad.data().gather(offset) + ug.load[nelts](i), - # ) - - # vectorize[vec_index, nelts](total_length) - - # BUG: Edge case in vectorization: - # When the offset = [0, 2, 4, 0] and ug = [1, 1, 1, 1] - # It doesn't scatter to index 0 twice as it should be: res_grad[0] += 1 + 1 + var offset = Self.map_indeces[nelts, strides, indeces](i) + + # res_grad.data().scatter( + # offset, + # res_grad.data().gather(offset) + ug.load[nelts](i), + # ) + # BUG: Edge case in vectorization: + # When the offset = [0, 2, 4, 0] and ug = [1, 1, 1, 1] + # It doesn't scatter to index 0 twice as it should be: res_grad[0] += 1 + 1 + + # Workaround + var u = ug.load[nelts](i) + for j in range(nelts): + res_grad[int(offset[j])] += u[j] - var j = 0 - for comb in product(indeces): - var flat_index = 0 - for dim in range(t1_shape.rank()): - flat_index += comb[dim] * strides[dim] - res_grad[flat_index] += ug[j] - j += 1 + vectorize[vec_index, nelts](total_length) return res_grad^ \ No newline at end of file From 0c56fa4673263929ae86fee75d7a99e29d9301d6 Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Thu, 16 May 2024 00:57:56 +0200 Subject: [PATCH 05/16] upsampling nearest --- basalt/autograd/attributes.mojo | 5 + basalt/nn/__init__.mojo | 1 + basalt/nn/layers/upsample.mojo | 117 +++++++++++++++++++++++ 
tests/python/test_upsample.mojo | 159 ++++++++++++++++++++++++++++++++ 4 files changed, 282 insertions(+) create mode 100644 basalt/nn/layers/upsample.mojo create mode 100644 tests/python/test_upsample.mojo diff --git a/basalt/autograd/attributes.mojo b/basalt/autograd/attributes.mojo index 9be1822..2e87300 100644 --- a/basalt/autograd/attributes.mojo +++ b/basalt/autograd/attributes.mojo @@ -67,6 +67,11 @@ struct AttributeVector(Sized, Stringable, CollectionElement): return self.attributes[i] return None + @always_inline("nodebug") + fn append(inout self, attribute: Attribute): + self.attributes[self.size] = attribute + self.size += 1 + @always_inline("nodebug") fn __str__(self) -> String: var s: String = "[" diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 99b30a3..9c994a4 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -4,6 +4,7 @@ from .model import Model from .layers.linear import Linear from .layers.conv import Conv2d from .layers.pool import MaxPool2d +from .layers.upsample import Upsample from .loss import MSELoss, CrossEntropyLoss from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh diff --git a/basalt/nn/layers/upsample.mojo b/basalt/nn/layers/upsample.mojo new file mode 100644 index 0000000..c70de00 --- /dev/null +++ b/basalt/nn/layers/upsample.mojo @@ -0,0 +1,117 @@ +from basalt import dtype +from basalt import Graph, Symbol, OP +from basalt import Tensor, TensorShape +from basalt.autograd.attributes import AttributeVector, Attribute +from basalt.utils.itertools import product + + +fn _scale_indeces(N: Int, scale: Scalar[dtype], align_corners: Bool, dim: Int, ndims: Int) -> List[Scalar[dtype]]: + var M = int(scale * N) + var indeces = List[Scalar[dtype]]() + if align_corners: + for i in range(M): + indeces.append(i * ((N - 1) / (M - 1))) + else: + var step = 1 / scale + var start = ((M - 1) * step - N + 1) / 2 + for i in range(M): + indeces.append(i * step - start) + + return indeces ^ + + +fn nearest_coeffs(N: Int, scale: Scalar[dtype], dim: Int, ndims: Int) -> List[Int]: + + @parameter + fn round_to_index(number: Scalar[dtype]) -> Int: + return int(number + 0.5) if number > 0 else int(number - 0.5) + + var indeces = List[Int]() + var scaled = _scale_indeces(N, scale, True, dim, ndims) + for i in range(len(scaled)): + indeces.append(round_to_index(scaled[i])) + return indeces ^ + + +fn linear_coeffs(N: Int, scale: Scalar[dtype], align_corners: Bool, dim: Int, ndims: Int) -> Tuple[List[Int], List[Int]]: + # TODO + return (List[Int](), List[Int]()) + + +fn cubic_coeffs(N: Int, scale: Scalar[dtype], align_corners: Bool, dim: Int, ndims: Int) -> Tuple[List[Int], List[Int]]: + # TODO + return (List[Int](), List[Int]()) + + +fn interpolate_nd[ + indices_fn: fn (Int, Scalar[dtype], Bool, Int, Int) -> Tuple[List[Int], List[Int]], +](inout g: Graph, input: Symbol, scale_factors: List[Scalar[dtype]], align_corners: Bool) -> Symbol: + + var spatial_dims = input.shape.rank() - 2 + + var indeces_weights = List[Tuple[List[Int], List[Int]]]() + for i in range(spatial_dims): + indeces_weights.append( + indices_fn( + input.shape[i + 2], + scale_factors[i], + align_corners, + i, + spatial_dims, + ) + ) + + # TODO: interpolation logic + # for idx_weight in product(indeces_weights): + # ... 
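+    # A possible shape for the missing logic (a hedged sketch, not the implemented
+    # behaviour): every output element is a weighted sum over the neighbouring
+    # input corners, one (index, weight) pair per spatial dimension, i.e. roughly
+    #
+    #   out[o_0, ..., o_n] = sum over corners of (w_0 * ... * w_n) * input[i_0, ..., i_n]
+    #
+    # so iterating the cartesian product of the per-dimension index lists and
+    # multiplying the matching weights into a single coefficient would realize
+    # the linear/cubic cases.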
+ + return Symbol(-1, dtype, TensorShape(), False) + + +fn Upsample( + inout g: Graph, + input: Symbol, + mode: StringLiteral, + scale_factors: List[Scalar[dtype]], + align_corners: Bool = False, +) -> Symbol: + + # Assumption: A scale needs to be provided for each spatial dimension. + # input shape (B, C, *N) with batch and channel considered as non-spatial dimensions. + # input.shape.rank() - 2 == len(scale_factor) + var spatial_dims = input.shape.rank() - 2 + + var res: Symbol + var attributes = AttributeVector() + var INDEX_LITERALS = List[StringLiteral]("dim_2i", "dim_3i", "dim_4i") + + if mode == "nearest": + # Nearest neighbor interpolation --> input[:, :, *indeces] + for i in range(spatial_dims): + attributes.append( + Attribute( + INDEX_LITERALS[i], + nearest_coeffs(input.shape[i + 2], scale_factors[i], i, spatial_dims) + ) + ) + + res = g.op(OP.INDEX, input, attributes=attributes) + + # elif mode == "linear": + # res = interpolate_nd[linear_coeffs](g, + # input, + # scale_factor, + # align_corners + # ) + + # elif mode == "cubic": + # res = interpolate_nd[cubic_coeffs](g, + # input, + # scale_factor, + # align_corners + # ) + else: + res = input + + return res + diff --git a/tests/python/test_upsample.mojo b/tests/python/test_upsample.mojo new file mode 100644 index 0000000..c5918ff --- /dev/null +++ b/tests/python/test_upsample.mojo @@ -0,0 +1,159 @@ +from python.python import Python, PythonObject + +import basalt.nn as nn +from basalt import dtype, Graph +from basalt import Tensor, TensorShape +from tests import assert_tensors_equal, to_numpy, to_tensor + + +fn test_upsample[ + shape: TensorShape, + mode: StringLiteral, + scale_factors: List[Scalar[dtype]], + align_corners: Bool +]( + t1: Tensor[dtype], + ug: Tensor[dtype], + expected: Tensor[dtype], + expected_grad: Tensor[dtype] +) raises: + + fn create_graph() -> Graph: + var g = Graph() + var t1 = g.input(shape, trainable=True) + var t2 = nn.Upsample(g, t1, mode, scale_factors, align_corners) + g.out(t2) + return g ^ + + alias graph = create_graph() + var model = nn.Model[graph](inference_only=True) + var res = model.inference(t1)[0] + + model.backward(ug) + var res_grad = model.parameters.grads[graph.inputs[0]] + + assert_tensors_equal["almost"](res, expected) + assert_tensors_equal["almost"](res_grad, expected_grad) + + +@value +struct torch_upsample_result: + var expected: Tensor[dtype] + var grad: Tensor[dtype] + + +fn test_upsample_torch[ + shape: TensorShape, + mode: StringLiteral, + scale_factors: List[Scalar[dtype]], + align_corners: Bool +](data: PythonObject, ug: PythonObject) raises -> torch_upsample_result: + + var py = Python.import_module("builtins") + var np = Python.import_module("numpy") + var torch = Python.import_module("torch") + + var py_scales = py.list() + for i in range(len(scale_factors)): + py_scales.append(scale_factors[i]) + + # if mode == "nearest": + # var ups = torch.nn.Upsample(scale_factor=py.tuple(py_scales), mode=mode) + # else: + # var ups = torch.nn.Upsample(scale_factor=py.tuple(py_scales), mode=mode, align_corners=align_corners) + + var ups = torch.nn.Upsample(scale_factor=py.tuple(py_scales), mode=mode) + + var tensor = torch.from_numpy(data).requires_grad_(True) + var expected = ups(tensor) + var upper_grad = torch.from_numpy(ug) + _ = expected.backward(upper_grad) + + return torch_upsample_result( + to_tensor(expected.detach().numpy()), + to_tensor(tensor.grad.numpy()), + ) + + + +fn test_UPSAMPLE_nearest() raises: + var np = Python.import_module("numpy") + + alias shape = 
TensorShape(1, 1, 2, 2) + alias mode: StringLiteral = "nearest" + alias scales = List[Scalar[dtype]](2.0, 3.0) + alias align_corners = False + + var data = np.array([ + 1, 2, + 3, 4 + ], dtype=np.float32).reshape(1, 1, 2, 2) + + var ug = np.ones((1, 1, 4, 6)) + + var torch_out = test_upsample_torch[shape, mode, scales, align_corners](data, ug) + test_upsample[shape, mode, scales, align_corners]( + to_tensor(data), + to_tensor(ug), + torch_out.expected, + torch_out.grad + ) + + _ = data + + +fn test_UPSAMPLE_linear() raises: + var np = Python.import_module("numpy") + + alias shape = TensorShape(1, 1, 2, 2) + alias mode: StringLiteral = "linear" + alias scales = List[Scalar[dtype]](2.0, 2.0) + + var data = np.array([ + 1, 2, + 3, 4 + ], dtype=np.float32).reshape(1, 1, 2, 2) + + # var expected = np.array([ + # 1., 1.25, 1.75, 2. , + # 1.5, 1.75, 2.25, 2.5 , + # 2.5, 2.75, 3.25, 3.5 , + # 3., 3.25, 3.75, 4. , + # ], dtype=np.float32).reshape(1, 1, 4, 4) + + +fn test_UPSAMPLE_cubic() raises: + var np = Python.import_module("numpy") + + alias shape = TensorShape(1, 1, 4, 4) + alias mode: StringLiteral = "cubic" + alias scales = List[Scalar[dtype]](2.0, 2.0) + + var data = np.array([ + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + ], dtype=np.float32).reshape(1, 1, 4, 4) + + # var expected = np.array([ + # 0.47265625, 0.76953125, 1.24609375, 1.875, 2.28125, 2.91015625, 3.38671875, 3.68359375, + # 1.66015625, 1.95703125, 2.43359375, 3.0625, 3.46875, 4.09765625, 4.57421875, 4.87109375, + # 3.56640625, 3.86328125, 4.33984375, 4.96875, 5.375, 6.00390625, 6.48046875, 6.77734375, + # 6.08203125, 6.37890625, 6.85546875, 7.484375, 7.890625, 8.51953125, 8.99609375, 9.29296875, + # 7.70703125, 8.00390625, 8.48046875, 9.109375, 9.515625, 10.14453125, 10.62109375, 10.91796875, + # 10.22265625, 10.51953125, 10.99609375, 11.625, 12.03125, 12.66015625, 13.13671875, 13.43359375, + # 12.12890625, 12.42578125, 12.90234375, 13.53125, 13.9375, 14.56640625, 15.04296875, 15.33984375, + # 13.31640625, 13.61328125, 14.08984375, 14.71875, 15.125, 15.75390625, 16.23046875, 16.52734375 + # ], dtype=np.float32).reshape(1, 1, 8, 8) + + +fn main(): + + try: + test_UPSAMPLE_nearest() + # test_UPSAMPLE_linear() + # test_UPSAMPLE_cubic() + except e: + print("[Error] Error in Upsample") + print(e) \ No newline at end of file From 0876ea3ba546ffd26a89c822f029089ddc51434a Mon Sep 17 00:00:00 2001 From: NKspartan Date: Mon, 27 May 2024 19:13:36 -0600 Subject: [PATCH 06/16] Added upsample nearest op and test --- basalt/autograd/ops/mlops.mojo | 84 +++++++++++++++++++++++++++++++++- basalt/autograd/ops/ops.mojo | 7 ++- tests/mojo/test_mlops.mojo | 48 ++++++++++++++++++- 3 files changed, 135 insertions(+), 4 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 6e38aaa..995e0c4 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -616,4 +616,86 @@ struct INDEX: vectorize[vec_index, nelts](total_length) - return res_grad^ \ No newline at end of file + return res_grad^ + + +struct UPSAMPLE: + @staticmethod + fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + var scales = attributes["scales"].value().to_shape() + var mode = attributes["mode"].value().to_string() + + var new_shape = List[Int]() + for i in range(0, t1_shape.rank()): + if i < 2: + new_shape.append(t1_shape[i]) + else: + new_shape.append(t1_shape[i] * scales[i - 2]) + + return TensorShape(new_shape) + + @staticmethod + fn recursive_iter[pos_shape: Int, 
shape: TensorShape, scales: TensorShape](inout res: Tensor[dtype], t1: Tensor[dtype], strides_res: StaticIntTuple[8], index_t1: Int, index_res: Int): + alias end_pos = shape.rank() - 1 + alias strides = shape.strides() + + @parameter + if pos_shape >= end_pos: + @parameter + fn v_iter[nelts: Int](i: Int): + var values = t1.load[nelts](index_t1 + i) + + var offset_res = index_res + i * scales[end_pos - 2] + for j in range(nelts * scales[pos_shape - 2]): + var temp = j // scales[pos_shape - 2] + + res[offset_res + j] = values[temp] + + vectorize[v_iter, nelts](shape[pos_shape]) + + return + else: + for i in range(shape[pos_shape] * scales[pos_shape - 2]): + var temp_i = i // scales[pos_shape - 2] + var temp_index_t1 = temp_i * strides[pos_shape] + index_t1 + var temp_index_res = i * strides_res[pos_shape] + index_res + + Self.recursive_iter[pos_shape + 1, shape, scales](res, t1, strides_res, temp_index_t1, temp_index_res) + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + # Input is [N, C, D in, H in, W in], N is batch size and C is number of channels. Ranks 3-D, 4-D or 5-D tensors. + alias scales = attributes["scales"].value().to_shape() # Has to match input size (the last dimensions D, H and W) or just be one value + alias mode = attributes["mode"].value().to_string() + + alias strides = t1_shape.strides() + alias total_length = t1_shape.num_elements() + + alias first_loop = total_length // strides[1] + + var strides_res = res.strides() + + @parameter + if mode == "nearest": + @parameter + fn p_iter(i: Int): + var offset = i * strides[1] + var offset_res = i * strides_res[1] + + Self.recursive_iter[2, t1_shape, scales]( + res, t1, strides_res, offset, offset_res) + + parallelize[p_iter](first_loop) + else: + pass + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector = AttributeVector(), + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + return t1 \ No newline at end of file diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index c737821..c47f5ce 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -15,7 +15,7 @@ from .basics import ( TRANSPOSE, FMA, ) -from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE, INDEX +from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE, INDEX, UPSAMPLE from .dynamics import CONCAT, SPLIT from .conv import CONV2D from .pool import MAXPOOL2D @@ -62,6 +62,7 @@ struct OP(Stringable): alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") alias INDEX = OP(26, "INDEX") + alias UPSAMPLE = OP(27, "UPSAMPLE") var id: UInt8 var name: Bytes[16] @@ -138,6 +139,8 @@ fn static_result_shape( return SLICE.result_shape(t1_shape, attributes) elif op == OP.INDEX: return INDEX.result_shape(t1_shape, attributes) + elif op == OP.UPSAMPLE: + return UPSAMPLE.result_shape(t1_shape, attributes) else: print("[ERROR] Operator not found.") return TensorShape(-1) @@ -254,6 +257,8 @@ fn forward_op[ SLICE.forward[t1_shape, attributes](res, t1) elif op == OP.INDEX: INDEX.forward[t1_shape, attributes](res, t1) + elif op == OP.UPSAMPLE: + UPSAMPLE.forward[t1_shape, attributes](res, t1) else: print("[ERROR] Operator not found.") diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 964e134..045ae7b 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -679,6 +679,48 @@ fn test_INDEX_backward() 
raises: print(expected) +fn test_UPSAMPLE() raises: + alias t1_shape = TensorShape(2, 3, 5) + var t = Tensor[dtype](t1_shape) + for i in range(t.num_elements()): + t[i] = i + + var expected = Tensor[dtype](2, 3, 10) + for i in range(2): + for j in range(3): + for k in range(5): + for l in range(2): + expected[i*3*10 + j*10 + k*2 + l] = t[i*3*5 + j*5 + k] + + test_unary_op[ + OP.UPSAMPLE, t1_shape, AttributeVector( + Attribute("scales", TensorShape(2)), + Attribute("mode", "nearest") + ) + ](t, expected) + + + alias t2_shape = TensorShape(1, 1, 2, 2) + t = Tensor[dtype](t2_shape) + for i in range(t.num_elements()): + t[i] = i + + expected = Tensor[dtype](1, 1, 4, 6) + for i in range(1): + for j in range(1): + for k in range(4): + for l in range(6): + var pos = i*1*2*2 + j*2*2 + (k // 2) * 2 + (l // 3) + expected[i*1*4*6 + j*4*6 + k*6 + l] = t[pos] + + test_unary_op[ + OP.UPSAMPLE, t2_shape, AttributeVector( + Attribute("scales", TensorShape(2, 3)), + Attribute("mode", "nearest") + ) + ](t, expected) + + fn main(): try: # test_SIGMOID() @@ -691,7 +733,8 @@ fn main(): # test_SLICE_step() # test_SLICE_neg() # test_SLICE_multiple_axes() - test_INDEX() + # test_INDEX() + test_UPSAMPLE() except e: print("[ERROR] Error in forward mlops") print(e) @@ -706,7 +749,8 @@ fn main(): # test_backward_UNSQUEEZE() # test_backward_SLICE() # test_backward_SLICE_multiple_axes() - test_INDEX_backward() + # test_INDEX_backward() + pass except e: print("[ERROR] Error in backward mlops") print(e) From 1e5fc432123e83cfdb32a5a7c9c96d8c6cbdb43f Mon Sep 17 00:00:00 2001 From: NKspartan Date: Sun, 2 Jun 2024 16:34:23 -0600 Subject: [PATCH 07/16] Merged with main --- basalt/__init__.mojo | 1 + basalt/autograd/attributes.mojo | 4 +- basalt/autograd/graph.mojo | 116 +++++++------- basalt/autograd/ops/basics.mojo | 5 +- basalt/autograd/ops/conv.mojo | 1 - basalt/autograd/ops/matmul.mojo | 175 +++++++++++++++++++++ basalt/autograd/params.mojo | 6 +- basalt/utils/collection.mojo | 33 +++- basalt/utils/onnx_utils.mojo | 150 +++++++++++------- basalt/utils/tensor_creation_utils.mojo | 73 +++++++++ basalt/utils/tensorutils.mojo | 196 +----------------------- profile.sh | 118 -------------- tests/mojo/test_backward.mojo | 11 +- tests/mojo/test_tensorutils.mojo | 2 +- tests/testing_utils.mojo | 54 +------ 15 files changed, 445 insertions(+), 500 deletions(-) create mode 100644 basalt/autograd/ops/matmul.mojo create mode 100644 basalt/utils/tensor_creation_utils.mojo delete mode 100755 profile.sh diff --git a/basalt/__init__.mojo b/basalt/__init__.mojo index 137eec4..73d84f1 100644 --- a/basalt/__init__.mojo +++ b/basalt/__init__.mojo @@ -5,3 +5,4 @@ from basalt.utils.collection import Collection alias dtype = DType.float32 alias nelts = 2 * simdwidthof[dtype]() alias seed = 42 +alias epsilon = 1e-12 diff --git a/basalt/autograd/attributes.mojo b/basalt/autograd/attributes.mojo index 2e87300..5a57a9a 100644 --- a/basalt/autograd/attributes.mojo +++ b/basalt/autograd/attributes.mojo @@ -120,7 +120,7 @@ struct Attribute(Stringable, CollectionElement): self.size = N for i in range(self.size): - self.data[i] = value[i] + self.data_shape[i] = value[i] @always_inline("nodebug") fn __init__[dtype: DType](inout self, name: String, value: Scalar[dtype]): @@ -161,7 +161,7 @@ struct Attribute(Stringable, CollectionElement): var result = StaticIntTuple[N]() for i in range(N): - result[i] = int(self.data[i]) + result[i] = int(self.data_shape[i]) return result diff --git a/basalt/autograd/graph.mojo b/basalt/autograd/graph.mojo index 
2353774..4038bdc 100644 --- a/basalt/autograd/graph.mojo +++ b/basalt/autograd/graph.mojo @@ -11,7 +11,6 @@ from basalt import seed, dtype from basalt import Tensor, TensorShape -@value struct Graph: var inputs: List[Symbol] var params: ParamDict @@ -28,41 +27,42 @@ struct Graph: self.loss_out = None self.symbol_count = 0 - fn input(inout self, shape: TensorShape, trainable: Bool = False) -> Symbol: - var inp = Symbol(self.symbol_count, dtype, shape, trainable) - self.inputs.append(inp) - self.symbol_count += 1 - return inp + fn __moveinit__(inout self, owned other: Graph): + self.inputs = other.inputs^ + self.params = other.params^ + self.nodes = other.nodes^ + self.outputs = other.outputs^ + self.loss_out = other.loss_out + self.symbol_count = other.symbol_count - fn param( - inout self, shape: TensorShape, init: Param, trainable: Bool = True - ) -> Symbol: - var param_id = Symbol(self.symbol_count, dtype, shape, trainable) - self.params.put(param_id, init) + fn create_symbol(inout self, shape: TensorShape, data: Optional[Param] = None, trainable: Bool = False, is_input: Bool = False) -> Symbol: + var symbol = Symbol(self.symbol_count, dtype, shape, trainable) self.symbol_count += 1 - return param_id + + if data is not None: + self.params.put(symbol, data.take()) + else: + self.params.put(symbol) + + if is_input: + self.inputs.append(symbol) + + return symbol + + fn input(inout self, shape: TensorShape, trainable: Bool = False) -> Symbol: + return self.create_symbol(shape, trainable=trainable, is_input=True) + + fn param(inout self, shape: TensorShape, init: Param, trainable: Bool = True) -> Symbol: + return self.create_symbol(shape, init, trainable) fn param(inout self, shape: TensorShape, trainable: Bool = True) -> Symbol: - var param_id = Symbol(self.symbol_count, dtype, shape, trainable) - self.params.put(param_id) - self.symbol_count += 1 - return param_id + return self.create_symbol(shape, trainable=trainable) fn scalar(inout self, value: Scalar[dtype]) -> Symbol: - var scal = Param(value) - var scalar_id = Symbol( - self.symbol_count, dtype, TensorShape(1), trainable=False - ) - self.params.put(scalar_id, scal) - self.symbol_count += 1 - return scalar_id + return self.create_symbol(TensorShape(1), Param(value), trainable=False) fn constant(inout self, shape: TensorShape, data: List[Scalar[dtype]]) -> Symbol: - var cst = Param(data) - var constant_id = Symbol(self.symbol_count, dtype, shape, trainable=False) - self.params.put(constant_id, cst) - self.symbol_count += 1 - return constant_id + return self.create_symbol(shape, Param(data), trainable=False) fn out(inout self, symbol: Symbol): self.outputs.append(symbol) @@ -77,14 +77,15 @@ struct Graph: attributes: AttributeVector = AttributeVector(), ) -> Symbol: var res_shape = static_result_shape(op, operands, attributes) - var res = Symbol( - self.symbol_count, dtype, res_shape, self.result_trainable(operands) - ) + var res = Symbol(self.symbol_count, dtype, res_shape, self.result_trainable(operands)) self.symbol_count += 1 var inputs = List[Symbol]() + inputs.reserve(len(operands)) + for operand in operands: inputs.append(operand) + self.nodes.append(Node(op, inputs, List[Symbol](res), attributes)) return res @@ -95,8 +96,7 @@ struct Graph: operand_2: Float64, attributes: AttributeVector = AttributeVector(), ) -> Symbol: - var operand_2_symbol = self.scalar(operand_2) - return self.op(op, operand_1, operand_2_symbol, attributes=attributes) + return self.op(op, operand_1, self.scalar(operand_2), attributes=attributes) fn op( inout 
self, @@ -105,43 +105,43 @@ struct Graph: operand_2: Symbol, attributes: AttributeVector = AttributeVector(), ) -> Symbol: - var operand_1_symbol = self.scalar(operand_1) - return self.op(op, operand_1_symbol, operand_2, attributes=attributes) + return self.op(op, self.scalar(operand_1), operand_2, attributes=attributes) + + fn create_symbols(inout self, shapes: List[TensorShape], trainable: Bool = False) -> List[Symbol]: + var symbols = List[Symbol]() + symbols.reserve(len(shapes)) + + for shape in shapes: + symbols.append(Symbol(self.symbol_count, dtype, shape[], trainable)) + self.symbol_count += 1 + + return symbols + + fn add_node(inout self, op: OP, inputs: List[Symbol], outputs: List[Symbol], attributes: AttributeVector): + self.nodes.append(Node(op, inputs, outputs, attributes)) - # Dynamic ops fn concat(inout self, *operands: Symbol, dim: Int = 0) -> Symbol: - # NOTE: Concat could fit into g.op() given a different static_result_shape is called var attributes = AttributeVector(Attribute("dim", dim)) - var res_shape = dynamic_result_shape(OP.CONCAT, operands, attributes)[0] - var res = Symbol( - self.symbol_count, dtype, res_shape, self.result_trainable(operands) - ) - self.symbol_count += 1 + var res_symbols = self.create_symbols(List[TensorShape](res_shape), self.result_trainable(operands)) - var inputs = List[Symbol]() + var operand_list = List[Symbol]() + operand_list.reserve(len(operands)) for operand in operands: - inputs.append(operand) - self.nodes.append(Node(OP.CONCAT, inputs, List[Symbol](res), attributes)) - return res + operand_list.append(operand) + + self.add_node(OP.CONCAT, operand_list, res_symbols, attributes) + return res_symbols[0] fn split( inout self, operand: Symbol, sections: List[Int], dim: Int = 0 ) -> List[Symbol]: - var attributes = AttributeVector( - Attribute("sections", TensorShape(sections)), Attribute("dim", dim) - ) + var attributes = AttributeVector(Attribute("sections", TensorShape(sections)), Attribute("dim", dim)) var res_shapes = dynamic_result_shape(OP.SPLIT, operand, attributes) var trainable = self.result_trainable(operand) - - var results = List[Symbol]() - for i in range(len(res_shapes)): - var symbol = Symbol(self.symbol_count, dtype, res_shapes[i], trainable) - results.append(symbol) - self.symbol_count += 1 - - self.nodes.append(Node(OP.SPLIT, List[Symbol](operand), results, attributes)) - return results + var result_symbols = self.create_symbols(res_shapes, trainable) + self.add_node(OP.SPLIT, List[Symbol](operand), result_symbols, attributes) + return result_symbols @staticmethod fn result_trainable(operands: VariadicList[Symbol]) -> Bool: diff --git a/basalt/autograd/ops/basics.mojo b/basalt/autograd/ops/basics.mojo index 3a4a5ab..2305377 100644 --- a/basalt/autograd/ops/basics.mojo +++ b/basalt/autograd/ops/basics.mojo @@ -6,6 +6,7 @@ from basalt import Tensor, TensorShape from basalt.nn.tensor import MAX_RANK from basalt.utils.tensorutils import * from basalt.autograd.attributes import Attribute, AttributeVector +from basalt.autograd.ops.matmul import dot, dot_transpose_t1, dot_transpose_t2 """ Implement forward and backward operations for basic tensor manipulations. 
@@ -323,9 +324,7 @@ struct POW: @parameter fn vec_pow_bw_x[nelts: Int](i: Int): - res_grad.store[nelts]( - i, a * (t1.load[nelts](i) ** (a - 1)) * ug.load[nelts](i) - ) + res_grad.store[nelts](i, a * ((t1.load[nelts](i) + epsilon) ** (a - 1)) * ug.load[nelts](i)) vectorize[vec_pow_bw_x, nelts](t1_shape.num_elements()) diff --git a/basalt/autograd/ops/conv.mojo b/basalt/autograd/ops/conv.mojo index 3e1a18c..4592c51 100644 --- a/basalt/autograd/ops/conv.mojo +++ b/basalt/autograd/ops/conv.mojo @@ -1,6 +1,5 @@ from basalt import Tensor, TensorShape from basalt.autograd.attributes import AttributeVector -from basalt.utils.tensorutils import dot, dot_transpose_t1, dot_transpose_t2 from algorithm import parallelize, vectorize, tile from math import divmod diff --git a/basalt/autograd/ops/matmul.mojo b/basalt/autograd/ops/matmul.mojo new file mode 100644 index 0000000..e638f81 --- /dev/null +++ b/basalt/autograd/ops/matmul.mojo @@ -0,0 +1,175 @@ +from basalt.utils.tensorutils import transpose_2D +from algorithm import vectorize, parallelize + + +@always_inline +fn calculate_block[ + M: Int, N: Int, K: Int, BLOCK_M: Int, BLOCK_N: Int, nelts: Int +]( + res: DTypePointer[dtype], + t1: DTypePointer[dtype], + t2: DTypePointer[dtype], + bm: Int, + bn: Int, +): + # Compute tile + var acc = stack_allocation[BLOCK_M * BLOCK_N, dtype]() + memset_zero[dtype](acc, BLOCK_M * BLOCK_N) + + for k in range(K): + + @unroll + for m in range(BLOCK_M): + + @parameter + fn inner_n[nelts: Int](n: Int): + acc.store[width=nelts]( + m * BLOCK_N + n, + SIMD[dtype, nelts] + .splat(t1[(bm + m) * K + k]) + .fma( + t2.load[width=nelts](k * N + (bn + n)), + acc.load[width=nelts](m * BLOCK_N + n), + ), + ) + + vectorize[inner_n, nelts](BLOCK_N) + + # Store tile + for m in range(BLOCK_M): + + @parameter + fn vec_store[nelts: Int](n: Int): + res.store[width=nelts]( + (bm + m) * N + (bn + n), acc.load[width=nelts](m * BLOCK_N + n) + ) + + vectorize[vec_store, nelts](BLOCK_N) + + +@parameter +@always_inline +fn dot[ + t1_shape: TensorShape, t2_shape: TensorShape +](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype]): + dot[t1_shape, t2_shape](res.data(), t1.data(), t2.data()) + + +@parameter +@always_inline +fn dot[ + t1_shape: TensorShape, t2_shape: TensorShape +](res: DTypePointer[dtype], t1: DTypePointer[dtype], t2: DTypePointer[dtype]): + alias M = t1_shape[0] # t1[0] + alias K = t1_shape[1] # t1[1], t2[0] + alias N = t2_shape[1] # t2[1] + + # simdwidthof[dtype]() = 8 for float32 + alias nelts = simdwidthof[dtype]() + alias BLOCK_N = 8 * 2 + alias BLOCK_M = 6 + alias THREADS = 6 # num_logical_cores() + + alias BLOCK_N_REMAINDER = N % BLOCK_N + alias BLOCK_M_REMAINDER = M % BLOCK_M + + @parameter + fn bm_par(m_outer: Int): + var bm = m_outer * BLOCK_M + + for n_outer in range(0, N // BLOCK_N): + var bn = n_outer * BLOCK_N + + calculate_block[M, N, K, BLOCK_M, BLOCK_N, nelts](res, t1, t2, bm, bn) + + # Handle the remainder of N + @parameter + if BLOCK_N_REMAINDER > 0: + var bn = N - BLOCK_N_REMAINDER + + calculate_block[M, N, K, BLOCK_M, BLOCK_N_REMAINDER, nelts]( + res, t1, t2, bm, bn + ) + + parallelize[bm_par](M // BLOCK_M, M // BLOCK_M) + + # Handle the remainder of M + @parameter + if BLOCK_M_REMAINDER > 0: + var bm = M - BLOCK_M_REMAINDER + + for n_outer in range(0, N // BLOCK_N): + var bn = n_outer * BLOCK_N + + calculate_block[M, N, K, BLOCK_M_REMAINDER, BLOCK_N, nelts]( + res, t1, t2, bm, bn + ) + + # Handle corner remainder + @parameter + if BLOCK_N_REMAINDER > 0: + var bn = N - BLOCK_N_REMAINDER + + 
calculate_block[M, N, K, BLOCK_M_REMAINDER, BLOCK_N_REMAINDER, nelts]( + res, t1, t2, bm, bn + ) + + +fn dot_transpose_t2[ + A_shape: TensorShape, B_shape: TensorShape +](inout C: DTypePointer[dtype], A: DTypePointer[dtype], B: DTypePointer[dtype]): + dot[A_shape, TensorShape(B_shape[1], B_shape[0])](C, A, transpose_2D[B_shape](B)) + + +fn dot_transpose_t2[ + A_shape: TensorShape, B_shape: TensorShape +](inout C: Tensor[dtype], A: Tensor[dtype], B: Tensor[dtype]): + memset_zero[dtype](C.data(), C.num_elements()) + + dot[A_shape, TensorShape(B_shape[1], B_shape[0])](C, A, transpose_2D[B_shape](B)) + + # @parameter + # fn calc_row(i: Int): + # for j in range(B_shape[0]): + + # @parameter + # fn calc_row_A_B[nelts: Int](k: Int): + # var A_pos = i * A.dim(1) + k + # var B_pos = j * A.dim(1) + k + # var t_new_pos = i * C.dim(1) + j + + # C[t_new_pos] += ( + # A.load[nelts](A_pos) * B.load[nelts](B_pos) + # ).reduce_add() + + # vectorize[calc_row_A_B, nelts, size=A_shape[1]]() + + # parallelize[calc_row](A_shape[0], 1) + + +fn dot_transpose_t1[ + A_shape: TensorShape, B_shape: TensorShape +](inout C: Tensor[dtype], A: Tensor[dtype], B: Tensor[dtype]): + memset_zero[dtype](C.data(), C.num_elements()) + + dot[TensorShape(A_shape[1], A_shape[0]), B_shape](C, transpose_2D[A_shape](A), B) + + # @parameter + # fn calc_row(i: Int): + # for j in range(A_shape[0]): + + # @parameter + # fn calc_row_t_new_B[nelts: Int](k: Int): + # var A_pos = j * A.dim(1) + i + # var B_pos = j * B.dim(1) + k + # var t_new_pos = i * C.dim(1) + k + + # C.store[nelts]( + # t_new_pos, + # C.load[nelts](t_new_pos) + # + A[A_pos] * B.load[nelts](B_pos), + # ) + + # vectorize[calc_row_t_new_B, nelts, size=B_shape[1]]() + + # parallelize[calc_row](A_shape[1], 1) diff --git a/basalt/autograd/params.mojo b/basalt/autograd/params.mojo index 5d82848..37d682a 100644 --- a/basalt/autograd/params.mojo +++ b/basalt/autograd/params.mojo @@ -19,10 +19,8 @@ struct Param(CollectionElement, Stringable): self.data = data self.initializer = None - fn __init__(inout self, a: Scalar[dtype]): - var data = List[Scalar[dtype]]() - data.append(a) - self.data = data + fn __init__(inout self, data: Scalar[dtype]): + self.data = List[Scalar[dtype]](data) self.initializer = None fn __init__(inout self, initializer: String, *args: Scalar[dtype]): diff --git a/basalt/utils/collection.mojo b/basalt/utils/collection.mojo index 0a8aea9..16474a5 100644 --- a/basalt/utils/collection.mojo +++ b/basalt/utils/collection.mojo @@ -1,7 +1,7 @@ -from math import max -from memory.unsafe_pointer import UnsafePointer, move_from_pointee, initialize_pointee_copy, initialize_pointee_move, destroy_pointee +from math import max, divmod +from memory.unsafe_pointer import UnsafePointer, initialize_pointee_move, destroy_pointee -from basalt import Tensor, TensorShape, Symbol +from basalt import Tensor, Symbol struct Collection(CollectionElement, Sized): @@ -108,10 +108,29 @@ struct Collection(CollectionElement, Sized): fn get_index(self, symbol_name: UInt32) -> Int: """ Returns the index of the tensor with the given symbol name. 
- """ - for i in range(self.size): - if self.symbols[i] == symbol_name: - return i + """ + alias factor = 8 + # 2 -> 5.32s MNIST + # 4 -> 4.95s MNIST + # 8 -> 4.85s MNIST + # 16 -> 5.19s MNIST + # NOTE: This ideally should just be a hashmap + + for i in range(0, self.size, factor): + var elems = self.symbols.load[width=factor](i) == symbol_name + + for j in range(factor): + if elems[j]: + return i + j + + var split = divmod(self.size, factor) + + for i in range(split[1]): + var index = split[0] + i + + if self.symbols[index] == symbol_name: + return index + return -1 @always_inline("nodebug") diff --git a/basalt/utils/onnx_utils.mojo b/basalt/utils/onnx_utils.mojo index 9eeda44..3e67164 100644 --- a/basalt/utils/onnx_utils.mojo +++ b/basalt/utils/onnx_utils.mojo @@ -6,33 +6,13 @@ from basalt.nn.model import Parameters from basalt.nn.tensor import Tensor, TensorShape from basalt.autograd.attributes import Attribute, AttributeType from basalt.autograd.ops import OP +from basalt.autograd.graph import Node + +from .tensor_creation_utils import to_numpy, copy_np_data # NOTE: Maybe we could create our own model representation and from there convert to onnx or others (well we already have it in reallity) # NOTE: Torch doesn't import onnx, need onnx2torch and it doesn't support operators like reshape? -fn to_numpy(tensor: Tensor) raises -> PythonObject: - var np = Python.import_module("numpy") - - np.set_printoptions(4) - var rank = tensor.rank() - var pyarray: PythonObject = np.array([0]) - - if rank == 1: - pyarray = np.empty((tensor.dim(0))) - elif rank == 2: - pyarray = np.empty((tensor.dim(0), tensor.dim(1))) - elif rank == 3: - pyarray = np.empty((tensor.dim(0), tensor.dim(1), tensor.dim(2))) - elif rank == 4: - pyarray = np.empty((tensor.dim(0), tensor.dim(1), tensor.dim(2), tensor.dim(3))) - else: - print("Error: rank not supported: ", rank) - - for i in range(tensor.num_elements()): - pyarray.itemset((i), tensor[i]) - - return pyarray - fn make_onnx_attribute(op: OP, attr: Attribute) raises -> PythonObject: var onnx = Python.import_module("onnx") @@ -68,9 +48,7 @@ fn make_onnx_attribute(op: OP, attr: Attribute) raises -> PythonObject: else: raise Error("Unsupported attribute name for operator " + str(op)) - if (op == OP.CONV2D and attr_name) == "pads" or ( - op == OP.MAXPOOL2D and attr_name - ) == "pads": + if (op == OP.CONV2D or op == OP.MAXPOOL2D) and attr_name == "pads": # Special case for pads in conv and maxpool, onnx wants pads to be [x1_begin, x2_begin…x1_end, x2_end,…], attr_value.append(attr_value[0]) attr_value.append(attr_value[1]) @@ -190,15 +168,88 @@ fn load_onnx_model( + data_shape ) - var data = data_np.flatten() - - # It would be better to use memcpy here - for j in range(len(data)): - model_parameters.tensors[g.params.symbols[i]][j] = data[j].to_float64() + copy_np_data(model_parameters.tensors[g.params.symbols[i]], data_np) else: raise Error("Unsupported data type") +fn create_attributes_and_constant_inputs(node: Node, node_number: Int) raises -> (List[PythonObject], List[PythonObject]): + var onnx = Python.import_module("onnx") + var np = Python.import_module("numpy") + + var attributes = List[PythonObject]() + var inputs = List[PythonObject]() + + for i in range(len(node.attributes)): + var attr = node.attributes[i] + + @parameter + fn to_np_array(attr: Attribute) raises -> PythonObject: + if not attr.type == AttributeType.INTS: + raise Error("Attribute is not a shape") + + var values_np: PythonObject + if attr.type == AttributeType.INTS: + var shape = attr.to_shape() 
+ values_np = PythonObject([]) + for j in range(shape.rank()): + values_np.append(shape[j]) + elif attr.type == AttributeType.FLOAT: + values_np = attr.to_scalar[DType.float64]() + elif attr.type == AttributeType.INT: + values_np = attr.to_int() + else: + raise Error("Unsupported attribute type") + + var np_array = np.array(values_np, dtype=np.int64) + + return onnx.numpy_helper.from_array(np_array) + + # Special cases where attributes are considered as inputs, so we create Constant inputs + if node.operator == OP.RESHAPE: + if str(attr.name) == "shape": + var outputs = PythonObject([]) + outputs.append(str(node.operator) + "_" + str(attr.name) + "_" + str(node_number)) + var temp_node = onnx.helper.make_node( + op_type="Constant", + inputs=[], + outputs=outputs, + value=to_np_array(attr), + ) + + inputs.append(temp_node) + elif node.operator == OP.CLIP: + if str(attr.name) == "min" or str(attr.name) == "max": + var outputs = PythonObject([]) + outputs.append(str(node.operator) + "_" + str(attr.name) + "_" + str(node_number)) + var temp_node = onnx.helper.make_node( + op_type="Constant", + inputs=[], + outputs=outputs, + value=to_np_array(attr), + ) + + inputs.append(temp_node) + elif node.operator == OP.SQUEEZE or node.operator == OP.UNSQUEEZE: + if str(attr.name) == "dims": + var outputs = PythonObject([]) + outputs.append(str(node.operator) + "_" + str(attr.name) + "_" + str(node_number)) + var temp_node = onnx.helper.make_node( + op_type="Constant", + inputs=[], + outputs=outputs, + value=to_np_array(attr), + ) + + inputs.append(temp_node) + else: + var attr_value = make_onnx_attribute(node.operator, attr) + + attributes.append(attr_value) + + return (attributes, inputs) + + fn export_onnx_model(model_path: Path, model_parameters: Parameters, g: Graph) raises: # Create onnx model with data and nodes var onnx = Python.import_module("onnx") @@ -261,6 +312,14 @@ fn export_onnx_model(model_path: Path, model_parameters: Parameters, g: Graph) r var onnx_output = onnx_helper.make_tensor_value_info(name, dtype, shape) graph.value_info.append(onnx_output) + # Process attributes + var attributes_and_inputs = create_attributes_and_constant_inputs(node, i) + var attributes = attributes_and_inputs[0] + var inputs_constant = attributes_and_inputs[1] + for j in range(len(inputs_constant)): + inputs.append(inputs_constant[j].output[0]) + graph.node.append(inputs_constant[j]) + # Create onnx node var onnx_node = onnx_helper.make_node( op_type, @@ -268,33 +327,8 @@ fn export_onnx_model(model_path: Path, model_parameters: Parameters, g: Graph) r outputs, name, ) - - # Process attributes - for j in range(len(node.attributes)): - var attr = node.attributes[j] - var attr_value = make_onnx_attribute(node.operator, attr) - - # Special case for reshape, shape in reshape is not an attribute, instead it is an input because they can be dynamic - if not node.operator == OP.RESHAPE: - onnx_node.attribute.append(attr_value) - - # Special case for reshape, shape in reshape is not an attribute, instead it is an input because they can be dynamic (it can be the result of another operator, don't know why) - if node.operator == OP.RESHAPE: - var shape = node.attributes[0].to_shape() - var list_shape = PythonObject([]) - for j in range(shape.rank()): - list_shape.append(shape[j]) - - graph.initializer.append( - onnx_helper.make_tensor( - name=name + "_shape", - data_type=onnx.TensorProto.INT64, - dims=(shape.rank(),), - vals=list_shape, - ) - ) - - onnx_node.input.append(name + "_shape") + for attribute in attributes: + 
onnx_node.attribute.append(attribute[])
 
         graph.node.append(onnx_node)
 
diff --git a/basalt/utils/tensor_creation_utils.mojo b/basalt/utils/tensor_creation_utils.mojo
new file mode 100644
index 0000000..375279a
--- /dev/null
+++ b/basalt/utils/tensor_creation_utils.mojo
@@ -0,0 +1,73 @@
+from python import Python
+
+# Maybe these functions should live on the Tensor struct: tensor.to_numpy(), tensor.__init__(np_array: PythonObject) to create a tensor from a numpy array, and tensor.copy_np_data(np_array: PythonObject) to copy the numpy array into the tensor.
+
+
+fn to_numpy(tensor: Tensor) -> PythonObject:
+    try:
+        var np = Python.import_module("numpy")
+
+        np.set_printoptions(4)
+
+        var rank = tensor.rank()
+        var dims = PythonObject([])
+        for i in range(rank):
+            dims.append(tensor.dim(i))
+        var pyarray: PythonObject = np.empty(dims, dtype=np.float32)
+
+        var pointer = int(pyarray.__array_interface__["data"][0].to_float64())
+        var pointer_d = DTypePointer[tensor.dtype](address=pointer)
+        memcpy(pointer_d, tensor.data(), tensor.num_elements())
+
+        _ = tensor
+
+        return pyarray^
+    except e:
+        print("Error in to_numpy", e)
+        return PythonObject()
+
+
+fn to_tensor(np_array: PythonObject) raises -> Tensor[dtype]:
+    var shape = List[Int]()
+    for i in range(np_array.ndim):
+        shape.append(int(np_array.shape[i].to_float64()))
+    if np_array.ndim == 0:
+        # When the numpy array is a scalar, either reshape it to a size-1 ndarray or handle it like this; otherwise the memcpy hits a memory error (maybe because it is a register value?).
+        var tensor = Tensor[dtype](TensorShape(1))
+        tensor[0] = np_array.to_float64().cast[dtype]()
+        return tensor^
+
+    var tensor = Tensor[dtype](TensorShape(shape))
+
+    var np_array_2 = np_array.copy()
+    try:
+        var np = Python.import_module("numpy")
+        np_array_2 = np.float32(np_array_2)
+    except e:
+        print("Error in to_tensor", e)
+
+    var pointer = int(np_array_2.__array_interface__["data"][0].to_float64())
+    var pointer_d = DTypePointer[tensor.dtype](address=pointer)
+    memcpy(tensor.data(), pointer_d, tensor.num_elements())
+
+    _ = np_array_2
+    _ = np_array
+
+    return tensor^
+
+
+fn copy_np_data(tensor: Tensor, np_array: PythonObject) raises:
+    var np_array_2 = np_array.copy()
+    try:
+        var np = Python.import_module("numpy")
+        np_array_2 = np.float32(np_array_2)
+    except e:
+        print("Error in copy_np_data", e)
+
+    var pointer = int(np_array_2.__array_interface__["data"][0].to_float64())
+    var pointer_d = DTypePointer[tensor.dtype](address=pointer)
+    memcpy(tensor.data(), pointer_d, tensor.num_elements())
+
+    _ = np_array_2
+    _ = np_array
+    _ = tensor
diff --git a/basalt/utils/tensorutils.mojo b/basalt/utils/tensorutils.mojo
index fea82bc..b3b70de 100644
--- a/basalt/utils/tensorutils.mojo
+++ b/basalt/utils/tensorutils.mojo
@@ -48,14 +48,8 @@ fn broadcast_shapes(s1: TensorShape, s2: TensorShape) -> TensorShape:
     var ndim = max(s1.rank(), s2.rank())
     var diff = abs(s1.rank() - s2.rank())
 
-    var big: TensorShape
-    var small: TensorShape
-    if s1.rank() > s2.rank():
-        big = s1
-        small = s2
-    else:
-        big = s2
-        small = s1
+    var big = s1 if s1.rank() > s2.rank() else s2
+    var small = s2 if s1.rank() > s2.rank() else s1
 
     var res = StaticIntTuple[MAX_RANK](-1)
 
@@ -67,12 +61,7 @@
         elif a == 1 or b == 1:
             res[i] = a * b
         else:
-            # NOTE: consider assert and allow the function raises
-            var message: String = "[ERROR] Shapes " + str(s1) + " and " + str(
-                s2
-            ) + " cannot be broadcasted together."
- print(message) - # raise Error(message) + print("[ERROR] Shapes " + str(s1) + " and " + str(s2) + " cannot be broadcasted together.") for i in range(diff - 1, -1, -1): res[i] = big[i] @@ -91,9 +80,7 @@ fn broadcast_shapes(*s: TensorShape) -> TensorShape: @always_inline -fn broadcast_calculate_strides[ - size: Int, shape: TensorShape, broadcast_shape: TensorShape -]() -> StaticIntTuple[size]: +fn broadcast_calculate_strides[size: Int, shape: TensorShape, broadcast_shape: TensorShape]() -> StaticIntTuple[size]: alias shape_rank = shape.rank() alias diff = size - shape_rank @@ -107,181 +94,6 @@ fn broadcast_calculate_strides[ return strides - -# ----- Dot functions ----- -@always_inline -fn calculate_block[ - M: Int, N: Int, K: Int, BLOCK_M: Int, BLOCK_N: Int, nelts: Int -]( - res: DTypePointer[dtype], - t1: DTypePointer[dtype], - t2: DTypePointer[dtype], - bm: Int, - bn: Int, -): - # Compute tile - var acc = stack_allocation[BLOCK_M * BLOCK_N, dtype]() - memset_zero[dtype](acc, BLOCK_M * BLOCK_N) - - for k in range(K): - - @unroll - for m in range(BLOCK_M): - - @parameter - fn inner_n[nelts: Int](n: Int): - acc.store[width=nelts]( - m * BLOCK_N + n, - SIMD[dtype, nelts] - .splat(t1[(bm + m) * K + k]) - .fma( - t2.load[width=nelts](k * N + (bn + n)), - acc.load[width=nelts](m * BLOCK_N + n), - ), - ) - - vectorize[inner_n, nelts](BLOCK_N) - - # Store tile - for m in range(BLOCK_M): - - @parameter - fn vec_store[nelts: Int](n: Int): - res.store[width=nelts]( - (bm + m) * N + (bn + n), acc.load[width=nelts](m * BLOCK_N + n) - ) - - vectorize[vec_store, nelts](BLOCK_N) - - -@parameter -@always_inline -fn dot[ - t1_shape: TensorShape, t2_shape: TensorShape -](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype]): - dot[t1_shape, t2_shape](res.data(), t1.data(), t2.data()) - - -@parameter -@always_inline -fn dot[ - t1_shape: TensorShape, t2_shape: TensorShape -](res: DTypePointer[dtype], t1: DTypePointer[dtype], t2: DTypePointer[dtype]): - alias M = t1_shape[0] # t1[0] - alias K = t1_shape[1] # t1[1], t2[0] - alias N = t2_shape[1] # t2[1] - - # simdwidthof[dtype]() = 8 for float32 - alias nelts = simdwidthof[dtype]() - alias BLOCK_N = 8 * 2 - alias BLOCK_M = 6 - alias THREADS = 6 # num_logical_cores() - - alias BLOCK_N_REMAINDER = N % BLOCK_N - alias BLOCK_M_REMAINDER = M % BLOCK_M - - @parameter - fn bm_par(m_outer: Int): - var bm = m_outer * BLOCK_M - - for n_outer in range(0, N // BLOCK_N): - var bn = n_outer * BLOCK_N - - calculate_block[M, N, K, BLOCK_M, BLOCK_N, nelts](res, t1, t2, bm, bn) - - # Handle the remainder of N - @parameter - if BLOCK_N_REMAINDER > 0: - var bn = N - BLOCK_N_REMAINDER - - calculate_block[M, N, K, BLOCK_M, BLOCK_N_REMAINDER, nelts]( - res, t1, t2, bm, bn - ) - - parallelize[bm_par](M // BLOCK_M, M // BLOCK_M) - - # Handle the remainder of M - @parameter - if BLOCK_M_REMAINDER > 0: - var bm = M - BLOCK_M_REMAINDER - - for n_outer in range(0, N // BLOCK_N): - var bn = n_outer * BLOCK_N - - calculate_block[M, N, K, BLOCK_M_REMAINDER, BLOCK_N, nelts]( - res, t1, t2, bm, bn - ) - - # Handle corner remainder - @parameter - if BLOCK_N_REMAINDER > 0: - var bn = N - BLOCK_N_REMAINDER - - calculate_block[M, N, K, BLOCK_M_REMAINDER, BLOCK_N_REMAINDER, nelts]( - res, t1, t2, bm, bn - ) - - -fn dot_transpose_t2[ - A_shape: TensorShape, B_shape: TensorShape -](inout C: DTypePointer[dtype], A: DTypePointer[dtype], B: DTypePointer[dtype]): - dot[A_shape, TensorShape(B_shape[1], B_shape[0])](C, A, transpose_2D[B_shape](B)) - - -fn dot_transpose_t2[ - A_shape: TensorShape, 
B_shape: TensorShape -](inout C: Tensor[dtype], A: Tensor[dtype], B: Tensor[dtype]): - memset_zero[dtype](C.data(), C.num_elements()) - - dot[A_shape, TensorShape(B_shape[1], B_shape[0])](C, A, transpose_2D[B_shape](B)) - - # @parameter - # fn calc_row(i: Int): - # for j in range(B_shape[0]): - - # @parameter - # fn calc_row_A_B[nelts: Int](k: Int): - # var A_pos = i * A.dim(1) + k - # var B_pos = j * A.dim(1) + k - # var t_new_pos = i * C.dim(1) + j - - # C[t_new_pos] += ( - # A.load[nelts](A_pos) * B.load[nelts](B_pos) - # ).reduce_add() - - # vectorize[calc_row_A_B, nelts, size=A_shape[1]]() - - # parallelize[calc_row](A_shape[0], 1) - - -fn dot_transpose_t1[ - A_shape: TensorShape, B_shape: TensorShape -](inout C: Tensor[dtype], A: Tensor[dtype], B: Tensor[dtype]): - memset_zero[dtype](C.data(), C.num_elements()) - - dot[TensorShape(A_shape[1], A_shape[0]), B_shape](C, transpose_2D[A_shape](A), B) - - # @parameter - # fn calc_row(i: Int): - # for j in range(A_shape[0]): - - # @parameter - # fn calc_row_t_new_B[nelts: Int](k: Int): - # var A_pos = j * A.dim(1) + i - # var B_pos = j * B.dim(1) + k - # var t_new_pos = i * C.dim(1) + k - - # C.store[nelts]( - # t_new_pos, - # C.load[nelts](t_new_pos) - # + A[A_pos] * B.load[nelts](B_pos), - # ) - - # vectorize[calc_row_t_new_B, nelts, size=B_shape[1]]() - - # parallelize[calc_row](A_shape[1], 1) - - # ----- Element-wise unary operations ----- @always_inline fn elwise_transform[ diff --git a/profile.sh b/profile.sh deleted file mode 100755 index 870754f..0000000 --- a/profile.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/bash - -function profile() { - if [ ! -d ~/FlameGraph ]; then - InstallFlameGraph - fi - - if [ -f /proc/sys/fs/binfmt_misc/WSLInterop ]; then - profileLinux "$1" - else - case "$OSTYPE" in - darwin*) - profileMac "$1" - ;; - linux-gnu*|msys) - profileLinux "$1" - ;; - esac - fi -} - -function profileLinux() { - local mojo_file=$1 - LinuxInstallDependencies - LinuxPermissions - runProfile "$mojo_file" -} - -function profileMac() { - local mojo_file=$1 - MacInstallDependencies - MacPermissions - runProfile "$mojo_file" -} - -function runProfile() { - local mojo_file=$1 - local mojo_name="${mojo_file%.mojo}" - local temp_dir="./temp" - local perf_output="$temp_dir/out.perf" - local flamegraph_output="flamegraph.svg" - - echo "Profiling $mojo_file..." - - mkdir -p "$temp_dir" - - echo "Building $mojo_file..." - mojo build -I . "$mojo_file" - - echo "Stripping debug symbols..." - mv "$mojo_name" "$temp_dir/run.exe" - llvm-strip --strip-debug "$temp_dir/run.exe" - - echo "Running perf record..." - sudo perf record -F 99 -a -g -o "$perf_output" -- "$temp_dir/run.exe" - - echo "Generating flamegraph..." - sudo perf script -i "$perf_output" | ~/FlameGraph/stackcollapse-perf.pl | ~/FlameGraph/flamegraph.pl > "$flamegraph_output" - - echo "Opening flamegraph: $flamegraph_output" - - if command -v open &> /dev/null; then - open "$flamegraph_output" - elif command -v explorer.exe &> /dev/null; then - explorer.exe "$flamegraph_output" - elif command -v google-chrome &> /dev/null; then - google-chrome "$flamegraph_output" - fi - - echo "Cleaning up temporary files..." - rm -rf "$temp_dir" - - echo "Profiling completed." -} - -function LinuxInstallDependencies() { - if ! command -v perf &> /dev/null; then - echo "Installing perf for Linux/WSL" - sudo apt-get update - sudo apt-get install -y linux-tools-common linux-tools-generic - fi - - if ! 
command -v llvm-strip &> /dev/null; then - echo "Installing LLVM for Linux/WSL" - sudo apt-get install -y llvm - fi -} - -function MacInstallDependencies() { - if ! command -v perf &> /dev/null; then - echo "Installing perf for Mac" - brew install perf - fi - - if ! command -v llvm-strip &> /dev/null; then - echo "Installing LLVM for Mac" - brew install llvm - fi -} - -function InstallFlameGraph() { - echo "Installing FlameGraph" - git clone https://github.com/brendangregg/FlameGraph.git - mv FlameGraph ~/FlameGraph -} - -function LinuxPermissions() { - echo "Setting Linux/WSL permissions" - echo 0 | sudo tee /proc/sys/kernel/kptr_restrict > /dev/null - echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid > /dev/null - sudo sysctl -p > /dev/null -} - -function MacPermissions() { - echo "Setting Mac permissions" -} - -profile "$1" diff --git a/tests/mojo/test_backward.mojo b/tests/mojo/test_backward.mojo index 167a232..d8acc45 100644 --- a/tests/mojo/test_backward.mojo +++ b/tests/mojo/test_backward.mojo @@ -157,10 +157,15 @@ fn test_POW() raises: fill(temp, (2**2) * log[dtype, 1](2)) expected_grad2[0] = tsum(temp) - test_binary_op_backward[OP.POW, t1_shape, t2_shape, ug_shape]( - t1, t2, ug, expected_grad1, expected_grad2 - ) + test_binary_op_backward[OP.POW, t1_shape, t2_shape, ug_shape](t1, t2, ug, expected_grad1, expected_grad2) + + fill(t1, 0.0) + fill(t2, 0) + fill(ug, 1.0) + fill(expected_grad1, 0.0) + fill(expected_grad2, 0.0) + test_binary_op_backward[OP.POW, t1_shape, t2_shape, ug_shape](t1, t2, ug, expected_grad1, expected_grad2) fn test_SUM() raises: alias t1_shape = TensorShape(2, 3) diff --git a/tests/mojo/test_tensorutils.mojo b/tests/mojo/test_tensorutils.mojo index 1612f05..6c3dedf 100644 --- a/tests/mojo/test_tensorutils.mojo +++ b/tests/mojo/test_tensorutils.mojo @@ -3,9 +3,9 @@ from testing import assert_equal, assert_almost_equal from math import sqrt, exp, round, add, sub, mul, div from basalt import dtype, nelts +from basalt.autograd.ops.matmul import dot from basalt.utils.tensorutils import ( fill, - dot, elwise_transform, elwise_pow, elwise_op, diff --git a/tests/testing_utils.mojo b/tests/testing_utils.mojo index e28d311..40bb73b 100644 --- a/tests/testing_utils.mojo +++ b/tests/testing_utils.mojo @@ -7,6 +7,7 @@ from basalt.autograd import Graph, OP from basalt.autograd.ops.ops import backward_op from basalt.autograd.attributes import AttributeVector from basalt.nn import Tensor, TensorShape, Model +from basalt.utils.tensor_creation_utils import to_numpy, to_tensor # The below regex should be used to convert deprecated calls @@ -176,59 +177,6 @@ fn test_ternary_op_backward[ assert_tensors_equal["almost"](grad_3, grad_3_expected) -fn to_numpy(tensor: Tensor) -> PythonObject: - try: - var np = Python.import_module("numpy") - - np.set_printoptions(4) - - var rank = tensor.rank() - var dims = PythonObject([]) - for i in range(rank): - dims.append(tensor.dim(i)) - var pyarray: PythonObject = np.empty(dims, dtype=np.float32) - - var pointer = int(pyarray.__array_interface__['data'][0].to_float64()) - var pointer_d = DTypePointer[tensor.dtype](address=pointer) - memcpy(pointer_d, tensor.data(), tensor.num_elements()) - - _ = tensor - - return pyarray ^ - except e: - print("Error in to numpy", e) - return PythonObject() - - -fn to_tensor(np_array: PythonObject) raises -> Tensor[dtype]: - var shape = List[Int]() - for i in range(np_array.ndim): - shape.append(int(np_array.shape[i].to_float64())) - if np_array.ndim == 0: - # When the numpy array is a scalar, you need or 
the reshape to a size 1 ndarray or do this, if not the memcpy gets a memory error (Maybe because it is a register value?). - var tensor = Tensor[dtype](TensorShape(1)) - tensor[0] = np_array.to_float64().cast[dtype]() - return tensor ^ - - var tensor = Tensor[dtype](TensorShape(shape)) - - var np_array_2 = np_array.copy() - try: - var np = Python.import_module("numpy") - np_array_2 = np.float32(np_array_2) - except e: - print("Error in to tensor", e) - - var pointer = int(np_array_2.__array_interface__['data'][0].to_float64()) - var pointer_d = DTypePointer[tensor.dtype](address=pointer) - memcpy(tensor.data(), pointer_d, tensor.num_elements()) - - _ = np_array_2 - _ = np_array - - return tensor ^ - - fn create_graph_concat( t1_shape: TensorShape, t2_shape: TensorShape, t3_shape: TensorShape, dim: Int ) -> Graph: From aec36c74ea2a120036d0327eddb2d38c2dc053ad Mon Sep 17 00:00:00 2001 From: NKspartan Date: Sun, 2 Jun 2024 16:52:49 -0600 Subject: [PATCH 08/16] Fixed error in create_symbol when creating an input --- basalt/autograd/graph.mojo | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/basalt/autograd/graph.mojo b/basalt/autograd/graph.mojo index 4038bdc..b4fdf53 100644 --- a/basalt/autograd/graph.mojo +++ b/basalt/autograd/graph.mojo @@ -39,13 +39,13 @@ struct Graph: var symbol = Symbol(self.symbol_count, dtype, shape, trainable) self.symbol_count += 1 - if data is not None: - self.params.put(symbol, data.take()) - else: - self.params.put(symbol) - if is_input: self.inputs.append(symbol) + else: + if data is not None: + self.params.put(symbol, data.take()) + else: + self.params.put(symbol) return symbol From ab9174a7390aabd267749414171d0eb6c4a23c7a Mon Sep 17 00:00:00 2001 From: NKspartan Date: Sun, 2 Jun 2024 21:23:21 -0600 Subject: [PATCH 09/16] Added forward torch test for upsample --- tests/python/test_mlops_torch.mojo | 44 +++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index 2f4747c..4821d52 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -108,6 +108,18 @@ fn torch_unary_op( indices[dim] = py.slice(start, end, step) expected = input_1.flip(flip_dims)[indices] + elif op == OP.UPSAMPLE: + var attrs = attrs.value() + var scales = attrs["scales"].value().to_shape() + var mode = attrs["mode"].value().to_string() + + var scales_py = PythonObject([]) + for i in range(scales.rank()): + scales_py.append(scales[i]) + + expected = torch.nn.functional.interpolate( + input_1, scale_factor=scales_py, mode=mode + ) else: print("Error: op not supported (returning the value input_1): ", op) expected = input_1 @@ -429,16 +441,34 @@ fn test_SLICE() raises: test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2](t1, ug, expected_and_grad.grad_1) +fn test_UPSAMPLE() raises: + alias t1_shape = TensorShape(40, 40, 120, 120) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + alias attributes = AttributeVector( + Attribute("scales", TensorShape(2, 2)), + Attribute("mode", "nearest") + ) + + alias ug_shape = TensorShape(40, 40, 240, 240) + var ug = Tensor[dtype](ug_shape) + + var expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes) + test_unary_op[OP.UPSAMPLE, t1_shape, attributes](t1, expected_and_grad.expected) + + fn main(): print("Running mlops (compare with torch) tests") try: - test_SIGMOID() - test_RELU() - test_TANH() - test_CLIP() - test_SQUEEZE() 
- test_UNSQUEEZE() - test_SLICE() + # test_SIGMOID() + # test_RELU() + # test_TANH() + # test_CLIP() + # test_SQUEEZE() + # test_UNSQUEEZE() + # test_SLICE() + test_UPSAMPLE() except e: print("[ERROR] Error in mlops (compare with torch)") print(e) From fa84ae523809b077b3ca1a99da8546b1081ccd48 Mon Sep 17 00:00:00 2001 From: NKspartan Date: Sun, 2 Jun 2024 21:26:11 -0600 Subject: [PATCH 10/16] Added yolov8 model --- examples/yolo_v8_utils.py | 85 +++++++++++ examples/yolov8.mojo | 305 ++++++++++++++++++++++++++++++++++++++ examples/yolov8n_onnx.py | 159 ++++++++++++++++++++ 3 files changed, 549 insertions(+) create mode 100644 examples/yolo_v8_utils.py create mode 100644 examples/yolov8.mojo create mode 100644 examples/yolov8n_onnx.py diff --git a/examples/yolo_v8_utils.py b/examples/yolo_v8_utils.py new file mode 100644 index 0000000..1081068 --- /dev/null +++ b/examples/yolo_v8_utils.py @@ -0,0 +1,85 @@ +import cv2 +import numpy as np + +CLASSES = { + 0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane", 5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light", 10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench", 14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow", 20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack", 25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee", 30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite", 34: "baseball bat", 35: "baseball glove", 36: "skateboard", 37: "surfboard", 38: "tennis racket", + 39: "bottle", 40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon", 45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange", 50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza", 54: "donut", 55: "cake", 56: "chair", 57: "couch", 58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet", 62: "tv", 63: "laptop", 64: "mouse", 65: "remote", 66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven", 70: "toaster", 71: "sink", 72: "refrigerator", 73: "book", 74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear", 78: "hair drier", 79: "toothbrush", +} + +colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) + +# This is needed because calling this code directly in mojo gives error of numpy array circular base dependency + + +def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): + label = f'{CLASSES[class_id]} ({confidence:.2f})' + color = colors[class_id] + cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) + cv2.putText(img, label, (x - 10, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + + +def get_image(image_path): + # load the image + original_image: np.ndarray = cv2.imread(image_path) + [height, width, _] = original_image.shape + length = max((height, width)) + image = np.zeros((length, length, 3), np.uint8) + image[0:height, 0:width] = original_image + scale = length / 640 + + blob = cv2.dnn.blobFromImage( + image, scalefactor=1 / 255, size=(640, 640), swapRB=True) + + return blob + + +def draw_bounding_box_yolo(image_path, outputs): + original_image: np.ndarray = cv2.imread(image_path) + [height, width, _] = original_image.shape + length = max((height, width)) + image = np.zeros((length, length, 3), np.uint8) + image[0:height, 0:width] = original_image + scale = length / 640 + + outputs = np.array([cv2.transpose(outputs[0])]) + rows = outputs.shape[1] + + boxes = [] + scores = [] + class_ids = [] + + for i in range(rows): + classes_scores = outputs[0][i][4:] + (minScore, maxScore, 
minClassLoc, (x, maxClassIndex) + ) = cv2.minMaxLoc(classes_scores) + if maxScore >= 0.25: + box = [ + outputs[0][i][0] - (0.5 * outputs[0][i][2] + ), outputs[0][i][1] - (0.5 * outputs[0][i][3]), + outputs[0][i][2], outputs[0][i][3]] + boxes.append(box) + scores.append(maxScore) + class_ids.append(maxClassIndex) + + result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5) + + detections = [] + for i in range(len(result_boxes)): + index = result_boxes[i] + box = boxes[index] + detection = { + 'class_id': class_ids[index], + 'class_name': CLASSES[class_ids[index]], + 'confidence': scores[index], + 'box': box, + 'scale': scale} + detections.append(detection) + draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), + round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale)) + + cv2.imshow('image', original_image) + cv2.waitKey(0) + cv2.destroyAllWindows() + + print(detections) diff --git a/examples/yolov8.mojo b/examples/yolov8.mojo new file mode 100644 index 0000000..8f76344 --- /dev/null +++ b/examples/yolov8.mojo @@ -0,0 +1,305 @@ +import basalt.nn as nn +from basalt import Tensor, TensorShape +from basalt import Graph, Symbol, OP, dtype +from basalt.autograd.attributes import AttributeVector, Attribute +from basalt.utils.tensor_creation_utils import to_tensor, to_numpy + +from python import Python +from math import ceil, max + + +fn Conv( + inout g: Graph, + x: Symbol, + out_channels: Int, + kernel_size: Int, + padding: Int, + stride: Int, +) -> Symbol: + # NOTE: This is functionally equivalent to the Conv2D -> BatchNorm2D (removed in graph) -> SiLU (According to ONNX) + var conv = nn.Conv2d(g, x, out_channels, kernel_size, padding, stride) + var sigmoid = g.op(OP.SIGMOID, conv) + return g.op(OP.MUL, conv, sigmoid) + + +fn Conv( + inout g: Graph, + x: Symbol, + weight: Symbol, + bias: Symbol, + kernel_size: StaticIntTuple[2], + padding: StaticIntTuple[2], + stride: StaticIntTuple[2], +) -> Symbol: + # NOTE: This is functionally equivalent to the Conv2D -> BatchNorm2D (removed in graph) -> SiLU (According to ONNX) + var conv = g.op(OP.CONV2D, x, weight, bias, attributes=AttributeVector( + Attribute("padding", padding), + Attribute("stride", stride), + Attribute("dilation", StaticIntTuple[2](1, 1)), + )) + var sigmoid = g.op(OP.SIGMOID, conv) + return g.op(OP.MUL, conv, sigmoid) + + +fn C2f( + inout g: Graph, + x: Symbol, + out_channels: Int, + n: Int, + shortcut: Bool +) -> Symbol: + var conv = Conv(g, x, out_channels, 1, 0, 1) + + var split_size = out_channels // 2 + var split_sections = List[Int](split_size, split_size) + var split = g.split(conv, split_sections, dim=1) + + # declare the weights for the last conv here because that is the order in onnx file + var n_temp = 1 + if n > 1: + n_temp = 2 + var weight = g.param(TensorShape(out_channels, split_size * (n + 2), 1, 1)) + var bias = g.param(TensorShape(out_channels)) + + @parameter + fn bottleneck( + x: Symbol, out_channels: Int, shortcut: Bool = False + ) -> Symbol: + var conv1 = Conv(g, x, out_channels, 3, 1, 1) + var conv2 = Conv(g, conv1, out_channels, 3, 1, 1) + + if shortcut: + return g.op(OP.ADD, x, conv2) + else: + return conv2 + + var y1 = bottleneck(split[1], split_size, shortcut) + var y2 = y1 + + var concat_list = List[Symbol]() # add ability to concat to receive a list, becauase the the concatenation has to be done for each bottleneck layer that was run + + # NOTE: This assumes n >= 1 (Could add a constrained for it later) + for i in range(1, n): + 
y2 = bottleneck(y2, split_size, shortcut) + # concat_list.append(y2) + + # add ability to concat to receive a list, becauase the the concatenation has to be done for each bottleneck layer that was run + var y: Symbol + if n > 1: + y = g.concat(split[0], split[1], y1, y2, dim=1) + else: + y = g.concat(split[0], split[1], y1, dim=1) + + return Conv(g, y, weight, bias, 1, 0, 1) + + +fn SPPF(inout g: Graph, x: Symbol, out_channels: Int) -> Symbol: + var conv = Conv(g, x, out_channels // 2, 1, 0, 1) + + var maxpool2d_1 = nn.MaxPool2d(g, conv, kernel_size=5, stride=StaticIntTuple[2](1), padding=2) + var maxpool2d_2 = nn.MaxPool2d(g, maxpool2d_1, kernel_size=5, stride=StaticIntTuple[2](1), padding=2) + var maxpool2d_3 = nn.MaxPool2d(g, maxpool2d_2, kernel_size=5, stride=StaticIntTuple[2](1), padding=2) + + var y = g.concat(conv, maxpool2d_1, maxpool2d_2, maxpool2d_3, dim=1) + + return Conv(g, y, out_channels, 1, 0, 1) + + +fn Detect(inout g: Graph, x: Symbol, out_channels: Int, nc: Int, detect_conv: Int) -> Symbol: + # self.nc = nc # number of classes + # self.nl = len(ch) # number of detection layers + # self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) + # self.no = nc + self.reg_max * 4 # number of outputs per anchor + + var reg_max = 16 + + var c2 = max(max(16, out_channels // 4), reg_max * 4) + var c3 = max(0, nc) # channels + + if detect_conv == 1: + var conv1 = Conv(g, x, c2, 3, 1, 1) + var conv1_2 = Conv(g, conv1, c2, 3, 1, 1) + var conv1_3 = nn.Conv2d(g, conv1_2, 4 * reg_max, 1, 0, 1) + + return conv1_3 + else: + var conv2 = Conv(g, x, c3, 3, 1, 1) + var conv2_2 = Conv(g, conv2, c3, 3, 1, 1) + var conv2_3 = nn.Conv2d(g, conv2_2, nc, 1, 0, 1) + + return conv2_3 + + +fn YoloV8(batch_size: Int, yolo_model_type: StaticTuple[Float64, 3]) -> Graph: + var g = Graph() + var x = g.input(TensorShape(batch_size, 3, 640, 640)) + + # Adapted from https://private-user-images.githubusercontent.com/27466624/239739723-57391d0f-1848-4388-9f30-88c2fb79233f.jpg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MTUxMTk0MDYsIm5iZiI6MTcxNTExOTEwNiwicGF0aCI6Ii8yNzQ2NjYyNC8yMzk3Mzk3MjMtNTczOTFkMGYtMTg0OC00Mzg4LTlmMzAtODhjMmZiNzkyMzNmLmpwZz9Y>LUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA1MDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwNTA3VDIxNTgyNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTNlZTdkY2ZiMDA0Y2VlOGZkYjllN2FkYTQ1MTY5OWY1YzYwNjIxZDM4OTZiYWRiMGU5YWQxNzkyMTcwNGNmNTQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.0ocPCiokkivvk95bQCds6Nt0EblUrHZElycV311ImF4. Some values (output_channels, stride, etc..) are different in the onnx file and the graph image. 
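+    # NOTE: for yolov8-n (d=0.33, w=0.25, r=2) the widths computed below work
+    # out to 16, 32, 64, 128 and 256 channels, and the C2f repeat counts to
+    # 1 and 2: int(n * d + 1) approximates ceil(n * d) for these fractional
+    # multipliers (int(3 * 0.33 + 1) = 1, int(6 * 0.33 + 1) = 2).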
+ + # Backbone + var out_channels_1 = int(64 * yolo_model_type[1]) + var conv_1 = Conv(g, x, out_channels_1, 3, 1, 2) + var out_channels_2 = int(128 * yolo_model_type[1]) + var conv_2 = Conv(g, conv_1, out_channels_2, 3, 1, 2) + var C2F_n_1 = int((3 * yolo_model_type[0]) + 1) # ceil + var C2f_1 = C2f(g, conv_2, out_channels_2, n=C2F_n_1, shortcut=True) + var out_channels_3 = int(256 * yolo_model_type[1]) + var conv_3 = Conv(g, C2f_1, out_channels_3, 3, 1, 2) + var C2F_n_2 = int((6 * yolo_model_type[0]) + 1) # ceil + var C2f_2 = C2f(g, conv_3, out_channels_3, n=C2F_n_2, shortcut=True) + + var out_channels_4 = int(512 * yolo_model_type[1]) + var conv_4 = Conv(g, C2f_2, out_channels_4, 3, 1, 2) + var C2f_3 = C2f(g, conv_4, out_channels_4, n=C2F_n_2, shortcut=True) + + var out_channels_5 = int(512 * yolo_model_type[1] * yolo_model_type[2]) + var conv_5 = Conv(g, C2f_3, out_channels_5, 3, 1, 2) + var C2f_4 = C2f(g, conv_5, out_channels_5, n=C2F_n_1, shortcut=True) + var SPPF_1 = SPPF(g, C2f_4, out_channels_5) + + # Head + var upsample_1 = g.op(OP.UPSAMPLE, SPPF_1, attributes=AttributeVector(Attribute("mode", "nearest"), Attribute("scales", TensorShape(2, 2)))) + + # The order of concats was wrong + var concat_1 = g.concat(upsample_1, C2f_3, dim=1) + + var out_channels_6 = int(512 * yolo_model_type[1]) + var C2f_5 = C2f(g, concat_1, out_channels_6, n=C2F_n_1, shortcut=False) + + var upsample_2 = g.op(OP.UPSAMPLE, C2f_5, attributes=AttributeVector(Attribute("mode", "nearest"), Attribute("scales", TensorShape(2, 2)))) + + var concat_2 = g.concat(upsample_2, C2f_2, dim=1) + + var out_channels_7 = int(256 * yolo_model_type[1]) + var C2f_6 = C2f(g, concat_2, out_channels_7, n=C2F_n_1, shortcut=False) + + var conv_6 = Conv(g, C2f_6, out_channels_7, 3, 1, 2) + var concat_3 = g.concat(conv_6, C2f_5, dim=1) + var C2f_7 = C2f(g, concat_3, out_channels_6, n=C2F_n_1, shortcut=False) + + var conv_7 = Conv(g, C2f_7, out_channels_6, 3, 1, 2) + var concat_4 = g.concat(conv_7, SPPF_1, dim=1) + var out_channels_8 = int(512 * yolo_model_type[1] * yolo_model_type[2]) + var C2f_8 = C2f(g, concat_4, out_channels_8, n=C2F_n_1, shortcut=False) + + # Detect + # declare them this way because the order of initializers in the onnx file is like this + var detect_1 = Detect(g, C2f_6, out_channels_7, 80, 1) + var detect_2 = Detect(g, C2f_7, out_channels_6, 80, 1) + var detect_3 = Detect(g, C2f_8, out_channels_8, 80, 1) + + var detect_1_1 = Detect(g, C2f_6, out_channels_7, 80, 2) + var detect_2_1 = Detect(g, C2f_7, out_channels_6, 80, 2) + var detect_3_1 = Detect(g, C2f_8, out_channels_8, 80, 2) + + var concat_detect_1 = g.concat(detect_1, detect_1_1, dim=1) + var concat_detect_2 = g.concat(detect_2, detect_2_1, dim=1) + var concat_detect_3 = g.concat(detect_3, detect_3_1, dim=1) + + # -------- output + var reshape_1 = g.op(OP.RESHAPE, concat_detect_1, attributes=AttributeVector(Attribute("shape", TensorShape(1, 144, concat_detect_1.shape[2] * concat_detect_1.shape[3])))) + + var reshape_2 = g.op(OP.RESHAPE, concat_detect_2, attributes=AttributeVector(Attribute("shape", TensorShape(1, 144, concat_detect_2.shape[2] * concat_detect_2.shape[3])))) + + var reshape_3 = g.op(OP.RESHAPE, concat_detect_3, attributes=AttributeVector(Attribute("shape", TensorShape(1, 144, concat_detect_3.shape[2] * concat_detect_3.shape[3])))) + + # -- + + var concat_5 = g.concat(reshape_1, reshape_2, reshape_3, dim=2) + + var split_sections = List[Int](64, 80) + var split_1 = g.split(concat_5, split_sections, dim=1) + + var for_second_concat = 
g.op(OP.SIGMOID, split_1[1]) + + var reshape_4 = g.op(OP.RESHAPE, split_1[0], attributes=AttributeVector(Attribute("shape", TensorShape(1, 4, 16, 8400)))) + + var transpose_1 = g.op(OP.TRANSPOSE, reshape_4, attributes=AttributeVector(Attribute("axes", List[Int](0, 2, 1, 3)))) + + var softmax = nn.Softmax(g, transpose_1, axis=1) + + var conv_norm_1 = nn.Conv2d(g, softmax, 1, 1, 0, 1, 1) + + var reshape_5 = g.op(OP.RESHAPE, conv_norm_1, attributes=AttributeVector(Attribute("shape", TensorShape(1, 4, 8400)))) + + var slice_1 = g.op(OP.SLICE, reshape_5, attributes=AttributeVector( + Attribute("axes", List[Int](1)), + Attribute("starts", List[Int](0)), + Attribute("ends", List[Int](2)))) + var slice_2 = g.op(OP.SLICE, reshape_5, attributes=AttributeVector( + Attribute("axes", List[Int](1)), + Attribute("starts", List[Int](2)), + Attribute("ends", List[Int](4)))) + + var sub_constant_value = g.input(TensorShape(1, 2, 8400)) + var sub_with_constant_1 = g.op(OP.SUB, sub_constant_value, slice_1) + var add_constant_value = g.input(TensorShape(1, 2, 8400)) + var add_with_constant_2 = g.op(OP.ADD, add_constant_value, slice_2) + + var add_1 = g.op(OP.ADD, sub_with_constant_1, add_with_constant_2) + var sub_1 = g.op(OP.SUB, add_with_constant_2, sub_with_constant_1) + + var div_1 = g.op(OP.DIV, add_1, 2) + + var concat_6 = g.concat(div_1, sub_1, dim=1) + + var mul_constant_value = g.input(TensorShape(1, 8400)) + var mul_with_constant_1 = g.op(OP.MUL, concat_6, mul_constant_value) + + var concat_7 = g.concat(mul_with_constant_1, for_second_concat, dim=1) + + g.out(concat_7) + + return g ^ + + +alias yolov8_n = StaticTuple[Float64, 3]( + 0.33, 0.25, 2 +) # d (depth_multiplier), w (width_multiplier), r (ratio) +# var yolov8_s +# var yolov8_m + + +fn get_constant_values_from_onnx_model(model_path: String) raises -> List[Tensor[dtype]]: + var onnx = Python.import_module("onnx") + + var model = onnx.load(model_path) + + var result = List[Tensor[dtype]]() + + for node in model.graph.node: + if node.op_type == "Constant": + for attr in node.attribute: + if attr.name == 'value': + var tensor = onnx.numpy_helper.to_array(attr.t) + if node.name == "/model.22/Constant_9": + result.append(to_tensor(tensor)) + if node.name == "/model.22/Constant_10": + result.append(to_tensor(tensor)) + if node.name == "/model.22/Constant_12": + result.append(to_tensor(tensor)) + + return result + + +fn main() raises: + alias graph = YoloV8(1, yolov8_n) + var model = nn.Model[graph]() + + # try: graph.render("node") + # except: print("Could not render graph") + + + model.load_model_data("./examples/data/yolov8n.onnx") + + var constant_values = get_constant_values_from_onnx_model("./examples/data/yolov8n.onnx") + + Python.add_to_path("./examples") + var get_image = Python.import_module("yolo_v8_utils") + + var image_tensor = to_tensor(get_image.get_image('./examples/data/bus.jpg')) + + + var res = model.inference(image_tensor, constant_values[0], constant_values[1], constant_values[2]) + + get_image.draw_bounding_box_yolo("bus.jpg", to_numpy(res[0])) \ No newline at end of file diff --git a/examples/yolov8n_onnx.py b/examples/yolov8n_onnx.py new file mode 100644 index 0000000..b880cad --- /dev/null +++ b/examples/yolov8n_onnx.py @@ -0,0 +1,159 @@ +import cv2.dnn +import numpy as np +import onnxruntime as rt + + +CLASSES = {0: "person", + 1: "bicycle", + 2: "car", + 3: "motorcycle", + 4: "airplane", + 5: "bus", + 6: "train", + 7: "truck", + 8: "boat", + 9: "traffic light", + 10: "fire hydrant", + 11: "stop sign", + 12: "parking meter", 
+ 13: "bench", + 14: "bird", + 15: "cat", + 16: "dog", + 17: "horse", + 18: "sheep", + 19: "cow", + 20: "elephant", + 21: "bear", + 22: "zebra", + 23: "giraffe", + 24: "backpack", + 25: "umbrella", + 26: "handbag", + 27: "tie", + 28: "suitcase", + 29: "frisbee", + 30: "skis", + 31: "snowboard", + 32: "sports ball", + 33: "kite", + 34: "baseball bat", + 35: "baseball glove", + 36: "skateboard", + 37: "surfboard", + 38: "tennis racket", + 39: "bottle", + 40: "wine glass", + 41: "cup", + 42: "fork", + 43: "knife", + 44: "spoon", + 45: "bowl", + 46: "banana", + 47: "apple", + 48: "sandwich", + 49: "orange", + 50: "broccoli", + 51: "carrot", + 52: "hot dog", + 53: "pizza", + 54: "donut", + 55: "cake", + 56: "chair", + 57: "couch", + 58: "potted plant", + 59: "bed", + 60: "dining table", + 61: "toilet", + 62: "tv", + 63: "laptop", + 64: "mouse", + 65: "remote", + 66: "keyboard", + 67: "cell phone", + 68: "microwave", + 69: "oven", + 70: "toaster", + 71: "sink", + 72: "refrigerator", + 73: "book", + 74: "clock", + 75: "vase", + 76: "scissors", + 77: "teddy bear", + 78: "hair drier", + 79: "toothbrush", +} + +colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) + + +def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): + label = f'{CLASSES[class_id]} ({confidence:.2f})' + color = colors[class_id] + cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) + cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + + +def main(onnx_model, input_image): + model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model) + original_image: np.ndarray = cv2.imread(input_image) + [height, width, _] = original_image.shape + length = max((height, width)) + image = np.zeros((length, length, 3), np.uint8) + image[0:height, 0:width] = original_image + scale = length / 640 + + blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True) + # model.setInput(blob) + # outputs = model.forward() + # print(outputs.shape) + + model2 = rt.InferenceSession(onnx_model) + + outputs = model2.run(None, {"images": blob})[0] + + + outputs = np.array([cv2.transpose(outputs[0])]) + rows = outputs.shape[1] + + boxes = [] + scores = [] + class_ids = [] + + for i in range(rows): + classes_scores = outputs[0][i][4:] + (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores) + if maxScore >= 0.25: + box = [ + outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]), + outputs[0][i][2], outputs[0][i][3]] + boxes.append(box) + scores.append(maxScore) + class_ids.append(maxClassIndex) + + result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5) + + detections = [] + for i in range(len(result_boxes)): + index = result_boxes[i] + box = boxes[index] + detection = { + 'class_id': class_ids[index], + 'class_name': CLASSES[class_ids[index]], + 'confidence': scores[index], + 'box': box, + 'scale': scale} + detections.append(detection) + draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), + round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale)) + + cv2.imshow('image', original_image) + cv2.waitKey(0) + cv2.destroyAllWindows() + + + # print(detections) + + +main('examples/data/yolov8n.onnx', "examples/data/bus.jpg") \ No newline at end of file From c5a2d8d80c3c893fe21e22602f98bb5ecbc1341a Mon Sep 17 00:00:00 2001 From: StijnWoestenborghs Date: Mon, 3 Jun 2024 13:09:58 +0200 Subject: [PATCH 11/16] add yolov8 cam example --- 
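Note (below the fold, ignored by `git am`): the reworked get_model_input()
letterboxes the frame to a square with np.pad before cv2.dnn.blobFromImage,
and draw_bounding_box_yolo() recomputes the same length / 640 scale to map
detections back to original pixels. A minimal NumPy/OpenCV sketch of that
preprocessing, with letterbox_blob as a hypothetical standalone helper that
returns the scale alongside the blob for illustration:

    import cv2
    import numpy as np

    def letterbox_blob(img):
        # Pad bottom/right with zeros to a square, as get_model_input does.
        h, w, _ = img.shape
        length = max(h, w)
        square = np.pad(img, ((0, length - h), (0, length - w), (0, 0)),
                        mode="constant", constant_values=0)
        # Factor that maps 640-grid detections back to original pixels.
        scale = length / 640
        # Normalize to [0, 1], resize to 640x640, and swap BGR -> RGB.
        blob = cv2.dnn.blobFromImage(square, scalefactor=1 / 255,
                                     size=(640, 640), swapRB=True)
        return blob, scale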
examples/yolo_v8_utils.py | 141 +++++++++++++++++++++++++++------- examples/yolov8.mojo | 8 +- examples/yolov8_cam.mojo | 85 +++++++++++++++++++++ examples/yolov8n_onnx.py | 157 +++----------------------------------- 4 files changed, 213 insertions(+), 178 deletions(-) create mode 100644 examples/yolov8_cam.mojo diff --git a/examples/yolo_v8_utils.py b/examples/yolo_v8_utils.py index 1081068..e7b22e4 100644 --- a/examples/yolo_v8_utils.py +++ b/examples/yolo_v8_utils.py @@ -1,17 +1,94 @@ import cv2 import numpy as np + CLASSES = { - 0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane", 5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light", 10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench", 14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow", 20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack", 25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee", 30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite", 34: "baseball bat", 35: "baseball glove", 36: "skateboard", 37: "surfboard", 38: "tennis racket", - 39: "bottle", 40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon", 45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange", 50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza", 54: "donut", 55: "cake", 56: "chair", 57: "couch", 58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet", 62: "tv", 63: "laptop", 64: "mouse", 65: "remote", 66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven", 70: "toaster", 71: "sink", 72: "refrigerator", 73: "book", 74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear", 78: "hair drier", 79: "toothbrush", + 0: "person", + 1: "bicycle", + 2: "car", + 3: "motorcycle", + 4: "airplane", + 5: "bus", + 6: "train", + 7: "truck", + 8: "boat", + 9: "traffic light", + 10: "fire hydrant", + 11: "stop sign", + 12: "parking meter", + 13: "bench", + 14: "bird", + 15: "cat", + 16: "dog", + 17: "horse", + 18: "sheep", + 19: "cow", + 20: "elephant", + 21: "bear", + 22: "zebra", + 23: "giraffe", + 24: "backpack", + 25: "umbrella", + 26: "handbag", + 27: "tie", + 28: "suitcase", + 29: "frisbee", + 30: "skis", + 31: "snowboard", + 32: "sports ball", + 33: "kite", + 34: "baseball bat", + 35: "baseball glove", + 36: "skateboard", + 37: "surfboard", + 38: "tennis racket", + 39: "bottle", + 40: "wine glass", + 41: "cup", + 42: "fork", + 43: "knife", + 44: "spoon", + 45: "bowl", + 46: "banana", + 47: "apple", + 48: "sandwich", + 49: "orange", + 50: "broccoli", + 51: "carrot", + 52: "hot dog", + 53: "pizza", + 54: "donut", + 55: "cake", + 56: "chair", + 57: "couch", + 58: "potted plant", + 59: "bed", + 60: "dining table", + 61: "toilet", + 62: "tv", + 63: "laptop", + 64: "mouse", + 65: "remote", + 66: "keyboard", + 67: "cell phone", + 68: "microwave", + 69: "oven", + 70: "toaster", + 71: "sink", + 72: "refrigerator", + 73: "book", + 74: "clock", + 75: "vase", + 76: "scissors", + 77: "teddy bear", + 78: "hair drier", + 79: "toothbrush", } colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) -# This is needed because calling this code directly in mojo gives error of numpy array circular base dependency - -def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): +def bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): label = f'{CLASSES[class_id]} ({confidence:.2f})' color = colors[class_id] cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) @@ -19,27 +96,22 @@ def draw_bounding_box(img, 
class_id, confidence, x, y, x_plus_w, y_plus_h): cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) -def get_image(image_path): +def get_model_input(image_path): # load the image original_image: np.ndarray = cv2.imread(image_path) - [height, width, _] = original_image.shape + height, width, _ = original_image.shape length = max((height, width)) - image = np.zeros((length, length, 3), np.uint8) - image[0:height, 0:width] = original_image - scale = length / 640 - - blob = cv2.dnn.blobFromImage( - image, scalefactor=1 / 255, size=(640, 640), swapRB=True) + image = np.pad(original_image, ((0, length - height), (0, length - width), (0, 0)), mode='constant', constant_values=0) + blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True) return blob -def draw_bounding_box_yolo(image_path, outputs): - original_image: np.ndarray = cv2.imread(image_path) - [height, width, _] = original_image.shape +def draw_bounding_box_yolo(original_image, outputs): + height, width, _ = original_image.shape length = max((height, width)) image = np.zeros((length, length, 3), np.uint8) - image[0:height, 0:width] = original_image + image[:height, :width] = original_image scale = length / 640 outputs = np.array([cv2.transpose(outputs[0])]) @@ -51,13 +123,14 @@ def draw_bounding_box_yolo(image_path, outputs): for i in range(rows): classes_scores = outputs[0][i][4:] - (minScore, maxScore, minClassLoc, (x, maxClassIndex) - ) = cv2.minMaxLoc(classes_scores) + (_, maxScore, _, (_, maxClassIndex)) = cv2.minMaxLoc(classes_scores) if maxScore >= 0.25: box = [ - outputs[0][i][0] - (0.5 * outputs[0][i][2] - ), outputs[0][i][1] - (0.5 * outputs[0][i][3]), - outputs[0][i][2], outputs[0][i][3]] + outputs[0][i][0] - (0.5 * outputs[0][i][2]), + outputs[0][i][1] - (0.5 * outputs[0][i][3]), + outputs[0][i][2], + outputs[0][i][3] + ] boxes.append(box) scores.append(maxScore) class_ids.append(maxClassIndex) @@ -75,10 +148,26 @@ def draw_bounding_box_yolo(image_path, outputs): 'box': box, 'scale': scale} detections.append(detection) - draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), - round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale)) - - cv2.imshow('image', original_image) + + bounding_box( + original_image, + class_ids[index], + scores[index], + round(box[0] * scale), + round(box[1] * scale), + round((box[0] + box[2]) * scale), + round((box[1] + box[3]) * scale) + ) + + return detections + + +def draw_bbox_from_image(image_path, outputs): + image: np.ndarray = cv2.imread(image_path) + + detections = draw_bounding_box_yolo(image, outputs) + + cv2.imshow('image', image) cv2.waitKey(0) cv2.destroyAllWindows() diff --git a/examples/yolov8.mojo b/examples/yolov8.mojo index 8f76344..eb3c16c 100644 --- a/examples/yolov8.mojo +++ b/examples/yolov8.mojo @@ -295,11 +295,9 @@ fn main() raises: var constant_values = get_constant_values_from_onnx_model("./examples/data/yolov8n.onnx") Python.add_to_path("./examples") - var get_image = Python.import_module("yolo_v8_utils") - - var image_tensor = to_tensor(get_image.get_image('./examples/data/bus.jpg')) - + var yolo_utils = Python.import_module("yolo_v8_utils") + var image_tensor = to_tensor(yolo_utils.get_model_input('./examples/data/bus.jpg')) var res = model.inference(image_tensor, constant_values[0], constant_values[1], constant_values[2]) - get_image.draw_bounding_box_yolo("bus.jpg", to_numpy(res[0])) \ No newline at end of file + yolo_utils.draw_bbox_from_image("./examples/data/bus.jpg", 
to_numpy(res[0])) \ No newline at end of file diff --git a/examples/yolov8_cam.mojo b/examples/yolov8_cam.mojo new file mode 100644 index 0000000..1c8b2c3 --- /dev/null +++ b/examples/yolov8_cam.mojo @@ -0,0 +1,85 @@ +import sys +from time.time import now +from python.python import Python +from math import max + +from yolov8 import YoloV8, get_constant_values_from_onnx_model + +import basalt.nn as nn +from basalt import Tensor, TensorShape, dtype +from basalt.utils.tensor_creation_utils import to_tensor, to_numpy + + +fn cam( + inout model: nn.Model, + constants: List[Tensor[dtype]] +) raises: + + Python.add_to_path("./examples") + var yolo_utils = Python.import_module("yolo_v8_utils") + + var cv2 = Python.import_module("cv2") + var np = Python.import_module("numpy") + var cap = cv2.VideoCapture(0) + + if not cap.isOpened(): + print("Error: Could not open webcam") + sys.exit(1) + + var height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT).to_float64() + var width = cap.get(cv2.CAP_PROP_FRAME_WIDTH).to_float64() + var length = max(height, width) + var pads = np.array([0, length - height, 0, length - width, 0, 0], dtype=np.int32).reshape(3, 2) + + var last_time = now() + + while True: + var r = cap.read() + + if not r[0]: + print("Error: Could not read frame") + break + + var image = np.pad(r[1], pads, mode='constant', constant_values=0) + var blob = cv2.dnn.blobFromImage(image, scalefactor=1/255, size=(640, 640), swapRB=True) + + var res = model.inference(to_tensor(blob), constants[0], constants[1], constants[2]) + + yolo_utils.draw_bounding_box_yolo(r[1], to_numpy(res[0])) + cv2.imshow( + 'Basalt', + cv2.putText( + r[1], + "FPS: " + String(1e9 / (now() - last_time)), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.30, (0, 0, 0), 1, cv2.LINE_AA + ) + ) + + last_time = now() + if int(cv2.waitKey(1) & 0xFF) == 27 or cv2.getWindowProperty('Basalt', cv2.WND_PROP_VISIBLE) < 1: + cv2.destroyAllWindows() + sys.exit() + + +fn main(): + + alias yolov8_n = StaticTuple[Float64, 3]( + 0.33, 0.25, 2 + ) # d (depth_multiplier), w (width_multiplier), r (ratio) + + alias graph = YoloV8(1, yolov8_n) + var model = nn.Model[graph]() + + model.load_model_data("./examples/data/yolov8n.onnx") + + try: + var constant_values = get_constant_values_from_onnx_model("./examples/data/yolov8n.onnx") + + cam( + model, + constant_values + ) + + except e: + print("Error in cam() function") + print(e) diff --git a/examples/yolov8n_onnx.py b/examples/yolov8n_onnx.py index b880cad..5b4eb2d 100644 --- a/examples/yolov8n_onnx.py +++ b/examples/yolov8n_onnx.py @@ -2,158 +2,21 @@ import numpy as np import onnxruntime as rt - -CLASSES = {0: "person", - 1: "bicycle", - 2: "car", - 3: "motorcycle", - 4: "airplane", - 5: "bus", - 6: "train", - 7: "truck", - 8: "boat", - 9: "traffic light", - 10: "fire hydrant", - 11: "stop sign", - 12: "parking meter", - 13: "bench", - 14: "bird", - 15: "cat", - 16: "dog", - 17: "horse", - 18: "sheep", - 19: "cow", - 20: "elephant", - 21: "bear", - 22: "zebra", - 23: "giraffe", - 24: "backpack", - 25: "umbrella", - 26: "handbag", - 27: "tie", - 28: "suitcase", - 29: "frisbee", - 30: "skis", - 31: "snowboard", - 32: "sports ball", - 33: "kite", - 34: "baseball bat", - 35: "baseball glove", - 36: "skateboard", - 37: "surfboard", - 38: "tennis racket", - 39: "bottle", - 40: "wine glass", - 41: "cup", - 42: "fork", - 43: "knife", - 44: "spoon", - 45: "bowl", - 46: "banana", - 47: "apple", - 48: "sandwich", - 49: "orange", - 50: "broccoli", - 51: "carrot", - 52: "hot dog", - 53: "pizza", - 54: "donut", - 55: "cake", - 56: 
"chair", - 57: "couch", - 58: "potted plant", - 59: "bed", - 60: "dining table", - 61: "toilet", - 62: "tv", - 63: "laptop", - 64: "mouse", - 65: "remote", - 66: "keyboard", - 67: "cell phone", - 68: "microwave", - 69: "oven", - 70: "toaster", - 71: "sink", - 72: "refrigerator", - 73: "book", - 74: "clock", - 75: "vase", - 76: "scissors", - 77: "teddy bear", - 78: "hair drier", - 79: "toothbrush", -} - -colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) - - -def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): - label = f'{CLASSES[class_id]} ({confidence:.2f})' - color = colors[class_id] - cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) - cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) +from yolo_v8_utils import get_model_input, draw_bbox_from_image def main(onnx_model, input_image): + blob = get_model_input(input_image) + model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model) - original_image: np.ndarray = cv2.imread(input_image) - [height, width, _] = original_image.shape - length = max((height, width)) - image = np.zeros((length, length, 3), np.uint8) - image[0:height, 0:width] = original_image - scale = length / 640 - - blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True) - # model.setInput(blob) - # outputs = model.forward() - # print(outputs.shape) - - model2 = rt.InferenceSession(onnx_model) - - outputs = model2.run(None, {"images": blob})[0] - - - outputs = np.array([cv2.transpose(outputs[0])]) - rows = outputs.shape[1] - - boxes = [] - scores = [] - class_ids = [] - - for i in range(rows): - classes_scores = outputs[0][i][4:] - (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores) - if maxScore >= 0.25: - box = [ - outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]), - outputs[0][i][2], outputs[0][i][3]] - boxes.append(box) - scores.append(maxScore) - class_ids.append(maxClassIndex) - - result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5) - - detections = [] - for i in range(len(result_boxes)): - index = result_boxes[i] - box = boxes[index] - detection = { - 'class_id': class_ids[index], - 'class_name': CLASSES[class_ids[index]], - 'confidence': scores[index], - 'box': box, - 'scale': scale} - detections.append(detection) - draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), - round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale)) - - cv2.imshow('image', original_image) - cv2.waitKey(0) - cv2.destroyAllWindows() + model = rt.InferenceSession(onnx_model) + outputs = model.run(None, {"images": blob})[0] - # print(detections) + draw_bbox_from_image(input_image, outputs) -main('examples/data/yolov8n.onnx', "examples/data/bus.jpg") \ No newline at end of file +main( + onnx_model='examples/data/yolov8n.onnx', + input_image="examples/data/bus.jpg" +) \ No newline at end of file From 5cc351e449a1acce03b5f88e6e00f17282638a8d Mon Sep 17 00:00:00 2001 From: NKspartan Date: Tue, 4 Jun 2024 22:58:34 -0600 Subject: [PATCH 12/16] Made assert_tensors_equal vectorized --- tests/testing_utils.mojo | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/testing_utils.mojo b/tests/testing_utils.mojo index 40bb73b..3706f54 100644 --- a/tests/testing_utils.mojo +++ b/tests/testing_utils.mojo @@ -1,6 +1,7 @@ from python.python import Python from collections import OptionalReg from testing import 
assert_equal, assert_almost_equal +from algorithm import vectorize from basalt import dtype from basalt.autograd import Graph, OP @@ -20,13 +21,24 @@ fn assert_tensors_equal[ mode == "exact" or mode == "almost", "Mode must be either 'exact' or 'almost'" ]() + alias nelts = simdwidthof[dtype]() + assert_equal(t1.shape(), t2.shape(), "Tensor shape mismatch") - for i in range(t1.num_elements()): + @parameter + fn v_iter[nelts: Int](i: Int) raises: + @parameter if mode == "almost": - assert_almost_equal(t1[i], t2[i], rtol=1e-5, atol=1e-5, msg=msg) + assert_almost_equal(t1.load[nelts](i), t2.load[nelts](i), rtol=1e-5, atol=1e-5, msg=msg) else: - assert_equal(t1[i], t2[i], msg=msg) + assert_equal(t1.load[nelts](i), t2.load[nelts](i), msg=msg) + + for i in range(0, t1.num_elements() - nelts + 1, nelts): + v_iter[nelts](i) + + # Check the remaining elements + for i in range(nelts * (t1.num_elements() // nelts), t1.num_elements()): + v_iter[1](i) fn test_unary_op[ From 4af6f50015018536ee8b354900d74358e1a4b816 Mon Sep 17 00:00:00 2001 From: NKspartan Date: Tue, 4 Jun 2024 23:00:33 -0600 Subject: [PATCH 13/16] Added linear upsample and more torch tests --- basalt/autograd/ops/mlops.mojo | 137 +++++++++++++++++++++++++---- tests/python/test_mlops_torch.mojo | 33 +++++-- 2 files changed, 150 insertions(+), 20 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 995e0c4..4713862 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -1,6 +1,7 @@ from algorithm import vectorize, parallelize -from math import exp, pow, max, min, abs +from math import exp, pow, max, min, abs, ceil, floor from math.limit import min_finite, max_finite +from memory import stack_allocation from basalt import Tensor, TensorShape from basalt.utils.tensorutils import elwise_transform @@ -667,30 +668,136 @@ struct UPSAMPLE: t1_shape: TensorShape, attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype]): - # Input is [N, C, D in, H in, W in], N is batch size and C is number of channels. Ranks 3-D, 4-D or 5-D tensors. - alias scales = attributes["scales"].value().to_shape() # Has to match input size (the last dimensions D, H and W) or just be one value + # Input is [N, C, D in, H in, W in], N is batch size and C is number of channels. Ranks 3-D, 4-D or 5-D tensors (only works on the spatial dimensions). 
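+        # NOTE: the two coordinate transforms selected below follow ONNX Resize
+        # semantics: "half_pixel" maps output index o to input coordinate
+        # max(0, (o + 0.5) / scale - 0.5), while "asymmetric" maps it to
+        # o // scale (floor division).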
+ alias scales = attributes["scales"].value().to_shape() # Has to match spatial input dims (the last dimensions D, H and W) alias mode = attributes["mode"].value().to_string() + # alias align_corners = attributes["align_corners"].value().to_bool() if attributes["align_corners"] else false + + @parameter + fn get_coordination_mode() -> String: + if mode == "linear": + return "half_pixel" + else: + return "asymmetric" + alias coordination_transforamtion = get_coordination_mode() alias strides = t1_shape.strides() - alias total_length = t1_shape.num_elements() + var strides_res = res.strides() + + var res_shape = res.shape() - alias first_loop = total_length // strides[1] + alias first_loop = t1_shape[0] * t1_shape[1] + + @always_inline + fn pos_asymmetric(pos: Int, scale: Int) -> Int: + return pos // scale + + @always_inline + fn pos_half_pixel(pos: Int, scale: Int) -> Float64: + return max(0.0, (pos + 0.5) / scale - 0.5) - var strides_res = res.strides() @parameter - if mode == "nearest": + @always_inline + fn get_value_interpolate[size: Int]( + indeces_t1: StaticTuple[Float64, size], + index_t1_sum: Float64 + ) -> SIMD[t1.dtype, 1]: @parameter - fn p_iter(i: Int): - var offset = i * strides[1] - var offset_res = i * strides_res[1] + if mode == "nearest": + return t1[int(index_t1_sum)] + elif mode == "linear": + var t1_pos_floor = floor(indeces_t1[1]) + var t1_pos_ceil = min(ceil(indeces_t1[1]), t1_shape[2] - 1) + + var v1 = t1[int(indeces_t1[0]) + int(t1_pos_floor)] + var v2 = t1[int(indeces_t1[0]) + int(t1_pos_ceil)] + + return v1 + (v2 - v1) * (indeces_t1[1] - t1_pos_floor) + else: + return 0 + + @always_inline + fn get_t1_position( + pos: Int, scale: Int, dim: Int + ) -> Float64: + @parameter + if coordination_transforamtion == "asymmetric": + return pos_asymmetric(pos, scale) + elif coordination_transforamtion == "half_pixel": + return pos_half_pixel(pos, scale) + else: + return 0 + + @parameter + fn p_iter(i: Int): + var offset_t1 = i * strides[1] + var offset_res = i * strides_res[1] + + @parameter + if t1_shape.rank() == 3: + var positions_t1 = StaticTuple[Float64, 2](0) + var positions_res = StaticIntTuple[2](0) + + positions_res[0] = offset_res + positions_t1[0] = offset_t1 + + @parameter + fn v_iter[nelts: Int](j: Int): + positions_res[1] = j + + var index_res = positions_res[0] + positions_res[1] + var values = res.load[nelts](index_res) + + for k in range(nelts): + positions_t1[1] = get_t1_position(j + k, scales[scales.rank() - 1], 0) + + values[k] = get_value_interpolate( + positions_t1, + positions_t1[0] + positions_t1[1]) + + res.store[nelts](index_res, values) + - Self.recursive_iter[2, t1_shape, scales]( - res, t1, strides_res, offset, offset_res) + vectorize[v_iter, nelts](res_shape[res.rank() - 1]) + elif t1_shape.rank() == 4: + var positions_t1 = StaticTuple[Float64, 3](0) + var positions_res = StaticIntTuple[3](0) - parallelize[p_iter](first_loop) - else: - pass + positions_res[0] = offset_res + positions_t1[0] = offset_t1 + + for j in range(res_shape[2]): + positions_res[1] = j * strides_res[2] + positions_t1[1] = get_t1_position(j, scales[0], 0) * strides[2] + + @parameter + fn v_iter_1[nelts: Int](k: Int): + positions_res[2] = k + + var index_res = positions_res[0] + positions_res[1] + positions_res[2] + var values = res.load[nelts](index_res) + + for l in range(nelts): + positions_t1[2] = get_t1_position(k + l, scales[scales.rank() - 1], 1) + + values[l] = get_value_interpolate( + positions_t1, + positions_t1[0] + positions_t1[1] + positions_t1[2]) + + 
res.store[nelts](index_res, values) + + vectorize[v_iter_1, nelts](res_shape[res.rank() - 1]) + + elif t1_shape.rank() == 5: + for j in range(res.shape()[2]): + for k in range(res.shape()[3]): + pass + else: + # Error + pass + + parallelize[p_iter](first_loop) @staticmethod fn backward[ diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index 4821d52..ccf9b2f 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -442,21 +442,44 @@ fn test_SLICE() raises: fn test_UPSAMPLE() raises: - alias t1_shape = TensorShape(40, 40, 120, 120) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + alias t1_shape = TensorShape(41, 41, 43) + var t1 = Tensor[dtype](t1_shape) rand(t1.data(), t1.num_elements()) alias attributes = AttributeVector( - Attribute("scales", TensorShape(2, 2)), - Attribute("mode", "nearest") + Attribute("scales", TensorShape(3)), + Attribute("mode", "linear") ) - alias ug_shape = TensorShape(40, 40, 240, 240) + alias ug_shape = TensorShape(41, 41, 129) var ug = Tensor[dtype](ug_shape) var expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes) test_unary_op[OP.UPSAMPLE, t1_shape, attributes](t1, expected_and_grad.expected) + alias attributes_2 = AttributeVector( + Attribute("scales", TensorShape(3)), + Attribute("mode", "nearest") + ) + + expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_2) + test_unary_op[OP.UPSAMPLE, t1_shape, attributes_2](t1, expected_and_grad.expected) + + alias t1_shape_1 = TensorShape(40, 40, 120, 120) + t1 = Tensor[dtype](t1_shape_1) + rand(t1.data(), t1.num_elements()) + + alias attributes_3 = AttributeVector( + Attribute("scales", TensorShape(2, 3)), + Attribute("mode", "nearest") + ) + + alias ug_shape_1 = TensorShape(40, 40, 240, 360) + ug = Tensor[dtype](ug_shape_1) + + expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_3) + test_unary_op[OP.UPSAMPLE, t1_shape_1, attributes_3](t1, expected_and_grad.expected) + fn main(): print("Running mlops (compare with torch) tests") From 6295ce64b82eb37fe6aed322242035806b8591dc Mon Sep 17 00:00:00 2001 From: NKspartan Date: Thu, 6 Jun 2024 17:23:08 -0600 Subject: [PATCH 14/16] Added 5D ability to upsample --- basalt/autograd/ops/mlops.mojo | 30 +++++++++++++++++++++++++++++- tests/python/test_mlops_torch.mojo | 22 ++++++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 4713862..58841f7 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -729,6 +729,7 @@ struct UPSAMPLE: else: return 0 + # It is possible to use gather here; the only problem is being able to create a SIMD arange in vectorized form (if it is built with a for loop it is probably no faster). (And from tests gather seems to be slower, maybe because of all the casts involved and because the arange of positions is not vectorized.) @parameter fn p_iter(i: Int): var offset_t1 = i * strides[1] @@ -790,9 +791,36 @@ struct UPSAMPLE: vectorize[v_iter_1, nelts](res_shape[res.rank() - 1]) elif t1_shape.rank() == 5: + var positions_t1 = StaticTuple[Float64, 4](0) + var positions_res = StaticIntTuple[4](0) + + positions_res[0] = offset_res + positions_t1[0] = offset_t1 + for j in range(res.shape()[2]): + positions_res[1] = j * strides_res[2] + positions_t1[1] = get_t1_position(j, scales[0], 0) * strides[2] for k in range(res.shape()[3]): - pass + positions_res[2] = k * strides_res[3] + positions_t1[2] = get_t1_position(k, scales[1], 1) * strides[3] + + @parameter + fn v_iter_2[nelts: Int](l: Int): + positions_res[3] = l + + var index_res = positions_res[0] + positions_res[1] + positions_res[2] + positions_res[3] + var values = res.load[nelts](index_res) + + for m in range(nelts): + positions_t1[3] = get_t1_position(l + m, scales[scales.rank() - 1], 2) + + values[m] = get_value_interpolate( + positions_t1, + positions_t1[0] + positions_t1[1] + positions_t1[2] + positions_t1[3]) + + res.store[nelts](index_res, values) + + vectorize[v_iter_2, nelts](res_shape[res.rank() - 1]) else: # Error pass diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index ccf9b2f..a3ae6e3 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -448,18 +448,19 @@ fn test_UPSAMPLE() raises: alias attributes = AttributeVector( Attribute("scales", TensorShape(3)), - Attribute("mode", "linear") + Attribute("mode", "nearest") ) alias ug_shape = TensorShape(41, 41, 129) var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) var expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes) test_unary_op[OP.UPSAMPLE, t1_shape, attributes](t1, expected_and_grad.expected) alias attributes_2 = AttributeVector( Attribute("scales", TensorShape(3)), - Attribute("mode", "nearest") + Attribute("mode", "linear") ) expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_2) test_unary_op[OP.UPSAMPLE, t1_shape, attributes_2](t1, expected_and_grad.expected) @@ -476,10 +477,27 @@ fn test_UPSAMPLE() raises: alias ug_shape_1 = TensorShape(40, 40, 240, 360) ug = Tensor[dtype](ug_shape_1) + rand(ug.data(), ug.num_elements()) expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_3) test_unary_op[OP.UPSAMPLE, t1_shape_1, attributes_3](t1, expected_and_grad.expected) + alias t1_shape_2 = TensorShape(5, 5, 10, 20, 60) + t1 = Tensor[dtype](t1_shape_2) + rand(t1.data(), t1.num_elements()) + + alias attributes_4 = AttributeVector( + Attribute("scales", TensorShape(2, 3, 4)), + Attribute("mode", "nearest") + ) + + alias ug_shape_2 = TensorShape(5, 5, 20, 60, 240) + ug = Tensor[dtype](ug_shape_2) + rand(ug.data(), ug.num_elements()) + + expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_4) + test_unary_op[OP.UPSAMPLE, t1_shape_2, attributes_4](t1, expected_and_grad.expected) + fn main(): print("Running mlops (compare with torch) tests") From de1fb4149736cd17cf3f7befe57f838045db142f Mon Sep 17 00:00:00 2001 From: NKspartan Date: Thu, 6 Jun 2024 18:40:46 -0600 Subject: [PATCH 15/16] Added bilinear op --- basalt/autograd/ops/mlops.mojo | 49 ++++++++++++++++++++---------- tests/python/test_mlops_torch.mojo | 18 ++++++++--- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 58841f7..e06dbe1 100644 --- a/basalt/autograd/ops/mlops.mojo
+++ b/basalt/autograd/ops/mlops.mojo @@ -675,7 +675,7 @@ struct UPSAMPLE: @parameter fn get_coordination_mode() -> String: - if mode == "linear": + if mode == "linear" or mode == "bilinear": return "half_pixel" else: return "asymmetric" @@ -700,12 +700,16 @@ struct UPSAMPLE: @parameter @always_inline fn get_value_interpolate[size: Int]( - indeces_t1: StaticTuple[Float64, size], - index_t1_sum: Float64 + indeces_t1: StaticTuple[Float64, size] ) -> SIMD[t1.dtype, 1]: @parameter if mode == "nearest": - return t1[int(index_t1_sum)] + var indeces_t1_sum = indeces_t1[0] + @unroll + for i in range(1, size): + indeces_t1_sum += indeces_t1[i] * strides[i + 1] + + return t1[int(indeces_t1_sum)] elif mode == "linear": var t1_pos_floor = floor(indeces_t1[1]) var t1_pos_ceil = min(ceil(indeces_t1[1]), t1_shape[2] - 1) @@ -714,6 +718,25 @@ struct UPSAMPLE: var v2 = t1[int(indeces_t1[0]) + int(t1_pos_ceil)] return v1 + (v2 - v1) * (indeces_t1[1] - t1_pos_floor) + elif mode == "bilinear": + var t1_pos_floor_y = floor(indeces_t1[1]) + var t1_pos_ceil_y = min(ceil(indeces_t1[1]), t1_shape[2] - 1) + + var t1_pos_floor_x = floor(indeces_t1[2]) + var t1_pos_ceil_x = min(ceil(indeces_t1[2]), t1_shape[3] - 1) + + var v1 = t1[int(indeces_t1[0]) + int(t1_pos_floor_y) * strides[2] + int(t1_pos_floor_x) * strides[3]] + var v2 = t1[int(indeces_t1[0]) + int(t1_pos_floor_y) * strides[2] + int(t1_pos_ceil_x) * strides[3]] + var v3 = t1[int(indeces_t1[0]) + int(t1_pos_ceil_y) * strides[2] + int(t1_pos_floor_x) * strides[3]] + var v4 = t1[int(indeces_t1[0]) + int(t1_pos_ceil_y) * strides[2] + int(t1_pos_ceil_x) * strides[3]] + + var wy = indeces_t1[1] - t1_pos_floor_y + var wx = indeces_t1[2] - t1_pos_floor_x + + var top_interp = v1 + (v2 - v1) * wx + var bottom_interp = v3 + (v4 - v3) * wx + + return top_interp + (bottom_interp - top_interp) * wy else: return 0 @@ -753,9 +776,7 @@ struct UPSAMPLE: for k in range(nelts): positions_t1[1] = get_t1_position(j + k, scales[scales.rank() - 1], 0) - values[k] = get_value_interpolate( - positions_t1, - positions_t1[0] + positions_t1[1]) + values[k] = get_value_interpolate(positions_t1) res.store[nelts](index_res, values) @@ -770,7 +791,7 @@ struct UPSAMPLE: for j in range(res_shape[2]): positions_res[1] = j * strides_res[2] - positions_t1[1] = get_t1_position(j, scales[0], 0) * strides[2] + positions_t1[1] = get_t1_position(j, scales[0], 0) @parameter fn v_iter_1[nelts: Int](k: Int): @@ -782,9 +803,7 @@ struct UPSAMPLE: for l in range(nelts): positions_t1[2] = get_t1_position(k + l, scales[scales.rank() - 1], 1) - values[l] = get_value_interpolate( - positions_t1, - positions_t1[0] + positions_t1[1] + positions_t1[2]) + values[l] = get_value_interpolate(positions_t1) res.store[nelts](index_res, values) @@ -799,10 +818,10 @@ struct UPSAMPLE: for j in range(res.shape()[2]): positions_res[1] = j * strides_res[2] - positions_t1[1] = get_t1_position(j, scales[0], 0) * strides[2] + positions_t1[1] = get_t1_position(j, scales[0], 0) for k in range(res.shape()[3]): positions_res[2] = k * strides_res[3] - positions_t1[2] = get_t1_position(k, scales[1], 1) * strides[3] + positions_t1[2] = get_t1_position(k, scales[1], 1) @parameter fn v_iter_2[nelts: Int](l: Int): @@ -814,9 +833,7 @@ struct UPSAMPLE: for m in range(nelts): positions_t1[3] = get_t1_position(l + m, scales[scales.rank() - 1], 2) - values[m] = get_value_interpolate( - positions_t1, - positions_t1[0] + positions_t1[1] + positions_t1[2] + positions_t1[3]) + values[m] = get_value_interpolate(positions_t1) 
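# (Editor's note.) The "bilinear" branch added above is the standard two-stage
# lerp over a 2x2 neighbourhood, with weights wx, wy in [0, 1]:
#   top    = v1 * (1 - wx) + v2 * wx
#   bottom = v3 * (1 - wx) + v4 * wx
#   out    = top * (1 - wy) + bottom * wy
# which expands to v1*(1-wx)*(1-wy) + v2*wx*(1-wy) + v3*(1-wx)*wy + v4*wx*wy.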
res.store[nelts](index_res, values) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index a3ae6e3..69a4a12 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -466,7 +466,7 @@ fn test_UPSAMPLE() raises: expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_2) test_unary_op[OP.UPSAMPLE, t1_shape, attributes_2](t1, expected_and_grad.expected) - alias t1_shape_1 = TensorShape(40, 40, 120, 120) + alias t1_shape_1 = TensorShape(20, 20, 120, 120) t1 = Tensor[dtype](t1_shape_1) rand(t1.data(), t1.num_elements()) @@ -475,18 +475,26 @@ fn test_UPSAMPLE() raises: Attribute("mode", "nearest") ) - alias ug_shape_1 = TensorShape(40, 40, 240, 360) + alias ug_shape_1 = TensorShape(20, 20, 240, 360) ug = Tensor[dtype](ug_shape_1) rand(ug.data(), ug.num_elements()) expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_3) test_unary_op[OP.UPSAMPLE, t1_shape_1, attributes_3](t1, expected_and_grad.expected) + alias attributes_4 = AttributeVector( + Attribute("scales", TensorShape(2, 3)), + Attribute("mode", "bilinear") + ) + + expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_4) + test_unary_op[OP.UPSAMPLE, t1_shape_1, attributes_4](t1, expected_and_grad.expected) + alias t1_shape_2 = TensorShape(5, 5, 10, 20, 60) t1 = Tensor[dtype](t1_shape_2) rand(t1.data(), t1.num_elements()) - alias attributes_4 = AttributeVector( + alias attributes_5 = AttributeVector( Attribute("scales", TensorShape(2, 3, 4)), Attribute("mode", "nearest") ) @@ -495,8 +503,8 @@ fn test_UPSAMPLE() raises: ug = Tensor[dtype](ug_shape_2) rand(ug.data(), ug.num_elements()) - expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_4) - test_unary_op[OP.UPSAMPLE, t1_shape_2, attributes_4](t1, expected_and_grad.expected) + expected_and_grad = torch_unary_op(OP.UPSAMPLE, t1, ug, attributes_5) + test_unary_op[OP.UPSAMPLE, t1_shape_2, attributes_5](t1, expected_and_grad.expected) fn main(): From 11e84112320fadbd8049e1cd00de445fb85cc663 Mon Sep 17 00:00:00 2001 From: NKspartan Date: Fri, 19 Jul 2024 17:58:42 -0600 Subject: [PATCH 16/16] Merged with main --- basalt/autograd/attributes.mojo | 18 +- basalt/autograd/graph.mojo | 2 +- basalt/autograd/ops/basics.mojo | 16 +- basalt/autograd/ops/conv.mojo | 1 - basalt/autograd/ops/dynamics.mojo | 8 +- basalt/autograd/ops/matmul.mojo | 2 +- basalt/autograd/ops/mlops.mojo | 248 +++++++++---- basalt/autograd/ops/ops.mojo | 84 ++++- basalt/autograd/ops/pool.mojo | 6 +- basalt/nn/__init__.mojo | 9 +- basalt/nn/activations.mojo | 13 +- basalt/nn/model.mojo | 17 +- basalt/nn/optim.mojo | 12 +- basalt/nn/tensor.mojo | 15 +- basalt/utils/bytes.mojo | 16 +- basalt/utils/collection.mojo | 16 +- basalt/utils/datasets.mojo | 6 +- basalt/utils/itertools.mojo | 2 +- basalt/utils/math_util.mojo | 41 +++ basalt/utils/onnx_utils.mojo | 8 +- basalt/utils/perf_utils.mojo | 17 +- basalt/utils/rand_utils.mojo | 3 +- basalt/utils/tensor_creation_utils.mojo | 18 +- basalt/utils/tensorutils.mojo | 12 +- examples/yolov8.mojo | 3 +- examples/yolov8_cam.mojo | 2 +- tests/mojo/test_activations.mojo | 56 ++- tests/mojo/test_mlops.mojo | 409 +++++++++++++++------ tests/mojo/test_tensorutils.mojo | 5 +- tests/mojo/test_tensorutils_data.mojo | 3 +- tests/python/test_mlops_torch.mojo | 224 ++++++++--- tests/python/test_models_mnist.mojo | 2 +- tests/python/test_models_regression.mojo | 2 +- tests/python/test_models_sin_estimate.mojo | 4 +- 34 files changed, 915 insertions(+), 385 deletions(-) create mode 
100644 basalt/utils/math_util.mojo diff --git a/basalt/autograd/attributes.mojo b/basalt/autograd/attributes.mojo index 5a57a9a..3e1c3b3 100644 --- a/basalt/autograd/attributes.mojo +++ b/basalt/autograd/attributes.mojo @@ -1,4 +1,5 @@ from collections import Optional, OptionalReg +from utils.static_tuple import StaticTuple from basalt.nn.tensor import Tensor, TensorShape, MAX_RANK from basalt.utils.bytes import Bytes, scalar_to_bytes, bytes_to_scalar @@ -45,9 +46,8 @@ struct AttributeVector(Sized, Stringable, CollectionElement): var attributes: StaticTuple[Attribute, MAX_ATTRS] var size: Int - @always_inline("nodebug") fn __init__(inout self, *attributes: Attribute): - self.attributes = StaticTuple[Attribute, MAX_ATTRS]() + self.attributes = StaticTuple[Attribute, MAX_ATTRS](Attribute("", "")) self.size = len(attributes) for i in range(self.size): self.attributes[i] = attributes[i] @@ -67,12 +67,10 @@ struct AttributeVector(Sized, Stringable, CollectionElement): return self.attributes[i] return None - @always_inline("nodebug") fn append(inout self, attribute: Attribute): self.attributes[self.size] = attribute self.size += 1 - @always_inline("nodebug") fn __str__(self) -> String: var s: String = "[" for i in range(self.size): @@ -90,7 +88,6 @@ struct Attribute(Stringable, CollectionElement): var type: AttributeType var size: Int - @always_inline("nodebug") fn __init__(inout self, name: String, value: String): self.data_shape = StaticIntTuple[MAX_RANK]() self.name = Bytes[MAX_NAME_CHARS](name) @@ -98,7 +95,6 @@ struct Attribute(Stringable, CollectionElement): self.type = AttributeType.STRING self.size = len(value) - @always_inline("nodebug") fn __init__(inout self, name: String, value: TensorShape): self.data_shape = StaticIntTuple[MAX_RANK]() self.name = Bytes[MAX_NAME_CHARS](name) @@ -109,7 +105,6 @@ struct Attribute(Stringable, CollectionElement): for i in range(self.size): self.data_shape[i] = value._shape[i] - @always_inline("nodebug") fn __init__[N: Int](inout self, name: String, value: StaticIntTuple[N]): constrained[N < MAX_RANK, "Attribute rank must be less than MAX_RANK."]() @@ -122,7 +117,6 @@ struct Attribute(Stringable, CollectionElement): for i in range(self.size): self.data_shape[i] = value[i] - @always_inline("nodebug") fn __init__[dtype: DType](inout self, name: String, value: Scalar[dtype]): constrained[dtype.is_numeric(), "Attribute value must be numeric."]() @@ -132,29 +126,23 @@ struct Attribute(Stringable, CollectionElement): self.type = AttributeType(dtype) self.size = 1 - @always_inline("nodebug") fn __init__(inout self, name: String, value: Int): self.__init__(name, Int64(value)) self.data_shape[0] = 1 - @always_inline("nodebug") fn __init__(inout self, name: String, value: FloatLiteral): self.__init__(name, Float64(value)) self.data_shape[0] = 1 - @always_inline("nodebug") fn __str__(self) -> String: return "Attribute(" + str(self.name) + ", " + "..." 
+ ")" - @always_inline("nodebug") fn to_string(self) -> String: return str(self.data) - @always_inline("nodebug") fn to_shape(self) -> TensorShape: return TensorShape(rank=self.size, shape=self.data_shape) - @always_inline("nodebug") fn to_static[N: Int](self) -> StaticIntTuple[N]: constrained[N < MAX_RANK, "Attribute rank must be less than MAX_RANK."]() @@ -165,13 +153,11 @@ struct Attribute(Stringable, CollectionElement): return result - @always_inline("nodebug") fn to_scalar[dtype: DType](self) -> Scalar[dtype]: constrained[dtype.is_numeric(), "Attribute value must be numeric."]() return bytes_to_scalar[dtype](self.data) - @always_inline("nodebug") fn to_int(self) -> Int: return int(self.to_scalar[DType.int64]()) diff --git a/basalt/autograd/graph.mojo b/basalt/autograd/graph.mojo index b4fdf53..cd28b09 100644 --- a/basalt/autograd/graph.mojo +++ b/basalt/autograd/graph.mojo @@ -43,7 +43,7 @@ struct Graph: self.inputs.append(symbol) else: if data is not None: - self.params.put(symbol, data.take()) + self.params.put(symbol, data.value()[]) else: self.params.put(symbol) diff --git a/basalt/autograd/ops/basics.mojo b/basalt/autograd/ops/basics.mojo index 2305377..74662ca 100644 --- a/basalt/autograd/ops/basics.mojo +++ b/basalt/autograd/ops/basics.mojo @@ -1,12 +1,15 @@ -from math import add, sub, mul, div, log, exp +from math import log, exp from algorithm import vectorize from memory import memcpy +from utils.numerics import isinf from basalt import Tensor, TensorShape from basalt.nn.tensor import MAX_RANK from basalt.utils.tensorutils import * from basalt.autograd.attributes import Attribute, AttributeVector from basalt.autograd.ops.matmul import dot, dot_transpose_t1, dot_transpose_t2 +from basalt.utils.math_util import add, sub, mul, div + """ Implement forward and backward operations for basic tensor manipulations. 
@@ -316,7 +319,9 @@ struct POW: # d(x^y) / dx = y * x^(y-1) # d(x^y) / dy = sum( x^y * log(x) ) var res_grad: Tensor[dtype] - var a = int(t2[0]) + var a = t2[0] + + alias epsilon = 1e-12 @parameter if tensor_id == 0: @@ -329,13 +334,18 @@ struct POW: vectorize[vec_pow_bw_x, nelts](t1_shape.num_elements()) else: + # Gradient of the exponent res_grad = Tensor[dtype](t2_shape) # t2_shape == TensorShape(1) @parameter fn vec_pow_bw_y[nelts: Int](i: Int): + # Handle the case where the value passed to log is 0.0 (log returns -inf) + var temp_log = log(t1.load[nelts](i)) + var temp_log_is_inf = isinf(temp_log) + temp_log = temp_log_is_inf.select(0, temp_log) res_grad[0] += ( (t1.load[nelts](i) ** a) - * log(t1.load[nelts](i)) + * temp_log * ug.load[nelts](i) ).reduce_add() diff --git a/basalt/autograd/ops/conv.mojo b/basalt/autograd/ops/conv.mojo index 4592c51..774eb03 100644 --- a/basalt/autograd/ops/conv.mojo +++ b/basalt/autograd/ops/conv.mojo @@ -2,7 +2,6 @@ from basalt import Tensor, TensorShape from basalt.autograd.attributes import AttributeVector from algorithm import parallelize, vectorize, tile -from math import divmod from utils.loop import unroll diff --git a/basalt/autograd/ops/dynamics.mojo b/basalt/autograd/ops/dynamics.mojo index 0f304ef..5c30493 100644 --- a/basalt/autograd/ops/dynamics.mojo +++ b/basalt/autograd/ops/dynamics.mojo @@ -33,7 +33,7 @@ struct CONCAT: fn forward[attributes: AttributeVector]( inputs: List[Symbol], outputs: List[Symbol], - parameters: Parameters, + inout parameters: Parameters, ): alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0 var n_chunks = Self.calc_chunks(inputs[0].shape, dim) @@ -58,7 +58,7 @@ struct CONCAT: fn backward[input_id: Int, attributes: AttributeVector]( inputs: List[Symbol], outputs: List[Symbol], - parameters: Parameters, + inout parameters: Parameters, ) -> Tensor[dtype]: alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0 var n_chunks = Self.calc_chunks(inputs[0].shape, dim) @@ -113,7 +113,7 @@ struct SPLIT: fn forward[attributes: AttributeVector]( inputs: List[Symbol], outputs: List[Symbol], - parameters: Parameters, + inout parameters: Parameters, ): alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0 alias sections = attributes["sections"].value().to_shape() @@ -139,7 +139,7 @@ struct SPLIT: fn backward[input_id: Int, attributes: AttributeVector]( inputs: List[Symbol], outputs: List[Symbol], - parameters: Parameters, + inout parameters: Parameters, ) -> Tensor[dtype]: alias dim = attributes["dim"].value().to_int() if attributes["dim"] else 0 alias sections = attributes["sections"].value().to_shape() diff --git a/basalt/autograd/ops/matmul.mojo b/basalt/autograd/ops/matmul.mojo index e638f81..bc2cf2b 100644 --- a/basalt/autograd/ops/matmul.mojo +++ b/basalt/autograd/ops/matmul.mojo @@ -18,7 +18,7 @@ fn calculate_block[ for k in range(K): - @unroll + @parameter for m in range(BLOCK_M): @parameter diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index e06dbe1..30a61e8 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -1,7 +1,7 @@ from algorithm import vectorize, parallelize -from math import exp, pow, max, min, abs, ceil, floor +from math import exp, floor, ceil -from math.limit import min_finite, max_finite +from utils.numerics import min_finite, max_finite -from memory import stack_allocation +from utils.static_tuple import StaticTuple from basalt import Tensor, TensorShape from basalt.utils.tensorutils import elwise_transform
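[Editor's note] The LEAKYRELU op introduced in the next hunks computes, elementwise, f(x) = x for x > 0 and f(x) = negative_slope * x otherwise; its derivative is 1, respectively negative_slope. A minimal scalar sketch (illustrative only; the patch itself operates on SIMD vectors via select):

    fn leaky_relu_scalar(x: Float64, negative_slope: Float64 = 0.01) -> Float64:
        # f(x) = x if x > 0, else negative_slope * x
        return x if x > 0 else negative_slope * x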
@@ -54,7 +54,7 @@ struct SIGMOID: vectorize[vec_sigmoid_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct RELU: @@ -102,7 +102,62 @@ struct RELU: vectorize[vec_relu_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ + + +struct LEAKYRELU: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward operation of leaky_relu.""" + + fn leaky_relu[ + type: DType, + simd_width: Int, + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + return (x > 0).select(x, x * negative_slope) + + elwise_transform[leaky_relu](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward operation of leaky_relu.""" + + @always_inline + fn leaky_relu_bw[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + + return (x > 0).select[type](1, negative_slope) + + var res_grad = Tensor[dtype](ug_shape) + + @parameter + fn vec_leaky_relu_bw[nelts: Int](idx: Int): + res_grad.store[nelts]( + idx, + leaky_relu_bw(t1.load[nelts](idx)) * ug.load[nelts](idx), + ) + + vectorize[vec_leaky_relu_bw, nelts](ug_shape.num_elements()) + + return res_grad^ struct TANH: @@ -148,7 +203,7 @@ struct TANH: vectorize[vec_tanh_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct CLIP: @@ -166,12 +221,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() @parameter fn vec_clip[nelts: Int](i: Int): @@ -189,12 +244,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() var res_grad = Tensor[dtype](t_shape) @@ -203,17 +258,21 @@ struct CLIP: var val = t.load[nelts](i) res_grad.store[nelts]( i, - ((val >= min_val) * (val <= max_val)).select(ug.load[nelts](i), 0), + ((val >= min_val) * (val <= max_val)).select( + ug.load[nelts](i), 0 + ), ) vectorize[vec_clip_bw, nelts, size = t_shape.num_elements()]() - return res_grad ^ + return res_grad^ struct SQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -241,12 +300,14 @@ struct SQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = 
Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct UNSQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -278,7 +339,7 @@ struct UNSQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct SLICE: @@ -287,7 +348,7 @@ struct SLICE: # Adjust negative indices & ensure they are within bounds. var s = slice if slice >= 0 else dim_size + slice return max(min(s, dim_size), 0) - + @staticmethod fn default_starts(shape: TensorShape) -> List[Int]: var starts = List[Int]() @@ -308,7 +369,7 @@ struct SLICE: for i in range(shape.rank()): steps.append(1) return steps^ - + @staticmethod fn default_axes(shape: TensorShape) -> List[Int]: # NOTE: axes can't be negative @@ -318,38 +379,55 @@ struct SLICE: return axes^ @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: # NOTE: Starts and ends have to be of the same size # NOTE: If axes not provided, starts and ends have to be of the same size as t1_shape var starts = attributes["starts"].value().to_shape() var ends = attributes["ends"].value().to_shape() - var steps = attributes["steps"].value().to_shape() if attributes["steps"] else Self.default_steps(starts) - var axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) + var steps = attributes["steps"].value().to_shape() if attributes[ + "steps" + ] else Self.default_steps(starts) + var axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) var new_shape = t1_shape for i in range(starts.rank()): var axis = axes[i] - new_shape[axis] = len(range( - start = Self.adjust_boundary(starts[i], t1_shape[axis]), - end = Self.adjust_boundary(ends[i], t1_shape[axis]), - step = steps[i] - )) + new_shape[axis] = len( + range( + start=Self.adjust_boundary(starts[i], t1_shape[axis]), + end=Self.adjust_boundary(ends[i], t1_shape[axis]), + step=steps[i], + ) + ) return new_shape @staticmethod - fn reorder_positions[id: Int](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[Int]: + fn reorder_positions[ + id: Int + ](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[ + Int + ]: # Reorder the starts (id=0), ends (id=1) or steps (id=2) to match the order of the axes var updated: List[Int] @parameter - if id == 0: updated = Self.default_starts(t1_shape) - elif id == 1: updated = Self.default_ends(t1_shape) - else: updated = Self.default_steps(t1_shape) - + if id == 0: + updated = Self.default_starts(t1_shape) + elif id == 1: + updated = Self.default_ends(t1_shape) + else: + updated = Self.default_steps(t1_shape) + for i in range(axes.rank()): var axis = axes[i] - updated[axis] = original[i] if id == 2 else Self.adjust_boundary(original[i], t1_shape[axis]) + updated[axis] = original[i] if id == 2 else Self.adjust_boundary( + original[i], t1_shape[axis] + ) return updated^ @@ -362,12 +440,12 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + 
backward_op: Bool = False, ]( inout res: Tensor[dtype], t1: Tensor[dtype], last_dims: Int, - position: Int, + position: Int, last_position: Int, idx: Int, idx_original: Int, @@ -376,7 +454,9 @@ struct SLICE: alias t1_strides = original_shape.strides() var idx_temp = idx - var idx_original_temp = starts[position] * t1_strides[position] + idx_original + var idx_original_temp = starts[position] * t1_strides[ + position + ] + idx_original if position == last_position + 1: # Work on the last dimensions @@ -384,37 +464,50 @@ struct SLICE: alias stride = t1_strides[position] * steps[position] @parameter - fn v_slice[nelts: Int](k : Int): - + fn v_slice[nelts: Int](k: Int): @parameter if not backward_op: + @parameter if steps[position] == 1: - res.store[nelts](idx_temp + k, t1.load[nelts](idx_original_temp)) + res.store[nelts]( + idx_temp + k, t1.load[nelts](idx_original_temp) + ) else: res.store[nelts]( idx_temp + k, - t1.data().offset(idx_original_temp).simd_strided_load[nelts](stride) + t1.data() + .offset(idx_original_temp) + .simd_strided_load[nelts](stride), ) else: + @parameter if steps[position] == 1: res.store[nelts](idx_original_temp, t1.load[nelts](idx_temp + k)) else: - res.data().offset(idx_original_temp).simd_strided_store[nelts]( + res.data().offset(idx_original_temp).simd_strided_store[width=nelts]( t1.load[nelts](idx_temp + k), stride ) - + idx_original_temp += stride * nelts vectorize[v_slice, nelts](last_dims) - return + return for _ in range(shape[position]): - Self.recursive_iters_slice[shape, original_shape, steps, starts, ends, backward_op]( - res, t1, last_dims, position + 1, last_position, idx_temp, idx_original_temp + Self.recursive_iters_slice[ + shape, original_shape, steps, starts, ends, backward_op + ]( + res, + t1, + last_dims, + position + 1, + last_position, + idx_temp, + idx_original_temp, ) idx_temp += strides[position] @@ -427,10 +520,10 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + backward_op: Bool = False, ](inout res: Tensor[dtype], t1: Tensor[dtype]): alias strides = original_shape.strides() - + # Get the dimensions for vectorization var last_dims = 1 var positions_to_skip = 0 @@ -441,7 +534,7 @@ struct SLICE: positions_to_skip += 1 if starts[i] != 0 or ends[i] != original_shape[i] or steps[i] != 1: break - + # Get the dimensions for the first loop var first_dims = 1 var start_position = 0 @@ -452,31 +545,46 @@ struct SLICE: start_position += 1 var middle_dims = res_shape.num_elements() // last_dims // first_dims - + @parameter fn p_slice(i: Int): Self.recursive_iters_slice[ res_shape, original_shape, steps, starts, ends, backward_op ]( - res, t1, last_dims, start_position, res_shape.rank() - 1 - positions_to_skip, - i * middle_dims * last_dims, i * strides[start_position - 1] + res, + t1, + last_dims, + start_position, + res_shape.rank() - 1 - positions_to_skip, + i * middle_dims * last_dims, + i * strides[start_position - 1], ) parallelize[p_slice](first_dims) - + @staticmethod fn forward[ t1_shape: TensorShape, attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype]): - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if 
attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) alias res_shape = Self.result_shape(t1_shape, attributes) - Self.slice_kernel[res_shape, t1_shape, steps, starts, ends, False](res, t1) + Self.slice_kernel[res_shape, t1_shape, steps, starts, ends, False]( + res, t1 + ) @staticmethod fn backward[ @@ -484,10 +592,18 @@ struct SLICE: t1_shape: TensorShape, attributes: AttributeVector = AttributeVector(), ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) var res_grad = Tensor[dtype](t1_shape) @@ -705,7 +821,7 @@ struct UPSAMPLE: @parameter if mode == "nearest": var indeces_t1_sum = indeces_t1[0] - @unroll + @parameter for i in range(1, size): indeces_t1_sum += indeces_t1[i] * strides[i + 1] @@ -850,4 +966,4 @@ struct UPSAMPLE: t1_shape: TensorShape, attributes: AttributeVector = AttributeVector(), ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - return t1 \ No newline at end of file + return t1 diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index c47f5ce..b870f78 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -15,7 +15,7 @@ from .basics import ( TRANSPOSE, FMA, ) -from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE, INDEX, UPSAMPLE +from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE, INDEX, UPSAMPLE, LEAKYRELU from .dynamics import CONCAT, SPLIT from .conv import CONV2D from .pool import MAXPOOL2D @@ -63,6 +63,7 @@ struct OP(Stringable): alias SLICE = OP(25, "SLICE") alias INDEX = OP(26, "INDEX") alias UPSAMPLE = OP(27, "UPSAMPLE") + alias LEAKYRELU = OP(28, "LEAKYRELU") var id: UInt8 var name: Bytes[16] @@ -89,10 +90,16 @@ fn static_result_shape( if len(operands) == 1: return static_result_shape(op, operands[0].shape, attributes) elif len(operands) == 2: - return static_result_shape(op, operands[0].shape, operands[1].shape, attributes) + return static_result_shape( + op, operands[0].shape, operands[1].shape, attributes + ) elif len(operands) == 3: return static_result_shape( - op, operands[0].shape, operands[1].shape, operands[2].shape, attributes + op, + operands[0].shape, 
+ operands[1].shape, + operands[2].shape, + attributes, ) else: print("Error: Invalid number of operands") @@ -123,6 +130,8 @@ fn static_result_shape( return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: return RELU.result_shape(t1_shape) + elif op == OP.LEAKYRELU: + return LEAKYRELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) elif op == OP.TRANSPOSE: @@ -241,6 +250,8 @@ fn forward_op[ SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: RELU.forward[t1_shape](res, t1) + elif op == OP.LEAKYRELU: + LEAKYRELU.forward[t1_shape, attributes](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) elif op == OP.TRANSPOSE: @@ -264,7 +275,10 @@ fn forward_op[ fn forward_op[ - op: OP, t1_shape: TensorShape, t2_shape: TensorShape, attributes: AttributeVector + op: OP, + t1_shape: TensorShape, + t2_shape: TensorShape, + attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype]): """ Forward pass for binary operators. @@ -293,14 +307,21 @@ fn forward_op[ t2_shape: TensorShape, t3_shape: TensorShape, attributes: AttributeVector, -](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], t3: Tensor[dtype]): +]( + inout res: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + t3: Tensor[dtype], +): """ Forward pass for ternary operators. """ @parameter if op == OP.CONV2D: - CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes](res, t1, t2, t3) + CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes]( + res, t1, t2, t3 + ) elif op == OP.FMA: FMA.forward[t1_shape, t2_shape, t3_shape](res, t1, t2, t3) else: @@ -313,7 +334,7 @@ fn forward_op[ ]( inputs: List[Symbol], outputs: List[Symbol], - parameters: Parameters, + inout parameters: Parameters, ): """ Forward pass for dynamic operators. @@ -357,6 +378,8 @@ fn backward_op[ res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: res_grad = RELU.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.LEAKYRELU: + res_grad = LEAKYRELU.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) elif op == OP.TRANSPOSE: @@ -387,7 +410,12 @@ fn backward_op[ t1_shape: TensorShape, t2_shape: TensorShape, attributes: AttributeVector, -](ug: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], inout grad: Tensor[dtype]): +]( + ug: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + inout grad: Tensor[dtype], +): """ Backward pass for binary operators. 
""" @@ -395,17 +423,29 @@ fn backward_op[ @parameter if op == OP.ADD: - res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.SUB: - res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.MUL: - res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DIV: - res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.POW: - res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DOT: - res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -449,9 +489,9 @@ fn backward_op[ tensor_id, ug_shape, t1_shape, t2_shape, t3_shape, attributes ](ug, t1, t2, t3) elif op == OP.FMA: - res_grad = FMA.backward[tensor_id, ug_shape, t1_shape, t2_shape, t3_shape]( - ug, t1, t2, t3 - ) + res_grad = FMA.backward[ + tensor_id, ug_shape, t1_shape, t2_shape, t3_shape + ](ug, t1, t2, t3) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -467,7 +507,7 @@ fn backward_op[ inputs: List[Symbol], outputs: List[Symbol], inout grad: Tensor[dtype], - parameters: Parameters, + inout parameters: Parameters, ): """ Backward pass for dynamic operators. 
@@ -475,9 +515,13 @@ var res_grad: Tensor[dtype] if op == OP.CONCAT: - res_grad = CONCAT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = CONCAT.backward[input_id, attributes]( + inputs, outputs, parameters + ) elif op == OP.SPLIT: - res_grad = SPLIT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = SPLIT.backward[input_id, attributes]( + inputs, outputs, parameters + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) diff --git a/basalt/autograd/ops/pool.mojo b/basalt/autograd/ops/pool.mojo index 5e92740..3149cc1 100644 --- a/basalt/autograd/ops/pool.mojo +++ b/basalt/autograd/ops/pool.mojo @@ -1,4 +1,4 @@ -from math.limit import neginf +from utils.numerics import min_or_neg_inf from basalt import Tensor, TensorShape from basalt.autograd.attributes import AttributeVector @@ -48,7 +48,7 @@ struct MAXPOOL2D: for in_ch in range(input_shape[1]): for x in range(output_shape[2]): for y in range(output_shape[3]): - var max_val: Scalar[dtype] = neginf[dtype]() + var max_val: Scalar[dtype] = min_or_neg_inf[dtype]() var ix_base = x * stride[0] - padding[0] var iy_base = y * stride[1] - padding[1] for kx in range(kernel_size[0]): @@ -107,7 +107,7 @@ struct MAXPOOL2D: for in_ch in range(input_shape[1]): for x in range(ug_shape[2]): for y in range(ug_shape[3]): - var max_val: Scalar[dtype] = neginf[dtype]() + var max_val: Scalar[dtype] = min_or_neg_inf[dtype]() var max_idx: Int = -1 var ix_base = x * stride[0] - padding[0] var iy_base = y * stride[1] - padding[1] diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 9c994a4..d85ab27 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -7,4 +7,11 @@ from .layers.pool import MaxPool2d from .layers.upsample import Upsample from .loss import MSELoss, CrossEntropyLoss -from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh +from .activations import ( + Softmax, + LogSoftmax, + ReLU, + LeakyReLU, + Sigmoid, + Tanh, +) diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 2264a54..9a83a0f 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -2,13 +2,22 @@ from basalt import Tensor, TensorShape from basalt import Graph, Symbol, OP from basalt.autograd.attributes import Attribute, AttributeVector -# '''Activation functions.''' - +# '''Activation functions.''' fn ReLU(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.RELU, input) +fn LeakyReLU( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol: + return g.op( + OP.LEAKYRELU, + input, + attributes=AttributeVector(Attribute("negative_slope", negative_slope)), + ) + + fn Sigmoid(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.SIGMOID, input) diff --git a/basalt/nn/model.mojo b/basalt/nn/model.mojo index ed80c7e..a8993cf 100644 --- a/basalt/nn/model.mojo +++ b/basalt/nn/model.mojo @@ -80,7 +80,7 @@ struct Model[ # TODO: remove when ability to concatenate graphs (modules) # Removes the need for splitting in forward and inference mode - fn forward(inout self, *t_inputs: Tensor[dtype]) -> Tensor[dtype]: + fn forward(inout self, *t_inputs: Tensor[dtype]) -> ref[__lifetime_of(self)] Tensor[dtype]: # NOTE: Important detail here is that the order of the inputs must be the same as the order the inputs were defined in the graph. # Example: If you were to define the y_true before the x when creating the graph # @@ -117,7 +117,7 @@ # 2. Loop over all nodes and execute forward operations @parameter - fn fw_unroll[i: Int](): + for i in range(num_nodes): alias op = g.nodes[i].operator alias attrs = g.nodes[i].attributes @@ -169,8 +169,6 @@ if DEBUG == 1: self.perf_metrics.end_forward_pass(i) - unroll[fw_unroll, num_nodes]() - fn backward(inout self, *upper_grads: Tensor[dtype]): """ Main entrypoint of backward pass. @@ -191,7 +189,7 @@ # 2. Loop over all nodes in reverse order and execute backward operations @parameter - fn bw_unroll[i: Int](): + for i in range(g.nodes.size): alias reverse_i = g.nodes.size - i - 1 alias op = g.nodes[reverse_i].operator alias attrs = g.nodes[reverse_i].attributes @@ -206,7 +204,7 @@ if op.dynamic: @parameter - fn unroll_dynamic[j: Int](): + for j in range(num_operands): @parameter if g.nodes[reverse_i].inputs[j].trainable: backward_op[j, op, attrs]( @@ -215,9 +213,6 @@ self.parameters.grads[g.nodes[reverse_i].inputs[j]], self.parameters, ) - - unroll[unroll_dynamic, num_operands]() - else: # Statically known shapes and number of operands alias out = g.nodes[reverse_i].outputs[0] # or upper_grad symbol @@ -302,8 +297,6 @@ if DEBUG == 1: self.perf_metrics.end_backward_pass(i) - unroll[bw_unroll, g.nodes.size]() - fn allocate_tensor_memory(inout self): for i in range(len(g.inputs)): self.parameters.tensors.append( @@ -375,7 +368,7 @@ except e: print("Error loading model data:", e) - fn export_model(self, model_path: String): + fn export_model(inout self, model_path: String): var path = Path(model_path) print("Exporting model to:", path) diff --git a/basalt/nn/optim.mojo b/basalt/nn/optim.mojo index 1ba90f2..db6210a 100644 --- a/basalt/nn/optim.mojo +++ b/basalt/nn/optim.mojo @@ -1,9 +1,10 @@ -from math import add, mul, div, sqrt, sub +from math import sqrt from algorithm import vectorize, parallelize from .model import Parameters from basalt import Graph, Tensor, TensorShape from basalt.utils.collection import Collection +from basalt.utils.math_util import add, sub, mul, div fn get_trainable_parameters(g: Graph) -> List[Symbol]: @@ -20,13 +21,14 @@ return trainable_parameters ^ +@value struct Adam[ + lifetime: MutableLifetime, # Using mutability and AnyLifetime seems to give problems for now, because the reference can't know for sure whether the lifetime is mutable or not + //, g: Graph, - mutability: __mlir_type.i1, - lifetime: AnyLifetime[mutability].type, trainable_parameters: List[Symbol] = get_trainable_parameters(g), ]: - var parameters: Reference[Parameters, mutability, lifetime] + var parameters: Reference[Parameters, True, lifetime] var lr: Scalar[dtype] var beta1: Scalar[dtype] @@ -39,7 +41,7 @@ fn __init__( inout self, - parameters: Reference[Parameters, mutability, lifetime], + parameters: Reference[Parameters, True, lifetime], lr: Scalar[dtype] = 0.001, beta1: Scalar[dtype] = 0.9, beta2: Scalar[dtype] = 0.999, diff --git a/basalt/nn/tensor.mojo b/basalt/nn/tensor.mojo index b3fa551..63fb02a 100644 --- a/basalt/nn/tensor.mojo +++ b/basalt/nn/tensor.mojo @@ -1,4 +1,3 @@ -from math import min from testing import assert_true from algorithm import vectorize @@ -14,40 +13,34 @@ struct TensorShape(Stringable): var _rank: Int var _shape: StaticIntTuple[MAX_RANK] - @always_inline("nodebug") fn __init__(inout self, *shape: Int): self._rank = len(shape) self._shape = StaticIntTuple[MAX_RANK]() for i in range(min(self._rank, MAX_RANK)): 
self._shape[i] = shape[i] - @always_inline("nodebug") fn __init__(inout self, shapes: VariadicList[Int]): self._rank = len(shapes) self._shape = StaticIntTuple[MAX_RANK]() for i in range(min(self._rank, MAX_RANK)): self._shape[i] = shapes[i] - @always_inline("nodebug") fn __init__(inout self, shape: List[Int]): self._rank = len(shape) self._shape = StaticIntTuple[MAX_RANK]() for i in range(min(self._rank, MAX_RANK)): self._shape[i] = shape[i] - @always_inline("nodebug") fn __init__[num: Int](inout self, shape: StaticIntTuple[num]): self._rank = num self._shape = StaticIntTuple[MAX_RANK]() for i in range(min(self._rank, MAX_RANK)): self._shape[i] = shape[i] - @always_inline("nodebug") fn __init__(inout self, rank: Int, shape: StaticIntTuple[MAX_RANK]): self._rank = rank self._shape = shape - @always_inline("nodebug") fn __init__(inout self, owned shape: _TensorShape): self._rank = shape.rank() self._shape = StaticIntTuple[MAX_RANK]() @@ -117,19 +110,16 @@ struct Tensor[dtype: DType](Stringable, Movable, CollectionElement): var _data: DTypePointer[dtype] var _shape: TensorShape - @always_inline("nodebug") fn __init__(inout self, *dims: Int): self._shape = TensorShape(dims) self._data = DTypePointer[dtype].alloc(self._shape.num_elements()) memset_zero(self._data, self._shape.num_elements()) - @always_inline("nodebug") fn __init__(inout self, owned shape: TensorShape): self._data = DTypePointer[dtype].alloc(shape.num_elements()) memset_zero(self._data, shape.num_elements()) self._shape = shape - @always_inline("nodebug") fn __init__( inout self, owned data: DTypePointer[dtype], owned shape: TensorShape ): @@ -140,20 +130,17 @@ struct Tensor[dtype: DType](Stringable, Movable, CollectionElement): memcpy(self._data, data, self._shape.num_elements()) _ = data - @always_inline("nodebug") fn __init__(inout self, owned tensor: _Tensor[dtype]): self._data = DTypePointer[dtype].alloc(tensor.num_elements()) self._shape = tensor.shape() - memcpy(self._data, tensor.data(), self._shape.num_elements()) + memcpy(self._data, tensor.unsafe_ptr(), self._shape.num_elements()) _ = tensor - @always_inline("nodebug") fn __moveinit__(inout self, owned other: Tensor[dtype]): self._data = other._data self._shape = other._shape - @always_inline("nodebug") fn __copyinit__(inout self, other: Tensor[dtype]): # print("[WARNING] Copying tensor") self._data = DTypePointer[dtype].alloc(other._shape.num_elements()) diff --git a/basalt/utils/bytes.mojo b/basalt/utils/bytes.mojo index 498851b..8125a30 100644 --- a/basalt/utils/bytes.mojo +++ b/basalt/utils/bytes.mojo @@ -1,5 +1,6 @@ from math import nan -from math.limit import inf +from utils.numerics import inf +from utils.static_tuple import StaticTuple alias ScalarBytes = DType.uint64.sizeof() @@ -12,22 +13,18 @@ struct Bytes[capacity: Int](Stringable, CollectionElement, EqualityComparable): var data: StaticTuple[UInt8, capacity] - @always_inline("nodebug") fn __init__(inout self): - var data = StaticTuple[UInt8, capacity]() + var data = StaticTuple[UInt8, capacity](0) - @unroll for i in range(capacity): data[i] = 0 self.data = data - @always_inline("nodebug") fn __init__(inout self, s: String): - var data = StaticTuple[UInt8, capacity]() + var data = StaticTuple[UInt8, capacity](0) var length = len(s) - @unroll for i in range(capacity): data[i] = ord(s[i]) if i < length else 0 @@ -47,7 +44,6 @@ struct Bytes[capacity: Int](Stringable, CollectionElement, EqualityComparable): @always_inline("nodebug") fn __eq__(self, other: Self) -> Bool: - @unroll for i in range(capacity): if 
self[i] != other[i]: return False @@ -55,7 +51,6 @@ struct Bytes[capacity: Int](Stringable, CollectionElement, EqualityComparable): @always_inline("nodebug") fn __ne__(self, other: Self) -> Bool: - @unroll for i in range(capacity): if self[i] != other[i]: return True @@ -65,7 +60,6 @@ struct Bytes[capacity: Int](Stringable, CollectionElement, EqualityComparable): fn __str__(self) -> String: var result: String = "" - @unroll for i in range(capacity): var val = self[i] if val != 0: @@ -82,7 +76,6 @@ fn scalar_to_bytes[ var bits = bitcast[DType.uint64](value.cast[expand_type[dtype]()]()) var data = Bytes[Size]() - @unroll for i in range(ScalarBytes): data[i] = (bits >> (i << 3)).cast[DType.uint8]() @@ -94,7 +87,6 @@ fn bytes_to_scalar[dtype: DType](data: Bytes) -> Scalar[dtype]: var bits: UInt64 = 0 - @unroll for i in range(ScalarBytes): bits |= data[i].cast[DType.uint64]() << (i << 3) diff --git a/basalt/utils/collection.mojo b/basalt/utils/collection.mojo index 16474a5..1528844 100644 --- a/basalt/utils/collection.mojo +++ b/basalt/utils/collection.mojo @@ -1,4 +1,3 @@ -from math import max, divmod from memory.unsafe_pointer import UnsafePointer, initialize_pointee_move, destroy_pointee from basalt import Tensor, Symbol @@ -133,20 +132,17 @@ struct Collection(CollectionElement, Sized): return -1 - @always_inline("nodebug") - fn __refitem__[ - mutability: __mlir_type.i1, - lifetime: AnyLifetime[mutability].type, - ]( - self: Reference[Self, mutability, lifetime]._mlir_type, + fn __getitem__( + inout self, symbol: Symbol, - ) -> Reference[Tensor[dtype], mutability, lifetime]: + ) -> ref[__lifetime_of(self)] Tensor[dtype]: """ Returns a reference to the tensor with the given symbol. """ - var index = Reference(self)[].get_index(symbol.name) + var index = self.get_index(symbol.name) + - return (Reference(self)[].data + index)[] + return (self.data + index)[0] @always_inline("nodebug") fn clear(inout self): diff --git a/basalt/utils/datasets.mojo b/basalt/utils/datasets.mojo index cb019ae..ff5b356 100644 --- a/basalt/utils/datasets.mojo +++ b/basalt/utils/datasets.mojo @@ -1,11 +1,15 @@ from algorithm import vectorize -from math import div from basalt import dtype from basalt import Tensor, TensorShape from basalt.utils.tensorutils import elwise_op, tmean, tstd +@always_inline +fn div[dtype: DType, simd_width: Int](a: SIMD[dtype, simd_width], b: Scalar[dtype]) -> SIMD[dtype, simd_width]: + return a / b + + struct BostonHousing: alias n_inputs = 13 diff --git a/basalt/utils/itertools.mojo b/basalt/utils/itertools.mojo index fd7a6ce..2b7d3ab 100644 --- a/basalt/utils/itertools.mojo +++ b/basalt/utils/itertools.mojo @@ -36,7 +36,7 @@ struct _ProductIterator(Sized): var index = count % len(self.lists[i]) combination.append(self.lists[i][index]) count //= len(self.lists[i]) - combination._reverse() + combination.reverse() return combination ^ @always_inline("nodebug") diff --git a/basalt/utils/math_util.mojo b/basalt/utils/math_util.mojo new file mode 100644 index 0000000..faeab90 --- /dev/null +++ b/basalt/utils/math_util.mojo @@ -0,0 +1,41 @@ +@always_inline +fn add[ + dtype: DType, simd_width: Int +](a: SIMD[dtype, simd_width], b: SIMD[dtype, simd_width]) -> SIMD[ + dtype, simd_width +]: + return a + b + + +@always_inline +fn sub[ + dtype: DType, simd_width: Int +](a: SIMD[dtype, simd_width], b: SIMD[dtype, simd_width]) -> SIMD[ + dtype, simd_width +]: + return a - b + + +@always_inline +fn mul[ + dtype: DType, simd_width: Int +](a: SIMD[dtype, simd_width], b: SIMD[dtype, simd_width]) -> SIMD[ + 
dtype, simd_width +]: + return a * b + + +@always_inline +fn div[ + dtype: DType, simd_width: Int +](a: SIMD[dtype, simd_width], b: SIMD[dtype, simd_width]) -> SIMD[ + dtype, simd_width +]: + return a / b + + +@always_inline +fn round_simd[ + dtype: DType, simd_width: Int +](x: SIMD[dtype, simd_width]) -> SIMD[dtype, simd_width]: + return round(x) diff --git a/basalt/utils/onnx_utils.mojo b/basalt/utils/onnx_utils.mojo index 3e67164..fde4d90 100644 --- a/basalt/utils/onnx_utils.mojo +++ b/basalt/utils/onnx_utils.mojo @@ -163,9 +163,9 @@ fn load_onnx_model( "Shape mismatch for tensor " + str(i) + ". Expected shape: " - + model_tensor_shape + + str(model_tensor_shape) + ", got shape: " - + data_shape + + str(data_shape) ) copy_np_data(model_parameters.tensors[g.params.symbols[i]], data_np) @@ -250,7 +250,7 @@ fn create_attributes_and_constant_inputs(node: Node, node_number: Int) raises -> return (attributes, inputs) -fn export_onnx_model(model_path: Path, model_parameters: Parameters, g: Graph) raises: +fn export_onnx_model(model_path: Path, inout model_parameters: Parameters, g: Graph) raises: # Create onnx model with data and nodes var onnx = Python.import_module("onnx") var onnx_helper = Python.import_module("onnx.helper") @@ -289,7 +289,7 @@ fn export_onnx_model(model_path: Path, model_parameters: Parameters, g: Graph) r var op_type = make_onnx_operator_type(node.operator) var inputs = PythonObject([]) var outputs = PythonObject([]) - var name = str(node.operator) + "_node" + i + var name = str(node.operator) + "_node" + str(i) for j in range(len(node.inputs)): inputs.append(str(node.inputs[j].name)) diff --git a/basalt/utils/perf_utils.mojo b/basalt/utils/perf_utils.mojo index 9cf076c..bacd940 100644 --- a/basalt/utils/perf_utils.mojo +++ b/basalt/utils/perf_utils.mojo @@ -1,5 +1,4 @@ from time import now -from math import min from memory import memset from basalt.autograd.node import Node @@ -7,10 +6,10 @@ from basalt.autograd.node import Node @always_inline("nodebug") fn fit_string[num: Int](s: String) -> String: - var data = DTypePointer[DType.int8]().alloc(num + 1) + var data = DTypePointer[DType.uint8]().alloc(num + 1) var copy_len = min(num, len(s)) - memcpy(data, s._as_ptr(), copy_len) + memcpy(data, s.unsafe_uint8_ptr(), copy_len) memset(data + copy_len, ord(" "), num - copy_len) data[num] = 0 @@ -20,11 +19,11 @@ fn fit_string[num: Int](s: String) -> String: @always_inline("nodebug") fn truncate_decimals[num: Int](s: String) -> String: try: - var parts = s.split(delimiter=".") + var parts = s.split(".") var truncated = parts[0] if len(parts) > 1: - var decimal_parts = parts[1].split(delimiter="e") + var decimal_parts = parts[1].split("e") truncated += "." 
+ fit_string[num](decimal_parts[0]) if len(decimal_parts) > 1: @@ -125,7 +124,7 @@ struct PerfMetrics: print(header) var header_length = len(header) - var seperator = DTypePointer[DType.int8]().alloc(header_length + 1) + var seperator = DTypePointer[DType.uint8]().alloc(header_length + 1) memset(seperator, ord("-"), header_length) seperator[header_length] = 0 @@ -146,11 +145,11 @@ struct PerfMetrics: var print_value = ( fit_string[5](str(i)) + "| " - + fit_string[15](value.node.operator) + + fit_string[15](str(value.node.operator)) + "| " - + fit_string[20](truncate_decimals[4](time)) + + fit_string[20](truncate_decimals[4](str(time))) + "| " - + fit_string[20](truncate_decimals[3](percentage) + " %") + + fit_string[20](truncate_decimals[3](str(percentage)) + " %") + "| " ) diff --git a/basalt/utils/rand_utils.mojo b/basalt/utils/rand_utils.mojo index 69fd80f..84b1925 100644 --- a/basalt/utils/rand_utils.mojo +++ b/basalt/utils/rand_utils.mojo @@ -1,6 +1,7 @@ from basalt import Tensor from random import rand, randn from algorithm import vectorize +from utils.static_tuple import StaticTuple @always_inline @@ -71,4 +72,4 @@ struct MersenneTwister: return y fn next_ui8(inout self) -> UInt8: - return self.next().value & 0xFF + return self.next().value & int(0xFF) diff --git a/basalt/utils/tensor_creation_utils.mojo b/basalt/utils/tensor_creation_utils.mojo index 375279a..7662331 100644 --- a/basalt/utils/tensor_creation_utils.mojo +++ b/basalt/utils/tensor_creation_utils.mojo @@ -39,12 +39,14 @@ fn to_tensor(np_array: PythonObject) raises -> Tensor[dtype]: var tensor = Tensor[dtype](TensorShape(shape)) - var np_array_2 = np_array.copy() + var np_array_2: PythonObject try: var np = Python.import_module("numpy") - np_array_2 = np.float32(np_array_2) + # copy is also necessary for ops like slices to make them contiguous instead of references. + np_array_2 = np.float32(np_array.copy()) except e: - print("Error in to tensor", e) + np_array_2 = np_array.copy() + print("Error in to_tensor", e) var pointer = int(np_array_2.__array_interface__["data"][0].to_float64()) var pointer_d = DTypePointer[tensor.dtype](address=pointer) @@ -56,13 +58,15 @@ fn to_tensor(np_array: PythonObject) raises -> Tensor[dtype]: return tensor^ -fn copy_np_data(tensor: Tensor, np_array: PythonObject) raises: - var np_array_2 = np_array.copy() +fn copy_np_data(inout tensor: Tensor, np_array: PythonObject) raises: + var np_array_2: PythonObject try: var np = Python.import_module("numpy") - np_array_2 = np.float32(np_array_2) + # copy is also necessary for ops like slices to make them contiguous instead of references. 
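# (Editor's note, hypothetical example.) A numpy slice such as arr[:, ::2] is a
# non-contiguous view, so __array_interface__["data"] would point at memory that
# cannot simply be memcpy'd element-for-element; np.float32(arr.copy()) first
# materializes a fresh, contiguous float32 buffer.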
+ np_array_2 = np.float32(np_array.copy()) except e: - print("Error in to tensor", e) + np_array_2 = np_array.copy() + print("Error in to_tensor", e) var pointer = int(np_array_2.__array_interface__["data"][0].to_float64()) var pointer_d = DTypePointer[tensor.dtype](address=pointer) diff --git a/basalt/utils/tensorutils.mojo b/basalt/utils/tensorutils.mojo index b3b70de..420ae5e 100644 --- a/basalt/utils/tensorutils.mojo +++ b/basalt/utils/tensorutils.mojo @@ -1,13 +1,17 @@ from sys.info import num_physical_cores -from algorithm import vectorize, parallelize, swap +from algorithm import vectorize, parallelize from memory import memset_zero, memset, stack_allocation -from math import sqrt, pow, equal, max, min, add, div, divmod, abs +from math import sqrt from random import rand +from utils.numerics import min_finite, max_finite from basalt import Tensor, TensorShape from basalt.nn.tensor import MAX_RANK +from basalt.utils.math_util import add, sub, mul, div +# ---- Start ----- + @always_inline fn fill[dtype: DType](inout t: Tensor[dtype], val: Scalar[dtype]): @parameter @@ -472,13 +476,13 @@ fn _reduce_max[ @always_inline fn tmax(t: Tensor[dtype]) -> Scalar[dtype]: - var starting_value = math.limit.min_finite[dtype]() + var starting_value = min_finite[dtype]() return reduce[max, _reduce_max](t, starting_value) @always_inline fn tmax(inout res: Tensor[dtype], t: Tensor[dtype], axis: Int): - var starting_value = math.limit.min_finite[dtype]() + var starting_value = min_finite[dtype]() reduce[max, _reduce_max](res, t, axis, starting_value) diff --git a/examples/yolov8.mojo b/examples/yolov8.mojo index eb3c16c..c23fd99 100644 --- a/examples/yolov8.mojo +++ b/examples/yolov8.mojo @@ -5,7 +5,8 @@ from basalt.autograd.attributes import AttributeVector, Attribute from basalt.utils.tensor_creation_utils import to_tensor, to_numpy from python import Python -from math import ceil, max +from math import ceil +from utils.static_tuple import StaticTuple fn Conv( diff --git a/examples/yolov8_cam.mojo b/examples/yolov8_cam.mojo index 1c8b2c3..2c0cb3d 100644 --- a/examples/yolov8_cam.mojo +++ b/examples/yolov8_cam.mojo @@ -1,7 +1,7 @@ import sys from time.time import now from python.python import Python -from math import max +from utils.static_tuple import StaticTuple from yolov8 import YoloV8, get_constant_values_from_onnx_model diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index 200215d..f2db8e9 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -8,6 +8,7 @@ from basalt.nn import ( Softmax, LogSoftmax, ReLU, + LeakyReLU, Sigmoid, Tanh, ) @@ -19,6 +20,9 @@ from tests import assert_tensors_equal alias Activation = fn (inout g: Graph, input: Symbol) -> Symbol alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol +alias LeakyReLUActivation = fn ( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol fn create_graph[ @@ -30,7 +34,19 @@ fn create_graph[ var x = g.input(shape) var activation = func(g, x, axis) g.out(activation) - return g ^ + return g^ + + +fn create_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + negative_slope: Scalar[dtype], +]() -> Graph: + var g = Graph() + var x = g.input(shape) + var activation = func(g, x, negative_slope) + g.out(activation) + return g^ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: @@ -38,7 +54,7 @@ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: var x = g.input(shape) var activation = func(g, x) 
g.out(activation) - return g ^ + return g^ fn test_graph[ @@ -56,6 +72,22 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes) +fn test_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + nodes: Int, + negative_slope: Scalar[dtype], +](input: Tensor[dtype], expected: Tensor[dtype]) raises: + alias graph = create_graph[shape, func, negative_slope]() + + var model = Model[graph](inference_only=True) + var res = model.inference(input)[0] + + assert_tensors_equal["almost"](res, expected) + assert_equal(len(graph.nodes), nodes) + + +# TODO: All these overloads feel redundant. Find a way to condense them fn test_graph[ shape: TensorShape, func: Activation, @@ -125,6 +157,25 @@ fn test_RELU() raises: test_graph[shape, ReLU, nodes](input, expected) +fn test_LEAKYRELU() raises: + alias negative_slope = 0.1 + + alias shape = TensorShape(2, 3) + alias nodes = 1 + + var input = Tensor[dtype](shape) + + for i in range(6): + input[i] = i - 3 + + var expected = Tensor[dtype](shape) + + for i in range(6): + expected[i] = i - 3 if i - 3 > 0 else negative_slope * (i - 3) + + test_graph[shape, LeakyReLU, nodes, negative_slope](input, expected) + + fn test_SIGMOID() raises: alias shape = TensorShape(2, 3) alias nodes = 1 @@ -156,6 +207,7 @@ fn main(): test_SOFTMAX() test_LOGSOFTMAX() test_RELU() + test_LEAKYRELU() test_SIGMOID() test_TANH() except e: diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 045ae7b..07bdde5 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -1,11 +1,24 @@ from basalt import dtype, nelts from basalt.autograd import OP from basalt.autograd.attributes import AttributeVector, Attribute -from basalt.autograd.ops.mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE +from basalt.autograd.ops.mlops import ( + SIGMOID, + RELU, + LEAKYRELU, + TANH, + CLIP, + SQUEEZE, + UNSQUEEZE, +) from basalt.nn import Tensor, TensorShape from basalt.utils.tensorutils import fill -from tests import assert_tensors_equal, test_unary_op, test_unary_op_backward, to_numpy +from tests import ( + assert_tensors_equal, + test_unary_op, + test_unary_op_backward, + to_numpy, +) fn test_SIGMOID() raises: @@ -30,7 +43,9 @@ fn test_backward_SIGMOID() raises: expected_grad, 5.0 * 0.25 ) # 0.25 = d(sigmoid(0))/dx = sigmoid(0) * (1 - sigmoid(0)) - test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_RELU() raises: @@ -71,6 +86,53 @@ fn test_backward_RELU() raises: test_unary_op_backward[OP.RELU, t1_shape, ug_shape](t1, ug, expected_grad) +fn test_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + # TODO: When tensors can do slices, this could be changed to two fill functions. 
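+    # The TODO above would reduce to something like this hypothetical sketch, assuming
+    # a future slice-assignment API on Tensor (not supported at the time of writing):
+    #     fill(t1[0:3], 3.0)
+    #     fill(t1[3:6], -3.0)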
+ for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + + var expected = Tensor[dtype](2, 3) + for i in range(3): + expected[i] = 3 + for i in range(3, 6): + expected[i] = -0.3 + + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected) + + +fn test_backward_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + fill(ug, 5.0) + + var expected_grad = Tensor[dtype](2, 3) + for i in range(3): + expected_grad[i] = 1 * 5.0 + for i in range(3, 6): + expected_grad[i] = 0.1 * 5.0 + + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_grad) + + fn test_TANH() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) @@ -110,7 +172,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = val if (val > -1.1) else -1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)](t1, expected_min) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( + t1, expected_min + ) # Clip with max alias max_attr = Attribute("max", 1.1) @@ -118,7 +182,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = val if (val < 1.1) else 1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)](t1, expected_max) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( + t1, expected_max + ) # Clip with min and max var expected = Tensor[dtype](2, 3) @@ -130,7 +196,9 @@ fn test_CLIP() raises: expected[i] = 1.1 else: expected[i] = val - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)](t1, expected) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)]( + t1, expected + ) fn test_backward_CLIP() raises: @@ -152,7 +220,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = 5.0 if (val > -1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr](t1, ug, expected_min) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr]( + t1, ug, expected_min + ) # Clip with max alias max_attr = AttributeVector(Attribute("max", 1.1)) @@ -160,7 +230,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = 5.0 if (val < 1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr](t1, ug, expected_max) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr]( + t1, ug, expected_max + ) # Clip with min and max alias attrs = AttributeVector(Attribute("min", -1.1), Attribute("max", 1.1)) @@ -201,7 +273,9 @@ fn test_SQUEEZE() raises: expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.SQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(2, 4))) + OP.SQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(2, 4))), ](t1, expected) @@ -216,7 +290,9 @@ fn test_backward_SQUEEZE() raises: var expected_grad = Tensor[dtype](2, 1, 3, 1) fill(expected_grad, 5.0) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_UNSQUEEZE() raises: @@ -228,26 +304,34 @@ fn test_UNSQUEEZE() raises: var expected = 
Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1, 3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1, 3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1))), ](t1, expected) expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-1, -3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-1, -3))), ](t1, expected) @@ -262,7 +346,9 @@ fn test_backward_UNSQUEEZE() raises: var expected_grad = Tensor[dtype](2, 3) fill(expected_grad, 5.0) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_SLICE() raises: @@ -270,7 +356,7 @@ fn test_SLICE() raises: var t1: Tensor[dtype] = Tensor[dtype](t1_shape) for i in range(t1.num_elements()): t1[i] = i - + alias slice = Slice(1, 3, 1) # dim = 0 @@ -278,15 +364,17 @@ fn test_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_0[i*4*5 + j*5 + k] = (i + 1) * 4 * 5 + j * 5 + k + expected_0[i * 4 * 5 + j * 5 + k] = (i + 1) * 4 * 5 + j * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t1, expected_0) # dim = 1 @@ -294,15 +382,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(2): for k in range(5): - expected_1[i*2*5 + j*5 + k] = i * 4 * 5 + (j + 1) * 5 + k + expected_1[i * 2 * 5 + j * 5 + k] = i * 4 * 5 + (j + 1) * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -310,15 +400,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(4): for k in range(2): - expected_2[i*4*2 + j*2 + k] = i * 4 * 5 + j * 5 + (k + 1) - + expected_2[i * 4 * 2 + j * 2 + k] = i * 4 * 5 + j * 5 + (k + 1) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t1, expected_2) @@ -335,15 +427,19 @@ fn test_SLICE_step() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = (i*2 + 1) * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + (i * 2 + 1) * 2 * 2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( 
Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -356,15 +452,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + (j*2 + 1) * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + (j * 2 + 1) * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -377,15 +477,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + (k*2 + 1) + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + (k * 2 + 1) + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -402,15 +506,19 @@ fn test_SLICE_neg() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -423,15 +531,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -444,15 +556,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -470,22 +586,35 @@ fn test_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 
9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), + OP.SLICE, + t1_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), # Attribute("axes", TensorShape(0, 1, 2)) - ) + ), ](t1, expected) alias t2_shape = TensorShape(20, 32, 40, 50) var t2: Tensor[dtype] = Tensor[dtype](t2_shape) for i in range(t2.num_elements()): t2[i] = i - + alias slice_2_1 = Slice(1, 6, 2) alias slice_2_2 = Slice(3, 10, 3) alias slice_2_3 = Slice(5, 15, 2) @@ -497,14 +626,42 @@ fn test_SLICE_multiple_axes() raises: for j in range(3): for k in range(5): for l in range(4): - expected_2[i*3*5*4 + j*5*4 + k*4 + l] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + StaticIntTuple[4](7, 11, 15, 19)[l] - + expected_2[i * 3 * 5 * 4 + j * 5 * 4 + k * 4 + l] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + + StaticIntTuple[4](7, 11, 15, 19)[l] + ) + test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( - Attribute("starts", TensorShape(slice_2_1.start, slice_2_2.start, slice_2_3.start, slice_2_4.start)), - Attribute("ends", TensorShape(slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end)), - Attribute("steps", TensorShape(slice_2_1.step, slice_2_2.step, slice_2_3.step, slice_2_4.step)), - ) + OP.SLICE, + t2_shape, + AttributeVector( + Attribute( + "starts", + TensorShape( + slice_2_1.start, + slice_2_2.start, + slice_2_3.start, + slice_2_4.start, + ), + ), + Attribute( + "ends", + TensorShape( + slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end + ), + ), + Attribute( + "steps", + TensorShape( + slice_2_1.step, + slice_2_2.step, + slice_2_3.step, + slice_2_4.step, + ), + ), + ), ](t2, expected_2) @@ -523,15 +680,18 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_ug0[(i+1)*4*5 + j*5 + k] = 1.0 + expected_ug0[(i + 1) * 4 * 5 + j * 5 + k] = 1.0 test_unary_op_backward[ - OP.SLICE, t0_shape, ug0_shape, AttributeVector( + OP.SLICE, + t0_shape, + ug0_shape, + AttributeVector( Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, ug0, expected_ug0) # dim = 1 (step = 2) @@ -543,20 +703,23 @@ fn test_backward_SLICE() raises: alias ug1_shape = TensorShape(2, 3, 2) var ug1: Tensor[dtype] = Tensor[dtype](ug1_shape) fill(ug1, 1.0) - + var expected_ug1 = Tensor[dtype](t1_shape) for i in range(2): for j in range(3): for k in range(2): - expected_ug1[i*10*2 + (j*2 + 1)*2 + k] = 1.0 + expected_ug1[i * 10 * 2 + (j * 2 + 1) * 2 + k] = 1.0 test_unary_op_backward[ - OP.SLICE, t1_shape, ug1_shape, AttributeVector( + OP.SLICE, + t1_shape, + ug1_shape, + AttributeVector( 
Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, ug1, expected_ug1) # dim = 2 (step = -2) @@ -573,15 +736,20 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(2): for k in range(3): - expected_ug2[i*2*10 + j*10 + StaticIntTuple[3](6, 4, 2)[k]] = 1.0 + expected_ug2[ + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t2_shape, ug2_shape, AttributeVector( + OP.SLICE, + t2_shape, + ug2_shape, + AttributeVector( Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, ug2, expected_ug2) @@ -599,8 +767,12 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + alias ug_shape = TensorShape(3, 3, 5) var ug: Tensor[dtype] = Tensor[dtype](ug_shape) fill(ug, 1.0) @@ -609,14 +781,28 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected_ug[StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k]] = 1.0 + expected_ug[ + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t1_shape, ug_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), - ) + OP.SLICE, + t1_shape, + ug_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), + ), ](t1, ug, expected_ug) @@ -646,8 +832,6 @@ fn test_INDEX() raises: ) ](t, expected) - print(expected) - fn test_INDEX_backward() raises: alias t1_shape = TensorShape(2, 3, 5) @@ -676,9 +860,6 @@ fn test_INDEX_backward() raises: ) ](t, ug, expected) - print(expected) - - fn test_UPSAMPLE() raises: alias t1_shape = TensorShape(2, 3, 5) var t = Tensor[dtype](t1_shape) @@ -723,17 +904,17 @@ fn test_UPSAMPLE() raises: fn main(): try: - # test_SIGMOID() - # test_RELU() - # test_TANH() - # test_CLIP() - # test_SQUEEZE() - # test_UNSQUEEZE() - # test_SLICE() - # test_SLICE_step() - # test_SLICE_neg() - # test_SLICE_multiple_axes() - # test_INDEX() + test_SIGMOID() + test_RELU() + test_TANH() + test_CLIP() + test_SQUEEZE() + test_UNSQUEEZE() + test_SLICE() + test_SLICE_step() + test_SLICE_neg() + test_SLICE_multiple_axes() + test_INDEX() test_UPSAMPLE() except e: print("[ERROR] Error in forward mlops") @@ -741,15 +922,15 @@ fn main(): return try: - # test_backward_SIGMOID() - # test_backward_RELU() - # test_backward_TANH() - # 
test_backward_CLIP() - # test_backward_SQUEEZE() - # test_backward_UNSQUEEZE() - # test_backward_SLICE() - # test_backward_SLICE_multiple_axes() - # test_INDEX_backward() + test_backward_SIGMOID() + test_backward_RELU() + test_backward_TANH() + test_backward_CLIP() + test_backward_SQUEEZE() + test_backward_UNSQUEEZE() + test_backward_SLICE() + test_backward_SLICE_multiple_axes() + test_INDEX_backward() pass except e: print("[ERROR] Error in backward mlops") diff --git a/tests/mojo/test_tensorutils.mojo b/tests/mojo/test_tensorutils.mojo index 6c3dedf..7ea9cc0 100644 --- a/tests/mojo/test_tensorutils.mojo +++ b/tests/mojo/test_tensorutils.mojo @@ -1,6 +1,6 @@ from random import rand from testing import assert_equal, assert_almost_equal -from math import sqrt, exp, round, add, sub, mul, div +from math import sqrt, exp from basalt import dtype, nelts from basalt.autograd.ops.matmul import dot @@ -20,6 +20,7 @@ from basalt.utils.tensorutils import ( transpose, ) from basalt.nn import Tensor, TensorShape +from basalt.utils.math_util import add, sub, mul, div, round_simd from tests import assert_tensors_equal @@ -81,7 +82,7 @@ fn test_elwise_transform() raises: assert_tensors_equal(B_res, C) var C_res = Tensor[dtype](2, 10) - elwise_transform[round](C_res, C) + elwise_transform[round_simd](C_res, C) assert_tensors_equal(C_res, D) diff --git a/tests/mojo/test_tensorutils_data.mojo b/tests/mojo/test_tensorutils_data.mojo index 4cf956e..3a7466f 100644 --- a/tests/mojo/test_tensorutils_data.mojo +++ b/tests/mojo/test_tensorutils_data.mojo @@ -1,8 +1,7 @@ -from math import add - from basalt import dtype, nelts from basalt.nn import Tensor, TensorShape from basalt.utils.tensorutils import fill, elwise_op +from basalt.utils.math_util import add fn generate_tensor(*shape: Int) -> Tensor[dtype]: diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index 69a4a12..ba6288f 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -1,5 +1,5 @@ from random import rand -from math.limit import min_finite, max_finite +from utils.numerics import min_finite, max_finite from collections.optional import OptionalReg, Optional from python.python import Python from python.object import PythonObject @@ -47,6 +47,11 @@ fn torch_unary_op( expected = torch.sigmoid(input_1) elif op == OP.RELU: expected = torch.relu(input_1) + elif op == OP.LEAKYRELU: + expected = torch.nn.functional.leaky_relu( + input_1, + attrs.value()["negative_slope"].value().to_scalar[dtype](), + ) elif op == OP.TANH: expected = torch.tanh(input_1) elif op == OP.CLIP: @@ -65,7 +70,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.squeeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.squeeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.squeeze(input_1) elif attrs_tuple: @@ -78,7 +85,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.unsqueeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.unsqueeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.unsqueeze(input_1, 0) elif attrs_tuple: @@ -102,11 +111,11 @@ fn torch_unary_op( if step < 0: flip_dims.append(dim) - step = step *- 1 + step = step * -1 end, start = (end + 1) * -1, (start + 1) * -1 indices[dim] = py.slice(start, end, step) - + expected = input_1.flip(flip_dims)[indices] elif op == OP.UPSAMPLE: var attrs = attrs.value() @@ -171,6 +180,31 @@ fn test_RELU() raises: ) +fn test_LEAKYRELU() raises: + alias t1_shape = 
TensorShape(37, 63, 107) + alias ug_shape = TensorShape(37, 63, 107) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) + + var expected_and_grad = torch_unary_op( + OP.LEAKYRELU, t1, ug, AttributeVector(Attribute("negative_slope", Float32(0.1))) + ) + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected_and_grad.expected) + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_and_grad.grad_1) + + fn test_TANH() raises: alias t1_shape = TensorShape(37, 63, 107) alias ug_shape = TensorShape(37, 63, 107) @@ -205,23 +239,27 @@ fn test_CLIP() raises: # Clip with min alias min_attr = Attribute("min", 0.3333) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(min_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(min_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with max alias max_attr = Attribute("max", 0.6666) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(max_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(max_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with min and max expected_and_grad = torch_unary_op( @@ -261,9 +299,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) alias ug_shape_2 = TensorShape(20, 28, 1) ug = Tensor[dtype](ug_shape_2) @@ -271,13 +309,15 @@ fn test_SQUEEZE() raises: alias dim_2 = Attribute("dims", TensorShape(1)) - expected_and_grad = torch_unary_op(OP.SQUEEZE, t1, ug, AttributeVector(dim_2)) + expected_and_grad = torch_unary_op( + OP.SQUEEZE, t1, ug, AttributeVector(dim_2) + ) test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim_2)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2) + ](t1, ug, expected_and_grad.grad_1) # Squeeze with multiple dims ug = Tensor[dtype](ug_shape) @@ -294,9 +334,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_UNSQUEEZE() raises: @@ -310,13 +350,15 @@ fn 
test_UNSQUEEZE() raises: alias dim = Attribute("dims", TensorShape(1)) - var expected_and_grad = torch_unary_op(OP.UNSQUEEZE, t1, ug, AttributeVector(dim)) + var expected_and_grad = torch_unary_op( + OP.UNSQUEEZE, t1, ug, AttributeVector(dim) + ) test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) # Unsqueeze with multiple dims alias ug_shape_2 = TensorShape(20, 1, 28, 1) @@ -333,9 +375,9 @@ fn test_UNSQUEEZE() raises: test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_SLICE() raises: @@ -349,17 +391,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) + Attribute("axes", TensorShape(0)), ) alias ug_shape = TensorShape(65, 322, 317) var ug = Tensor[dtype](ug_shape) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0 = PythonObject((slice_0.start, slice_0.end, slice_0.step, 0)) - var expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0) + var attrs_tuple_0 = PythonObject( + (slice_0.start, slice_0.end, slice_0.step, 0) + ) + var expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 1 alias slice_1 = Slice(10, 311, 5) @@ -367,17 +415,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) + Attribute("axes", TensorShape(1)), ) alias ug_shape_1 = TensorShape(430, 61, 317) ug = Tensor[dtype](ug_shape_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_1 = PythonObject((slice_1.start, slice_1.end, slice_1.step, 1)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1) + var attrs_tuple_1 = PythonObject( + (slice_1.start, slice_1.end, slice_1.step, 1) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 2 alias slice_2 = Slice(293, 33, -7) @@ -385,20 +439,26 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) + Attribute("axes", TensorShape(2)), ) alias ug_shape_2 = TensorShape(430, 322, 38) ug = Tensor[dtype](ug_shape_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_2 = 
PythonObject((slice_2.start, slice_2.end, slice_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2) + var attrs_tuple_2 = PythonObject( + (slice_2.start, slice_2.end, slice_2.step, 2) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2]( + t1, ug, expected_and_grad.grad_1 + ) # Multiple dims - + # dim = 0, 1 alias slice_0_1 = Slice(23, 340, 3) alias slice_1_1 = Slice(10, 250, 5) @@ -407,17 +467,32 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0_1.start, slice_1_1.start)), Attribute("ends", TensorShape(slice_0_1.end, slice_1_1.end)), Attribute("steps", TensorShape(slice_0_1.step, slice_1_1.step)), - Attribute("axes", TensorShape(0, 1)) + Attribute("axes", TensorShape(0, 1)), ) alias ug_shape_0_1 = TensorShape(106, 48, 317) ug = Tensor[dtype](ug_shape_0_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_1 = PythonObject((slice_0_1.start, slice_0_1.end, slice_0_1.step, 0, slice_1_1.start, slice_1_1.end, slice_1_1.step, 1)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1) + var attrs_tuple_0_1 = PythonObject( + ( + slice_0_1.start, + slice_0_1.end, + slice_0_1.step, + 0, + slice_1_1.start, + slice_1_1.end, + slice_1_1.step, + 1, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 0, 1, 2 alias slice_0_2 = Slice(-412, -5, 3) @@ -425,20 +500,46 @@ fn test_SLICE() raises: alias slice_2_2 = Slice(293, 33, -7) alias attrs_0_2 = AttributeVector( - Attribute("starts", TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start)), - Attribute("ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end)), - Attribute("steps", TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step)), - Attribute("axes", TensorShape(0, 1, 2)) + Attribute( + "starts", + TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start), + ), + Attribute( + "ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end) + ), + Attribute( + "steps", TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step) + ), + Attribute("axes", TensorShape(0, 1, 2)), ) alias ug_shape_0_2 = TensorShape(136, 35, 38) ug = Tensor[dtype](ug_shape_0_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_2 = PythonObject((slice_0_2.start, slice_0_2.end, slice_0_2.step, 0, slice_1_2.start, slice_1_2.end, slice_1_2.step, 1, slice_2_2.start, slice_2_2.end, slice_2_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2) + var attrs_tuple_0_2 = PythonObject( + ( + slice_0_2.start, + slice_0_2.end, + slice_0_2.step, + 0, + slice_1_2.start, + slice_1_2.end, + slice_1_2.step, + 1, + slice_2_2.start, + slice_2_2.end, + slice_2_2.step, + 2, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2](t1, ug, 
expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2]( + t1, ug, expected_and_grad.grad_1 + ) fn test_UPSAMPLE() raises: @@ -510,13 +611,14 @@ fn test_UPSAMPLE() raises: fn main(): print("Running mlops (compare with torch) tests") try: - # test_SIGMOID() - # test_RELU() - # test_TANH() - # test_CLIP() - # test_SQUEEZE() - # test_UNSQUEEZE() - # test_SLICE() + test_SIGMOID() + test_RELU() + test_LEAKYRELU() + test_TANH() + test_CLIP() + test_SQUEEZE() + test_UNSQUEEZE() + test_SLICE() test_UPSAMPLE() except e: print("[ERROR] Error in mlops (compare with torch)") diff --git a/tests/python/test_models_mnist.mojo b/tests/python/test_models_mnist.mojo index 85dd47d..5a0312d 100644 --- a/tests/python/test_models_mnist.mojo +++ b/tests/python/test_models_mnist.mojo @@ -120,7 +120,7 @@ fn run_mojo[ ) var model = Model[graph]() - var optim = optim.Adam[graph](Reference(model.parameters), lr=learning_rate) + var optim = optim.Adam[graph](model.parameters, lr=learning_rate) var losses = List[Scalar[dtype]]() diff --git a/tests/python/test_models_regression.mojo b/tests/python/test_models_regression.mojo index cc88444..1a36b77 100644 --- a/tests/python/test_models_regression.mojo +++ b/tests/python/test_models_regression.mojo @@ -1,6 +1,6 @@ from random import rand from python import Python -from math.limit import max_finite +from utils.numerics import max_finite from testing import assert_almost_equal from basalt import dtype diff --git a/tests/python/test_models_sin_estimate.mojo b/tests/python/test_models_sin_estimate.mojo index fe6e2a4..9b59231 100644 --- a/tests/python/test_models_sin_estimate.mojo +++ b/tests/python/test_models_sin_estimate.mojo @@ -1,6 +1,6 @@ from random import rand from python import Python -from math.limit import max_finite +from utils.numerics import max_finite from testing import assert_almost_equal from basalt import dtype @@ -81,7 +81,7 @@ fn run_mojo[ ) var model = Model[graph]() - var optim = optim.Adam[graph](Reference(model.parameters), lr=learning_rate) + var optim = optim.Adam[graph](model.parameters, lr=learning_rate) var losses = List[Scalar[dtype]]()
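For context on the two optimizer hunks above: `optim.Adam` now takes `model.parameters` directly, and the explicit `Reference(...)` wrapper at the call site is gone. A minimal sketch of the updated training setup, assuming the loop methods used elsewhere in Basalt's examples (`forward`, `zero_grad`, `backward`, `step`); `graph`, `batch_x`, `batch_y`, `learning_rate`, and `num_epochs` are placeholders:

    var model = Model[graph]()
    var optim = optim.Adam[graph](model.parameters, lr=learning_rate)

    for epoch in range(num_epochs):
        # Forward pass returns the loss tensor; backward/step update the parameters.
        var loss = model.forward(batch_x, batch_y)
        optim.zero_grad()
        model.backward()
        optim.step()
        print("epoch:", epoch, "loss:", loss[0])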