Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions problems/p03/p03.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_guard(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
i = thread_idx.x
# FILL ME IN (roughly 2 lines)
Expand All @@ -35,7 +35,7 @@ def main():
ctx.enqueue_function_checked[add_10_guard, add_10_guard](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p04/p04.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_2d(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -41,7 +41,7 @@ def main():
ctx.enqueue_function_checked[add_10_2d, add_10_2d](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p04/p04_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn add_10_2d(
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, MutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -46,7 +46,7 @@ def main():
ctx.enqueue_function_checked[add_10_2d, add_10_2d](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be easier to do this at initialization: alias SIZE: UInt = ....

grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p05/p05.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fn broadcast_add(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
b: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -45,7 +45,7 @@ def main():
out,
a,
b,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p05/p05_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn broadcast_add[
output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
b: LayoutTensor[dtype, b_layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -63,7 +63,7 @@ def main():
out_tensor,
a_tensor,
b_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p06/p06.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
i = block_dim.x * block_idx.x + thread_idx.x
# FILL ME IN (roughly 2 lines)
Expand All @@ -35,7 +35,7 @@ def main():
ctx.enqueue_function_checked[add_10_blocks, add_10_blocks](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p07/p07.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks_2d(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down Expand Up @@ -42,7 +42,7 @@ def main():
ctx.enqueue_function_checked[add_10_blocks_2d, add_10_blocks_2d](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p07/p07_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn add_10_blocks_2d[
](
output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down Expand Up @@ -53,7 +53,7 @@ def main():
ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p08/p08.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn add_10_shared(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down Expand Up @@ -48,7 +48,7 @@ def main():
ctx.enqueue_function_checked[add_10_shared, add_10_shared](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p08/p08_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn add_10_shared_layout_tensor[
](
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
# Allocate shared memory using LayoutTensor with explicit address_space
shared = LayoutTensor[
Expand Down Expand Up @@ -57,7 +57,7 @@ def main():
ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
8 changes: 4 additions & 4 deletions problems/p10/p10.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn shared_memory_race(
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -45,7 +45,7 @@ fn shared_memory_race(
fn add_10_2d(
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -94,7 +94,7 @@ def main():
ctx.enqueue_function_checked[add_10_2d, add_10_2d](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down Expand Up @@ -127,7 +127,7 @@ def main():
](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p11/p11.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn pooling(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down Expand Up @@ -44,7 +44,7 @@ def main():
ctx.enqueue_function_checked[pooling, pooling](
out,
a,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p11/p11_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn pooling[
](
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
Expand Down Expand Up @@ -53,7 +53,7 @@ def main():
ctx.enqueue_function_checked[pooling[layout], pooling[layout]](
out_tensor,
a_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p12/p12.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn dot_product(
output: UnsafePointer[Scalar[dtype], MutAnyOrigin],
a: UnsafePointer[Scalar[dtype], MutAnyOrigin],
b: UnsafePointer[Scalar[dtype], MutAnyOrigin],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand All @@ -43,7 +43,7 @@ def main():
out,
a,
b,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p12/p12_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fn dot_product[
output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand Down Expand Up @@ -54,7 +54,7 @@ def main():
out_tensor,
a_tensor,
b_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
6 changes: 3 additions & 3 deletions problems/p13/p13.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn conv_1d_simple[
b: LayoutTensor[dtype, conv_layout, ImmutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
local_i = Int(thread_idx.x)
# FILL ME IN (roughly 14 lines)


Expand All @@ -48,8 +48,8 @@ fn conv_1d_block_boundary[
a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
b: LayoutTensor[dtype, conv_layout, ImmutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
global_i = Int(block_dim.x * block_idx.x + thread_idx.x)
local_i = Int(thread_idx.x)
# FILL ME IN (roughly 18 lines)


Expand Down
12 changes: 6 additions & 6 deletions problems/p14/p14.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn prefix_sum_simple[
](
output: LayoutTensor[dtype, layout, MutAnyOrigin],
a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand All @@ -44,7 +44,7 @@ fn prefix_sum_local_phase[
](
output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand All @@ -54,7 +54,7 @@ fn prefix_sum_local_phase[
# Kernel 2: Add block sums to their respective blocks
fn prefix_sum_block_sum_phase[
layout: Layout
](output: LayoutTensor[dtype, layout, MutAnyOrigin], size: Int):
](output: LayoutTensor[dtype, layout, MutAnyOrigin], size: UInt):
global_i = block_dim.x * block_idx.x + thread_idx.x
# FILL ME IN (roughly 3 lines)

Expand Down Expand Up @@ -98,7 +98,7 @@ def main():
ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
size,
UInt(size),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand All @@ -114,7 +114,7 @@ def main():
ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
size,
UInt(size),
grid_dim=BLOCKS_PER_GRID_2,
block_dim=THREADS_PER_BLOCK_2,
)
Expand All @@ -128,7 +128,7 @@ def main():
alias kernel2 = prefix_sum_block_sum_phase[extended_layout]
ctx.enqueue_function_checked[kernel2, kernel2](
out_tensor,
size,
UInt(size),
grid_dim=BLOCKS_PER_GRID_2,
block_dim=THREADS_PER_BLOCK_2,
)
Expand Down
4 changes: 2 additions & 2 deletions problems/p15/p15.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn axis_sum[
](
output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand Down Expand Up @@ -52,7 +52,7 @@ def main():
ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
inp_tensor,
SIZE,
UInt(SIZE),
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
Expand Down
Loading