Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions problems/p01/p01.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,7 +10,8 @@ alias dtype = DType.float32


fn add_10(
output: UnsafePointer[Scalar[dtype]], a: UnsafePointer[Scalar[dtype]]
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
):
i = thread_idx.x
# FILL ME IN (roughly 1 line)
Expand Down
7 changes: 3 additions & 4 deletions problems/p02/p02.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,9 +10,9 @@ alias dtype = DType.float32


fn add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
):
i = thread_idx.x
# FILL ME IN (roughly 1 line)
Expand Down
5 changes: 2 additions & 3 deletions problems/p03/p03.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32


fn add_10_guard(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
i = thread_idx.x
Expand Down
5 changes: 2 additions & 3 deletions problems/p04/p04.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32


fn add_10_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = thread_idx.y
Expand Down
7 changes: 3 additions & 4 deletions problems/p05/p05.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext, HostBuffer
from testing import assert_equal
Expand All @@ -11,9 +10,9 @@ alias dtype = DType.float32


fn broadcast_add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = thread_idx.y
Expand Down
5 changes: 2 additions & 3 deletions problems/p06/p06.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32


fn add_10_blocks(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
i = block_dim.x * block_idx.x + thread_idx.x
Expand Down
5 changes: 2 additions & 3 deletions problems/p07/p07.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32


fn add_10_blocks_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = block_dim.y * block_idx.y + thread_idx.y
Expand Down
6 changes: 3 additions & 3 deletions problems/p08/p08.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,8 +14,8 @@ alias dtype = DType.float32


fn add_10_shared(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
shared = stack_allocation[
Expand Down
1 change: 0 additions & 1 deletion problems/p08/p08_layout_tensor.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand Down
5 changes: 3 additions & 2 deletions problems/p09/p09.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -17,7 +16,8 @@ alias ITER = 2

# ANCHOR: first_crash
fn add_10(
output: UnsafePointer[Scalar[dtype]], a: UnsafePointer[Scalar[dtype]]
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
):
i = thread_idx.x
output[i] = a[i] + 10.0
Expand Down Expand Up @@ -105,6 +105,7 @@ def main():
print()

with DeviceContext() as ctx:
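# TODO: fix this -- the line below still builds a null pointer with the bare
# UnsafePointer type, while add_10 now takes UnsafeMutPointer/UnsafeImmutPointer;
# presumably it should be migrated to the matching pointer type (confirm).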
input_ptr = UnsafePointer[Scalar[dtype]]()
result_buf = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)

Expand Down
6 changes: 3 additions & 3 deletions problems/p11/p11.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,8 +14,8 @@ alias dtype = DType.float32


fn pooling(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
shared = stack_allocation[
Expand Down
8 changes: 4 additions & 4 deletions problems/p12/p12.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,9 +14,9 @@ alias dtype = DType.float32


fn dot_product(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
# FILL ME IN (roughly 13 lines)
Expand Down
1 change: 0 additions & 1 deletion problems/p17/op/conv1d.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ fn conv1d_kernel[
import compiler
from runtime.asyncrt import DeviceContextPtr
from tensor import InputTensor, OutputTensor
from memory import UnsafePointer
from gpu.host import DeviceBuffer


Expand Down
1 change: 0 additions & 1 deletion problems/p19/op/attention.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext, HostBuffer, DeviceBuffer
from gpu.memory import AddressSpace, async_copy_wait_all
Expand Down
4 changes: 2 additions & 2 deletions solutions/p01/p01.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,7 +10,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_solution
fn add_10(
output: UnsafePointer[Scalar[dtype]], a: UnsafePointer[Scalar[dtype]]
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
):
i = thread_idx.x
output[i] = a[i] + 10.0
Expand Down
7 changes: 3 additions & 4 deletions solutions/p02/p02.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,9 +10,9 @@ alias dtype = DType.float32

# ANCHOR: add_solution
fn add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
):
i = thread_idx.x
output[i] = a[i] + b[i]
Expand Down
5 changes: 2 additions & 3 deletions solutions/p03/p03.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_guard_solution
fn add_10_guard(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
i = thread_idx.x
Expand Down
5 changes: 2 additions & 3 deletions solutions/p04/p04.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_2d_solution
fn add_10_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = thread_idx.y
Expand Down
7 changes: 3 additions & 4 deletions solutions/p05/p05.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_dim, block_idx
from gpu.host import DeviceContext, HostBuffer
from testing import assert_equal
Expand All @@ -11,9 +10,9 @@ alias dtype = DType.float32

# ANCHOR: broadcast_add_solution
fn broadcast_add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = thread_idx.y
Expand Down
5 changes: 2 additions & 3 deletions solutions/p06/p06.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_blocks_solution
fn add_10_blocks(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
i = block_dim.x * block_idx.x + thread_idx.x
Expand Down
5 changes: 2 additions & 3 deletions solutions/p07/p07.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim
from gpu.host import DeviceContext
from testing import assert_equal
Expand All @@ -11,8 +10,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_blocks_2d_solution
fn add_10_blocks_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
row = block_dim.y * block_idx.y + thread_idx.y
Expand Down
6 changes: 3 additions & 3 deletions solutions/p08/p08.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,8 +14,8 @@ alias dtype = DType.float32

# ANCHOR: add_10_shared_solution
fn add_10_shared(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
shared = stack_allocation[
Expand Down
1 change: 0 additions & 1 deletion solutions/p08/p08_layout_tensor.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand Down
6 changes: 3 additions & 3 deletions solutions/p11/p11.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,8 +14,8 @@ alias dtype = DType.float32

# ANCHOR: pooling_solution
fn pooling(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
shared = stack_allocation[
Expand Down
8 changes: 4 additions & 4 deletions solutions/p12/p12.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from memory import UnsafePointer, stack_allocation
from memory import stack_allocation
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext
from gpu.memory import AddressSpace
Expand All @@ -14,9 +14,9 @@ alias dtype = DType.float32

# ANCHOR: dot_product_solution
fn dot_product(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
output: UnsafeMutPointer[Scalar[dtype]],
a: UnsafeImmutPointer[Scalar[dtype]],
b: UnsafeImmutPointer[Scalar[dtype]],
size: Int,
):
shared = stack_allocation[
Expand Down
1 change: 0 additions & 1 deletion solutions/p19/op/attention.mojo
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from memory import UnsafePointer
from gpu import thread_idx, block_idx, block_dim, barrier
from gpu.host import DeviceContext, HostBuffer, DeviceBuffer
from gpu.memory import AddressSpace, async_copy_wait_all
Expand Down