diff --git a/src/x86-ubuntu-gpu-ml/build.sh b/src/x86-ubuntu-gpu-ml/build.sh index 8c9907ba..4a47aa32 100755 --- a/src/x86-ubuntu-gpu-ml/build.sh +++ b/src/x86-ubuntu-gpu-ml/build.sh @@ -14,5 +14,6 @@ fi # Install the needed plugins ./packer init x86-ubuntu-gpu-ml.pkr.hcl -# Build the image -./packer build x86-ubuntu-gpu-ml.pkr.hcl \ No newline at end of file +# Build the image - Pass command line options from this script to build +# command. This can be used to set variables such as the qemu path. +./packer build "$@" x86-ubuntu-gpu-ml.pkr.hcl \ No newline at end of file diff --git a/src/x86-ubuntu-gpu-ml/files/load_amdgpu.sh b/src/x86-ubuntu-gpu-ml/files/load_amdgpu.sh index a7742276..f3511883 100644 --- a/src/x86-ubuntu-gpu-ml/files/load_amdgpu.sh +++ b/src/x86-ubuntu-gpu-ml/files/load_amdgpu.sh @@ -24,6 +24,7 @@ insmod /lib/modules/`uname -r`/updates/dkms/amdkcl.ko.zst insmod /lib/modules/`uname -r`/updates/dkms/amd-sched.ko.zst insmod /lib/modules/`uname -r`/updates/dkms/amdxcp.ko.zst insmod /lib/modules/`uname -r`/updates/dkms/amddrm_buddy.ko.zst +insmod /lib/modules/`uname -r`/updates/dkms/amddrm_exec.ko.zst insmod /lib/modules/`uname -r`/updates/dkms/amdttm.ko.zst insmod /lib/modules/`uname -r`/updates/dkms/amddrm_ttm_helper.ko.zst diff --git a/src/x86-ubuntu-gpu-ml/files/mi350_discovery b/src/x86-ubuntu-gpu-ml/files/mi350_discovery new file mode 100644 index 00000000..49a8ae27 Binary files /dev/null and b/src/x86-ubuntu-gpu-ml/files/mi350_discovery differ diff --git a/src/x86-ubuntu-gpu-ml/scripts/rocm-install.sh b/src/x86-ubuntu-gpu-ml/scripts/rocm-install.sh index edeb5d26..c23fcfa1 100755 --- a/src/x86-ubuntu-gpu-ml/scripts/rocm-install.sh +++ b/src/x86-ubuntu-gpu-ml/scripts/rocm-install.sh @@ -60,10 +60,10 @@ sudo mkdir --parents --mode=0755 /etc/apt/keyrings wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \ gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null -echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] 
https://repo.radeon.com/amdgpu/6.4/ubuntu noble main" \ +echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/7.0/ubuntu noble main" \ | sudo tee /etc/apt/sources.list.d/amdgpu.list -echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4 noble main" \ +echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/7.0 noble main" \ | sudo tee --append /etc/apt/sources.list.d/rocm.list echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ | sudo tee /etc/apt/preferences.d/rocm-pin-600 @@ -86,8 +86,8 @@ sudo chmod 777 /usr/lib/firmware/amdgpu/ip_discovery.bin # Install a known-working version of Linux as this might change after stable -# release. Installl this after DKMS so they are rebuilt. -KERNEL=6.8.0-60-generic +# release. Install this after DKMS so they are rebuilt. +KERNEL=6.8.0-79-generic sudo apt -y install "linux-image-${KERNEL}" sudo apt -y install "linux-headers-${KERNEL}" "linux-modules-extra-${KERNEL}" @@ -105,6 +105,13 @@ if [ ! -f ./gem5_wmi.ko ]; then fi popd +# Make the discovery files writeable by packer +touch /usr/lib/firmware/amdgpu/mi300_discovery +touch /usr/lib/firmware/amdgpu/mi350_discovery + +chmod 777 /usr/lib/firmware/amdgpu/mi300_discovery +chmod 777 /usr/lib/firmware/amdgpu/mi350_discovery + # Note about pip: This disk is created for the express purpose of being run in # gem5 and is therefore effectively sandboxed enough that we can use the pip # option --break-system-packages. 
If you plan to modify this disk image with @@ -121,11 +128,9 @@ pip3 install --break-system-packages torch torchvision torchaudio --index-url ht # For a newer version uncomment one below and remove the above install: # Warning: Absurdly slow compared to ROCm 6.0 *in simulation*: -#pip3 install --break-system-packages torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4 -# Warning: Missing python module torch.sparse.......: -#pip3 install --break-system-packages torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.3 +#pip3 install --break-system-packages torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4 # Warning: nightly build, may not work depending on day. Use at your own risk: -#pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.4/ --break-system-packages +#pip3 install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/rocm7.0 --break-system-packages # Setup gem5 auto login. 
mv /home/gem5/serial-getty@.service /lib/systemd/system/ diff --git a/src/x86-ubuntu-gpu-ml/x86-ubuntu-gpu-ml.pkr.hcl b/src/x86-ubuntu-gpu-ml/x86-ubuntu-gpu-ml.pkr.hcl index a9e24531..4e7c2936 100644 --- a/src/x86-ubuntu-gpu-ml/x86-ubuntu-gpu-ml.pkr.hcl +++ b/src/x86-ubuntu-gpu-ml/x86-ubuntu-gpu-ml.pkr.hcl @@ -13,7 +13,7 @@ packer { variable "image_name" { type = string - default = "x86-ubuntu-gpu-ml" + default = "x86-ubuntu-rocm70" } variable "ssh_password" { @@ -26,6 +26,11 @@ variable "ssh_username" { default = "gem5" } +variable "qemu_path" { + type = string + default = "/usr/bin/qemu-system-x86_64" +} + source "qemu" "initialize" { accelerator = "kvm" boot_command = ["e", @@ -43,7 +48,7 @@ source "qemu" "initialize" { iso_urls = ["https://releases.ubuntu.com/24.04.2/ubuntu-24.04.2-live-server-amd64.iso"] memory = "8192" output_directory = "disk-image" - qemu_binary = "/usr/bin/qemu-system-x86_64" + qemu_binary = "${var.qemu_path}" qemuargs = [["-cpu", "host"], ["-display", "none"]] shutdown_command = "echo '${var.ssh_password}'|sudo -S shutdown -P now" ssh_password = "${var.ssh_password}" @@ -97,13 +102,18 @@ build { } provisioner "file" { - destination = "/usr/lib/firmware/amdgpu/ip_discovery.bin" + destination = "/usr/lib/firmware/amdgpu/mi300_discovery" source = "files/mi300_discovery" } + provisioner "file" { + destination = "/usr/lib/firmware/amdgpu/mi350_discovery" + source = "files/mi350_discovery" + } + provisioner "file" { source = "/home/gem5/vmlinux-gpu-ml" - destination = "vmlinux-gpu-ml" + destination = "vmlinux-rocm70" direction = "download" } }