From fbf8968e7b26600b468ee34fadfb494574a6c22b Mon Sep 17 00:00:00 2001 From: Ceng <441651826@qq.com> Date: Mon, 4 Aug 2025 15:57:08 +0800 Subject: [PATCH 1/4] =?UTF-8?q?issue/338:=20infinirt=E9=80=82=E9=85=8D?= =?UTF-8?q?=E8=99=9A=E5=AD=98=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ceng <441651826@qq.com> --- include/infinirt.h | 39 +++++++++++ src/infinirt-test/main.cc | 6 ++ src/infinirt-test/test.cc | 89 +++++++++++++++++++++++++ src/infinirt-test/test.h | 2 + src/infinirt/cpu/infinirt_cpu.cc | 23 +++++++ src/infinirt/cuda/infinirt_cuda.cu | 103 +++++++++++++++++++++++++++++ src/infinirt/infinirt.cc | 25 +++++++ src/infinirt/infinirt_impl.h | 61 +++++++++-------- xmake/nvidia.lua | 15 +++++ 9 files changed, 335 insertions(+), 28 deletions(-) diff --git a/include/infinirt.h b/include/infinirt.h index ffecfef80..1a4438f1f 100644 --- a/include/infinirt.h +++ b/include/infinirt.h @@ -3,6 +3,11 @@ #include "infinicore.h" +#include +#include +#include +#include + typedef void *infinirtStream_t; typedef void *infinirtEvent_t; @@ -53,4 +58,38 @@ __C __export infiniStatus_t infinirtMemcpyAsync(void *dst, const void *src, size __C __export infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream_t stream); __C __export infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream); +// Virtual memory & physical memory +typedef void *infinirtMemProp_t; +typedef void *infinirtDeviceptr_t; +typedef void *infinirtAllocationHandle_t; + +// Represents a physical memory allocation, mirroring Rust's PhyMem. +struct infinirtPhyMem { + infinirtAllocationHandle_t handle; // Opaque handle to physical memory + size_t len; + infinirtMemProp_t prop; +}; + +// Represents a vacant region, storing its length. +using infinirtVacantRegion = size_t; +// Represents a mapped region, holding a shared pointer to the physical memory object. +using infinirtMappedRegion = std::shared_ptr; +// A region in virtual memory can be either mapped or vacant. +using infinirtPhyRegion = std::variant; + +struct infinirtVirtualMemManager { + infinirtDeviceptr_t device_ptr; + size_t len; + // Maps offset to a physical region (mapped or vacant). + std::map map; +}; + +__C __export infiniStatus_t infinirtGetMemProp(infinirtMemProp_t *prop, infiniDevice_t device, int device_id); +__C __export infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop); +__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop); + +__C __export infiniStatus_t infinirtCreateVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr); +__C __export infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem); +__C __export infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset); + #endif // __INFINIRT_API_H__ diff --git a/src/infinirt-test/main.cc b/src/infinirt-test/main.cc index 72c891220..b6a5968bd 100644 --- a/src/infinirt-test/main.cc +++ b/src/infinirt-test/main.cc @@ -98,6 +98,12 @@ int main(int argc, char *argv[]) { return 1; } } + + if (device == INFINI_DEVICE_NVIDIA) { + if (!testVirtualMem(device, deviceId)) { + return 1; + } + } } return 0; diff --git a/src/infinirt-test/test.cc b/src/infinirt-test/test.cc index 0c46888c0..492900ad3 100644 --- a/src/infinirt-test/test.cc +++ b/src/infinirt-test/test.cc @@ -1,7 +1,10 @@ #include "test.h" +#include #include #include #include +#include +#include bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize) { @@ -91,3 +94,89 @@ bool testSetDevice(infiniDevice_t device, int deviceId) { return true; } + +bool testVirtualMem(infiniDevice_t device, int deviceId) { + std::cout << "==============================================\n" + << "Testing virtual memory on Device ID: " << deviceId << "\n" + << "==============================================" << std::endl; + + infinirtMemProp_t prop; + if (infinirtGetMemProp(&prop, device, deviceId) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to get memory property for device " << device << " with ID " << deviceId << std::endl; + return false; + } + size_t min_granularity; + if (infinirtGetMemGranularityMinimum(&min_granularity, prop) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to get memory granularity minimum for device " << device << " with ID " << deviceId << std::endl; + return false; + } + std::cout << "Memory granularity minimum: " << min_granularity << " bytes" << std::endl; + + infinirtVirtualMemManager vm; + if (infinirtCreateVirtualMemManager(&vm, device, 10 * min_granularity, 0) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to reserve virtual memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + std::cout << "Virtual memory reserved: " << vm.len << " bytes" << std::endl; + + infinirtPhyMem phy_mem; + if (infinirtCreatePhysicalMem(&phy_mem, min_granularity, prop) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + std::cout << "Physical memory created: " << phy_mem.len << " bytes" << std::endl; + + void *mapped_ptr; + if (infinirtMapVirtualMem(&mapped_ptr, &vm, min_granularity, &phy_mem) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map virtual memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + std::cout << "Virtual memory mapped at address: " << mapped_ptr << std::endl; + + size_t num_elements = min_granularity / sizeof(size_t); + std::vector host_data(num_elements); + std::iota(host_data.begin(), host_data.end(), 0); + + if (infinirtMemcpy(mapped_ptr, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data from host to device." << std::endl; + return false; + } + + infinirtVirtualMemManager vm2; + if (infinirtCreateVirtualMemManager(&vm2, device, 2 * min_granularity, 0) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to reserve second virtual memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + + void *mapped_ptr2; + if (infinirtMapVirtualMem(&mapped_ptr2, &vm2, min_granularity, &phy_mem) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map second virtual memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + + std::vector host_data2(num_elements, 0); + if (infinirtMemcpy(host_data2.data(), mapped_ptr2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data from device to host." << std::endl; + return false; + } + + if (!std::equal(host_data.begin(), host_data.end(), host_data2.begin())) { + std::cerr << "Data mismatch between host_data and host_data2." << std::endl; + return false; + } + + std::cout << "Unmapping virtual memory..." << std::endl; + if (infinirtUnmapVirtualMem(&vm, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap virtual memory for device " << device << " with ID " << deviceId << std::endl; + return false; + } + + if (infinirtMemcpy(host_data.data(), mapped_ptr, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { + std::cerr << "Memory access after unmap should fail, but it succeeded." << std::endl; + return false; + } + + std::cout << "Virtual memory test PASSED!" << std::endl; + + return true; +} diff --git a/src/infinirt-test/test.h b/src/infinirt-test/test.h index 6c4d56fff..2645e49cd 100644 --- a/src/infinirt-test/test.h +++ b/src/infinirt-test/test.h @@ -4,5 +4,7 @@ bool testSetDevice(infiniDevice_t device, int deviceId); bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize); +bool testVirtualMem(infiniDevice_t device, int deviceId); +bool testVirtualMemUnmap(infiniDevice_t device, int deviceId); #endif diff --git a/src/infinirt/cpu/infinirt_cpu.cc b/src/infinirt/cpu/infinirt_cpu.cc index ea46deb02..2ee3c135d 100644 --- a/src/infinirt/cpu/infinirt_cpu.cc +++ b/src/infinirt/cpu/infinirt_cpu.cc @@ -88,4 +88,27 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { return freeDevice(ptr); } +infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} } // namespace infinirt::cpu diff --git a/src/infinirt/cuda/infinirt_cuda.cu b/src/infinirt/cuda/infinirt_cuda.cu index cc41617ac..8b0ff1d15 100644 --- a/src/infinirt/cuda/infinirt_cuda.cu +++ b/src/infinirt/cuda/infinirt_cuda.cu @@ -1,6 +1,8 @@ #include "../../utils.h" #include "infinirt_cuda.cuh" +#include #include +#include #define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess) @@ -134,4 +136,105 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream)); return INFINI_STATUS_SUCCESS; } + +infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) { + CUmemAllocationProp *cuda_prop = new CUmemAllocationProp(); + memset(cuda_prop, 0, sizeof(CUmemAllocationProp)); + cuda_prop->type = CU_MEM_ALLOCATION_TYPE_PINNED; + cuda_prop->requestedHandleTypes = CU_MEM_HANDLE_TYPE_NONE; + cuda_prop->location.type = CU_MEM_LOCATION_TYPE_DEVICE; + cuda_prop->location.id = device_id; + + *prop_ptr = cuda_prop; + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { + CHECK_CUDART(cuMemGetAllocationGranularity(granularity, (CUmemAllocationProp *)prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); + + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { + CUmemGenericAllocationHandle handle; + CUmemAllocationProp *cuda_prop = (CUmemAllocationProp *)prop; + CHECK_CUDART(cuMemCreate(&handle, len, (CUmemAllocationProp *)prop, 0)); + phy_mem->handle = (infinirtAllocationHandle_t)handle; + phy_mem->len = len; + phy_mem->prop = prop; + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { + CUdeviceptr device_ptr; + CHECK_CUDART(cuMemAddressReserve(&device_ptr, len, 0, (CUdeviceptr)min_addr, 0)); + vm->device_ptr = (infinirtDeviceptr_t)device_ptr; + vm->len = len; + vm->map.clear(); + vm->map[0] = infinirtVacantRegion(len); + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, + infinirtPhyMem *phy_mem) { + if (offset > vm->len || offset + phy_mem->len > vm->len) { + std::cerr << "Offset is out of range" + << " offset: " << offset << " phy_mem->len: " << phy_mem->len << " vm->len: " << vm->len << std::endl; + return INFINI_STATUS_BAD_PARAM; + } + auto it = vm->map.upper_bound(offset); + --it; + auto &[head, region] = *it; + + if (auto *vacant = std::get_if(®ion)) { + if (phy_mem->len > *vacant) { + std::cerr << "Physical memory length is greater than the vacant region length" << std::endl; + return INFINI_STATUS_BAD_PARAM; + } + + CUdeviceptr ptr = (CUdeviceptr)vm->device_ptr + offset; + CHECK_CUDART(cuMemMap(ptr, phy_mem->len, 0, (CUmemGenericAllocationHandle)phy_mem->handle, 0)); + CUmemAccessDesc desc = {}; + auto prop = (CUmemAllocationProp *)phy_mem->prop; + desc.location = prop->location; + desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; + CHECK_CUDART(cuMemSetAccess(ptr, phy_mem->len, &desc, 1)); + + vm->map.erase(it); + vm->map[offset] = std::make_shared(*phy_mem); + auto head_len = offset - head; + auto tail_len = *vacant - head_len - phy_mem->len; + if (head_len > 0) { + vm->map[head] = head_len; + } + if (tail_len > 0) { + vm->map[head + head_len + phy_mem->len] = tail_len; + } + + *mapped_ptr = (void *)ptr; + return INFINI_STATUS_SUCCESS; + } else { + std::cerr << "Virtual memory already mapped at offset: " << offset << std::endl; + return INFINI_STATUS_INTERNAL_ERROR; + } +} + +infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { + auto it = vm->map.find(offset); + if (it == vm->map.end()) { + return INFINI_STATUS_BAD_PARAM; + } + + if (auto *mapped = std::get_if(&it->second)) { + auto phy_mem = *mapped; + auto ptr = (CUdeviceptr)vm->device_ptr + offset; + CHECK_CUDART(cuMemUnmap(ptr, phy_mem->len)); + + it->second = phy_mem->len; + return INFINI_STATUS_SUCCESS; + } else { + return INFINI_STATUS_BAD_PARAM; + } +} + } // namespace infinirt::cuda diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index d57841532..87587c34e 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -170,3 +170,28 @@ __C infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream __C infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream) { INFINIRT_CALL_DEVICE_API(freeAsync, (ptr, stream)); } + +__C infiniStatus_t infinirtGetMemProp(infinirtMemProp_t *prop, infiniDevice_t device, int device_id) { + INFINIRT_CALL_DEVICE_API_AND(device, getMemProp, (prop, device, device_id), {}); +} + +__C infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { + INFINIRT_CALL_DEVICE_API(getMemGranularityMinimum, (granularity, prop)); +} + +__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { + INFINIRT_CALL_DEVICE_API(createPhysicalMem, (phy_mem, len, prop)); +} + +__C infiniStatus_t infinirtCreateVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { + INFINIRT_CALL_DEVICE_API(createVirtualMemManager, (vm, device, len, min_addr)); +} + +__C infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, + infinirtPhyMem *phy_mem) { + INFINIRT_CALL_DEVICE_API(mapVirtualMem, (mapped_ptr, vm, offset, phy_mem)); +} + +__C infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { + INFINIRT_CALL_DEVICE_API(unmapVirtualMem, (vm, offset)); +} diff --git a/src/infinirt/infinirt_impl.h b/src/infinirt/infinirt_impl.h index 0d6f8cf05..2445e18ba 100644 --- a/src/infinirt/infinirt_impl.h +++ b/src/infinirt/infinirt_impl.h @@ -2,35 +2,40 @@ #define __INFINIRT_IMPL_H__ #include "infinirt.h" -#define INFINIRT_DEVICE_API(IMPL, COUNT) \ - infiniStatus_t getDeviceCount(int *count) COUNT; \ - infiniStatus_t setDevice(int device_id) IMPL; \ - infiniStatus_t deviceSynchronize() IMPL; \ - \ - infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \ - infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \ - infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \ - infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \ - \ - infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \ - infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \ - infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \ - infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \ - infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \ - \ - infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \ - infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \ - infiniStatus_t freeDevice(void *ptr) IMPL; \ - infiniStatus_t freeHost(void *ptr) IMPL; \ - \ - infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \ - infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ - \ - infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ - infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; +#define INFINIRT_DEVICE_API(IMPL, COUNT) \ + infiniStatus_t getDeviceCount(int *count) COUNT; \ + infiniStatus_t setDevice(int device_id) IMPL; \ + infiniStatus_t deviceSynchronize() IMPL; \ + \ + infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \ + infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \ + infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \ + infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \ + \ + infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \ + infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \ + infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \ + infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \ + infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \ + \ + infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \ + infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \ + infiniStatus_t freeDevice(void *ptr) IMPL; \ + infiniStatus_t freeHost(void *ptr) IMPL; \ + \ + infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \ + infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ + \ + infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ + infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ + infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) IMPL; \ + infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) IMPL; \ + infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) IMPL; \ + infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) IMPL; \ + infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem) IMPL; \ + infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) IMPL; #define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, ) -#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, \ - {*count = 0; return INFINI_STATUS_SUCCESS; }) +#define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, {*count = 0; return INFINI_STATUS_SUCCESS; }) #endif // __INFINIRT_IMPL_H__ diff --git a/xmake/nvidia.lua b/xmake/nvidia.lua index 797edcb5e..7c2b65f94 100644 --- a/xmake/nvidia.lua +++ b/xmake/nvidia.lua @@ -69,6 +69,21 @@ target("infinirt-nvidia") set_toolchains("cuda") add_links("cudart") + on_load(function (target) + import("lib.detect.find_tool") + local nvcc = find_tool("nvcc") + if nvcc ~= nil then + if is_plat("windows") then + nvcc_path = os.iorun("where nvcc"):match("(.-)\r?\n") + else + nvcc_path = nvcc.program + end + + target:add("linkdirs", path.directory(path.directory(nvcc_path)) .. "/lib64/stubs") + target:add("links", "cuda") + end + end) + if is_plat("windows") then add_cuflags("-Xcompiler=/utf-8", "--expt-relaxed-constexpr", "--allow-unsupported-compiler") add_cxxflags("/FS") From adfb447fe557d7ef6016ce9bfcc401583a8b191c Mon Sep 17 00:00:00 2001 From: Ceng <441651826@qq.com> Date: Wed, 6 Aug 2025 17:08:43 +0800 Subject: [PATCH 2/4] update interface of virtual memory Signed-off-by: Ceng <441651826@qq.com> --- include/infinirt.h | 42 ++---- src/infinirt-test/test.cc | 204 ++++++++++++++++++----------- src/infinirt/cpu/infinirt_cpu.cc | 16 ++- src/infinirt/cuda/infinirt_cuda.cu | 136 +++++++++++++------ src/infinirt/infinirt.cc | 26 ++-- src/infinirt/infinirt_impl.h | 65 ++++----- 6 files changed, 295 insertions(+), 194 deletions(-) diff --git a/include/infinirt.h b/include/infinirt.h index 1a4438f1f..0dbcb2606 100644 --- a/include/infinirt.h +++ b/include/infinirt.h @@ -3,11 +3,6 @@ #include "infinicore.h" -#include -#include -#include -#include - typedef void *infinirtStream_t; typedef void *infinirtEvent_t; @@ -59,37 +54,18 @@ __C __export infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infin __C __export infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream); // Virtual memory & physical memory -typedef void *infinirtMemProp_t; typedef void *infinirtDeviceptr_t; typedef void *infinirtAllocationHandle_t; +typedef void *infinirtPhyMem_t; +typedef void *infinirtVirtualMem_t; -// Represents a physical memory allocation, mirroring Rust's PhyMem. -struct infinirtPhyMem { - infinirtAllocationHandle_t handle; // Opaque handle to physical memory - size_t len; - infinirtMemProp_t prop; -}; - -// Represents a vacant region, storing its length. -using infinirtVacantRegion = size_t; -// Represents a mapped region, holding a shared pointer to the physical memory object. -using infinirtMappedRegion = std::shared_ptr; -// A region in virtual memory can be either mapped or vacant. -using infinirtPhyRegion = std::variant; - -struct infinirtVirtualMemManager { - infinirtDeviceptr_t device_ptr; - size_t len; - // Maps offset to a physical region (mapped or vacant). - std::map map; -}; - -__C __export infiniStatus_t infinirtGetMemProp(infinirtMemProp_t *prop, infiniDevice_t device, int device_id); -__C __export infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop); -__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop); +__C __export infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity); +__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem_t *phy_mem, size_t len); +__C __export infiniStatus_t infinirtReleasePhysicalMem(infinirtPhyMem_t phy_mem); -__C __export infiniStatus_t infinirtCreateVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr); -__C __export infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem); -__C __export infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset); +__C __export infiniStatus_t infinirtCreateVirtualMem(infinirtVirtualMem_t *vm, size_t len); +__C __export infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem); +__C __export infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMem_t vm, size_t offset); +__C __export infiniStatus_t infinirtReleaseVirtualMem(infinirtVirtualMem_t vm); #endif // __INFINIRT_API_H__ diff --git a/src/infinirt-test/test.cc b/src/infinirt-test/test.cc index 492900ad3..a40d8d742 100644 --- a/src/infinirt-test/test.cc +++ b/src/infinirt-test/test.cc @@ -100,83 +100,141 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { << "Testing virtual memory on Device ID: " << deviceId << "\n" << "==============================================" << std::endl; - infinirtMemProp_t prop; - if (infinirtGetMemProp(&prop, device, deviceId) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to get memory property for device " << device << " with ID " << deviceId << std::endl; - return false; - } + // Get minimum granularity size_t min_granularity; - if (infinirtGetMemGranularityMinimum(&min_granularity, prop) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to get memory granularity minimum for device " << device << " with ID " << deviceId << std::endl; + if (infinirtGetMemGranularityMinimum(&min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to get memory granularity minimum" << std::endl; return false; } std::cout << "Memory granularity minimum: " << min_granularity << " bytes" << std::endl; - infinirtVirtualMemManager vm; - if (infinirtCreateVirtualMemManager(&vm, device, 10 * min_granularity, 0) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to reserve virtual memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - std::cout << "Virtual memory reserved: " << vm.len << " bytes" << std::endl; - - infinirtPhyMem phy_mem; - if (infinirtCreatePhysicalMem(&phy_mem, min_granularity, prop) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to create physical memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - std::cout << "Physical memory created: " << phy_mem.len << " bytes" << std::endl; - - void *mapped_ptr; - if (infinirtMapVirtualMem(&mapped_ptr, &vm, min_granularity, &phy_mem) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to map virtual memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - std::cout << "Virtual memory mapped at address: " << mapped_ptr << std::endl; - - size_t num_elements = min_granularity / sizeof(size_t); - std::vector host_data(num_elements); - std::iota(host_data.begin(), host_data.end(), 0); - - if (infinirtMemcpy(mapped_ptr, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to copy data from host to device." << std::endl; - return false; - } - - infinirtVirtualMemManager vm2; - if (infinirtCreateVirtualMemManager(&vm2, device, 2 * min_granularity, 0) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to reserve second virtual memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - - void *mapped_ptr2; - if (infinirtMapVirtualMem(&mapped_ptr2, &vm2, min_granularity, &phy_mem) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to map second virtual memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - - std::vector host_data2(num_elements, 0); - if (infinirtMemcpy(host_data2.data(), mapped_ptr2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to copy data from device to host." << std::endl; - return false; - } - - if (!std::equal(host_data.begin(), host_data.end(), host_data2.begin())) { - std::cerr << "Data mismatch between host_data and host_data2." << std::endl; - return false; - } - - std::cout << "Unmapping virtual memory..." << std::endl; - if (infinirtUnmapVirtualMem(&vm, min_granularity) != INFINI_STATUS_SUCCESS) { - std::cerr << "Failed to unmap virtual memory for device " << device << " with ID " << deviceId << std::endl; - return false; - } - - if (infinirtMemcpy(host_data.data(), mapped_ptr, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { - std::cerr << "Memory access after unmap should fail, but it succeeded." << std::endl; - return false; - } - - std::cout << "Virtual memory test PASSED!" << std::endl; - + // Test 1: Basic virtual memory allocation and release + { + std::cout << "\nTest 1: Basic virtual memory allocation and release" << std::endl; + infinirtVirtualMem_t vm; + size_t vm_len = 10 * min_granularity; + if (infinirtCreateVirtualMem(&vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to reserve virtual memory" << std::endl; + return false; + } + std::cout << "Virtual memory reserved: " << vm_len << " bytes" << std::endl; + + // Release virtual memory + if (infinirtReleaseVirtualMem(vm) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release virtual memory" << std::endl; + return false; + } + std::cout << "Virtual memory released successfully" << std::endl; + } + + // Test 2: Physical memory allocation and release + { + std::cout << "\nTest 2: Physical memory allocation and release" << std::endl; + infinirtPhyMem_t phy_mem; + if (infinirtCreatePhysicalMem(&phy_mem, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + return false; + } + std::cout << "Physical memory created: " << min_granularity << " bytes" << std::endl; + + // Release physical memory + if (infinirtReleasePhysicalMem(phy_mem) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory" << std::endl; + return false; + } + std::cout << "Physical memory released successfully" << std::endl; + } + + // Test 3: Virtual memory mapping and unmapping with data verification + { + std::cout << "\nTest 3: Virtual memory mapping and data verification" << std::endl; + + // Create virtual memory regions + infinirtVirtualMem_t vm1, vm2; + size_t vm_len = 10 * min_granularity; + if (infinirtCreateVirtualMem(&vm1, vm_len) != INFINI_STATUS_SUCCESS || + infinirtCreateVirtualMem(&vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create virtual memory regions" << std::endl; + return false; + } + + // Create physical memory + infinirtPhyMem_t phy_mem; + if (infinirtCreatePhysicalMem(&phy_mem, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + return false; + } + + // Map physical memory to both virtual memory regions + void *mapped_ptr1, *mapped_ptr2; + if (infinirtMapVirtualMem(&mapped_ptr1, vm1, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS || + infinirtMapVirtualMem(&mapped_ptr2, vm2, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map virtual memory" << std::endl; + return false; + } + + // Write data through first mapping + size_t num_elements = min_granularity / sizeof(size_t); + std::vector host_data(num_elements); + std::iota(host_data.begin(), host_data.end(), 0); + if (infinirtMemcpy(mapped_ptr1, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data to device" << std::endl; + return false; + } + + // Read data through second mapping + std::vector host_data2(num_elements, 0); + if (infinirtMemcpy(host_data2.data(), mapped_ptr2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to copy data from device" << std::endl; + return false; + } + + // Verify data + if (!std::equal(host_data.begin(), host_data.end(), host_data2.begin())) { + std::cerr << "Data mismatch between mappings" << std::endl; + return false; + } + + // Test unmapping + if (infinirtUnmapVirtualMem(vm1, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap virtual memory" << std::endl; + return false; + } + + // Verify memory access fails after unmapping + if (infinirtMemcpy(host_data.data(), mapped_ptr1, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { + std::cerr << "Memory access after unmap should fail" << std::endl; + return false; + } + + // Clean up all resources + std::cout << "\nCleaning up resources..." << std::endl; + + // Unmap remaining mapping + if (infinirtUnmapVirtualMem(vm2, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap second virtual memory" << std::endl; + return false; + } + + // Release physical memory + if (infinirtReleasePhysicalMem(phy_mem) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory" << std::endl; + return false; + } + + // Release virtual memory regions + if (infinirtReleaseVirtualMem(vm1) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release first virtual memory" << std::endl; + return false; + } + if (infinirtReleaseVirtualMem(vm2) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release second virtual memory" << std::endl; + return false; + } + + std::cout << "All resources cleaned up successfully" << std::endl; + } + + std::cout << "\nAll virtual memory tests PASSED!" << std::endl; return true; } diff --git a/src/infinirt/cpu/infinirt_cpu.cc b/src/infinirt/cpu/infinirt_cpu.cc index 2ee3c135d..a7486ea53 100644 --- a/src/infinirt/cpu/infinirt_cpu.cc +++ b/src/infinirt/cpu/infinirt_cpu.cc @@ -88,27 +88,31 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { return freeDevice(ptr); } -infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) { +infiniStatus_t getMemGranularityMinimum(size_t *granularity) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { +infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { +infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { +infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem) { +infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { +infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem) { + return INFINI_STATUS_NOT_IMPLEMENTED; +} + +infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { return INFINI_STATUS_NOT_IMPLEMENTED; } } // namespace infinirt::cpu diff --git a/src/infinirt/cuda/infinirt_cuda.cu b/src/infinirt/cuda/infinirt_cuda.cu index 8b0ff1d15..e197f9b81 100644 --- a/src/infinirt/cuda/infinirt_cuda.cu +++ b/src/infinirt/cuda/infinirt_cuda.cu @@ -2,11 +2,31 @@ #include "infinirt_cuda.cuh" #include #include +#include +#include #include +#include #define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess) namespace infinirt::cuda { +// Internal struct definitions for opaque pointers +struct PhyMemImpl { + infinirtAllocationHandle_t handle; + size_t len; + CUmemAllocationProp *prop; +}; + +using VacantRegion = size_t; +using MappedRegion = std::shared_ptr; +using PhyRegion = std::variant; + +struct VirtualMemManagerImpl { + infinirtDeviceptr_t device_ptr; + size_t len; + std::map map; +}; + infiniStatus_t getDeviceCount(int *count) { CHECK_CUDART(cudaGetDeviceCount(count)); return INFINI_STATUS_SUCCESS; @@ -137,78 +157,115 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { return INFINI_STATUS_SUCCESS; } -infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) { +CUmemAllocationProp *getMemProp() { + int device_id; + infinirtGetDevice(nullptr, &device_id); CUmemAllocationProp *cuda_prop = new CUmemAllocationProp(); memset(cuda_prop, 0, sizeof(CUmemAllocationProp)); cuda_prop->type = CU_MEM_ALLOCATION_TYPE_PINNED; cuda_prop->requestedHandleTypes = CU_MEM_HANDLE_TYPE_NONE; cuda_prop->location.type = CU_MEM_LOCATION_TYPE_DEVICE; cuda_prop->location.id = device_id; + return cuda_prop; +} + +infiniStatus_t getMemGranularityMinimum(size_t *granularity) { + CUmemAllocationProp *cuda_prop = getMemProp(); + CHECK_CUDART(cuMemGetAllocationGranularity(granularity, cuda_prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); - *prop_ptr = cuda_prop; return INFINI_STATUS_SUCCESS; } -infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { - CHECK_CUDART(cuMemGetAllocationGranularity(granularity, (CUmemAllocationProp *)prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); +infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { + CUmemGenericAllocationHandle handle; + CUmemAllocationProp *cuda_prop = getMemProp(); + CHECK_CUDART(cuMemCreate(&handle, len, cuda_prop, 0)); + + PhyMemImpl *impl = new PhyMemImpl; + impl->handle = (infinirtAllocationHandle_t)handle; + impl->len = len; + impl->prop = cuda_prop; + *phy_mem = (infinirtPhyMem_t)impl; return INFINI_STATUS_SUCCESS; } -infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { - CUmemGenericAllocationHandle handle; - CUmemAllocationProp *cuda_prop = (CUmemAllocationProp *)prop; - CHECK_CUDART(cuMemCreate(&handle, len, (CUmemAllocationProp *)prop, 0)); - phy_mem->handle = (infinirtAllocationHandle_t)handle; - phy_mem->len = len; - phy_mem->prop = prop; +infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) { + PhyMemImpl *impl = (PhyMemImpl *)phy_mem; + CHECK_CUDART(cuMemRelease((CUmemGenericAllocationHandle)impl->handle)); + delete impl->prop; + delete impl; return INFINI_STATUS_SUCCESS; } -infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { +infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) { CUdeviceptr device_ptr; - CHECK_CUDART(cuMemAddressReserve(&device_ptr, len, 0, (CUdeviceptr)min_addr, 0)); - vm->device_ptr = (infinirtDeviceptr_t)device_ptr; - vm->len = len; - vm->map.clear(); - vm->map[0] = infinirtVacantRegion(len); + CHECK_CUDART(cuMemAddressReserve(&device_ptr, len, 0, (CUdeviceptr)0, 0)); + + VirtualMemManagerImpl *impl = new VirtualMemManagerImpl; + impl->device_ptr = (infinirtDeviceptr_t)device_ptr; + impl->len = len; + impl->map.clear(); + impl->map[0] = VacantRegion(len); + + *vm = (infinirtVirtualMem_t)impl; return INFINI_STATUS_SUCCESS; } -infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, - infinirtPhyMem *phy_mem) { - if (offset > vm->len || offset + phy_mem->len > vm->len) { +infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) { + VirtualMemManagerImpl *impl = (VirtualMemManagerImpl *)vm; + + // First unmap all mapped regions + for (auto &[offset, region] : impl->map) { + if (auto *mapped = std::get_if(®ion)) { + CUdeviceptr ptr = (CUdeviceptr)impl->device_ptr + offset; + CHECK_CUDART(cuMemUnmap(ptr, (*mapped)->len)); + } + } + + // Then free the virtual address space + CHECK_CUDART(cuMemAddressFree((CUdeviceptr)impl->device_ptr, impl->len)); + + delete impl; + return INFINI_STATUS_SUCCESS; +} + +infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, + infinirtPhyMem_t phy_mem) { + VirtualMemManagerImpl *vm_impl = (VirtualMemManagerImpl *)vm; + PhyMemImpl *phy_impl = (PhyMemImpl *)phy_mem; + + if (offset > vm_impl->len || offset + phy_impl->len > vm_impl->len) { std::cerr << "Offset is out of range" - << " offset: " << offset << " phy_mem->len: " << phy_mem->len << " vm->len: " << vm->len << std::endl; + << " offset: " << offset << " phy_mem->len: " << phy_impl->len << " vm->len: " << vm_impl->len << std::endl; return INFINI_STATUS_BAD_PARAM; } - auto it = vm->map.upper_bound(offset); + auto it = vm_impl->map.upper_bound(offset); --it; auto &[head, region] = *it; - if (auto *vacant = std::get_if(®ion)) { - if (phy_mem->len > *vacant) { + if (auto *vacant = std::get_if(®ion)) { + if (phy_impl->len > *vacant) { std::cerr << "Physical memory length is greater than the vacant region length" << std::endl; return INFINI_STATUS_BAD_PARAM; } - CUdeviceptr ptr = (CUdeviceptr)vm->device_ptr + offset; - CHECK_CUDART(cuMemMap(ptr, phy_mem->len, 0, (CUmemGenericAllocationHandle)phy_mem->handle, 0)); + CUdeviceptr ptr = (CUdeviceptr)vm_impl->device_ptr + offset; + CHECK_CUDART(cuMemMap(ptr, phy_impl->len, 0, (CUmemGenericAllocationHandle)phy_impl->handle, 0)); CUmemAccessDesc desc = {}; - auto prop = (CUmemAllocationProp *)phy_mem->prop; - desc.location = prop->location; + desc.location = phy_impl->prop->location; desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; - CHECK_CUDART(cuMemSetAccess(ptr, phy_mem->len, &desc, 1)); + CHECK_CUDART(cuMemSetAccess(ptr, phy_impl->len, &desc, 1)); - vm->map.erase(it); - vm->map[offset] = std::make_shared(*phy_mem); + vm_impl->map.erase(it); + vm_impl->map[offset] = std::make_shared(*phy_impl); auto head_len = offset - head; - auto tail_len = *vacant - head_len - phy_mem->len; + auto tail_len = *vacant - head_len - phy_impl->len; if (head_len > 0) { - vm->map[head] = head_len; + vm_impl->map[head] = head_len; } if (tail_len > 0) { - vm->map[head + head_len + phy_mem->len] = tail_len; + vm_impl->map[head + head_len + phy_impl->len] = tail_len; } *mapped_ptr = (void *)ptr; @@ -219,15 +276,16 @@ infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, s } } -infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { - auto it = vm->map.find(offset); - if (it == vm->map.end()) { +infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { + VirtualMemManagerImpl *vm_impl = (VirtualMemManagerImpl *)vm; + auto it = vm_impl->map.find(offset); + if (it == vm_impl->map.end()) { return INFINI_STATUS_BAD_PARAM; } - if (auto *mapped = std::get_if(&it->second)) { + if (auto *mapped = std::get_if(&it->second)) { auto phy_mem = *mapped; - auto ptr = (CUdeviceptr)vm->device_ptr + offset; + auto ptr = (CUdeviceptr)vm_impl->device_ptr + offset; CHECK_CUDART(cuMemUnmap(ptr, phy_mem->len)); it->second = phy_mem->len; diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index 87587c34e..8783a81a2 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -171,27 +171,31 @@ __C infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream) { INFINIRT_CALL_DEVICE_API(freeAsync, (ptr, stream)); } -__C infiniStatus_t infinirtGetMemProp(infinirtMemProp_t *prop, infiniDevice_t device, int device_id) { - INFINIRT_CALL_DEVICE_API_AND(device, getMemProp, (prop, device, device_id), {}); +__C infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity) { + INFINIRT_CALL_DEVICE_API(getMemGranularityMinimum, (granularity)); } -__C infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) { - INFINIRT_CALL_DEVICE_API(getMemGranularityMinimum, (granularity, prop)); +__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { + INFINIRT_CALL_DEVICE_API(createPhysicalMem, (phy_mem, len)); } -__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) { - INFINIRT_CALL_DEVICE_API(createPhysicalMem, (phy_mem, len, prop)); +__C infiniStatus_t infinirtReleasePhysicalMem(infinirtPhyMem_t phy_mem) { + INFINIRT_CALL_DEVICE_API(releasePhysicalMem, (phy_mem)); } -__C infiniStatus_t infinirtCreateVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) { - INFINIRT_CALL_DEVICE_API(createVirtualMemManager, (vm, device, len, min_addr)); +__C infiniStatus_t infinirtCreateVirtualMem(infinirtVirtualMem_t *vm, size_t len) { + INFINIRT_CALL_DEVICE_API(createVirtualMem, (vm, len)); } -__C infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, - infinirtPhyMem *phy_mem) { +__C infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, + infinirtPhyMem_t phy_mem) { INFINIRT_CALL_DEVICE_API(mapVirtualMem, (mapped_ptr, vm, offset, phy_mem)); } -__C infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) { +__C infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { INFINIRT_CALL_DEVICE_API(unmapVirtualMem, (vm, offset)); } + +__C infiniStatus_t infinirtReleaseVirtualMem(infinirtVirtualMem_t vm) { + INFINIRT_CALL_DEVICE_API(releaseVirtualMem, (vm)); +} diff --git a/src/infinirt/infinirt_impl.h b/src/infinirt/infinirt_impl.h index 2445e18ba..658226603 100644 --- a/src/infinirt/infinirt_impl.h +++ b/src/infinirt/infinirt_impl.h @@ -2,38 +2,39 @@ #define __INFINIRT_IMPL_H__ #include "infinirt.h" -#define INFINIRT_DEVICE_API(IMPL, COUNT) \ - infiniStatus_t getDeviceCount(int *count) COUNT; \ - infiniStatus_t setDevice(int device_id) IMPL; \ - infiniStatus_t deviceSynchronize() IMPL; \ - \ - infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \ - infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \ - infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \ - infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \ - \ - infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \ - infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \ - infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \ - infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \ - infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \ - \ - infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \ - infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \ - infiniStatus_t freeDevice(void *ptr) IMPL; \ - infiniStatus_t freeHost(void *ptr) IMPL; \ - \ - infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \ - infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ - \ - infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ - infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ - infiniStatus_t getMemProp(infinirtMemProp_t *prop_ptr, infiniDevice_t device, int device_id) IMPL; \ - infiniStatus_t getMemGranularityMinimum(size_t *granularity, infinirtMemProp_t prop) IMPL; \ - infiniStatus_t createPhysicalMem(infinirtPhyMem *phy_mem, size_t len, infinirtMemProp_t prop) IMPL; \ - infiniStatus_t createVirtualMemManager(infinirtVirtualMemManager *vm, infiniDevice_t device, size_t len, size_t min_addr) IMPL; \ - infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMemManager *vm, size_t offset, infinirtPhyMem *phy_mem) IMPL; \ - infiniStatus_t unmapVirtualMem(infinirtVirtualMemManager *vm, size_t offset) IMPL; +#define INFINIRT_DEVICE_API(IMPL, COUNT) \ + infiniStatus_t getDeviceCount(int *count) COUNT; \ + infiniStatus_t setDevice(int device_id) IMPL; \ + infiniStatus_t deviceSynchronize() IMPL; \ + \ + infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) IMPL; \ + infiniStatus_t streamDestroy(infinirtStream_t stream) IMPL; \ + infiniStatus_t streamSynchronize(infinirtStream_t stream) IMPL; \ + infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) IMPL; \ + \ + infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) IMPL; \ + infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) IMPL; \ + infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) IMPL; \ + infiniStatus_t eventSynchronize(infinirtEvent_t event) IMPL; \ + infiniStatus_t eventDestroy(infinirtEvent_t event) IMPL; \ + \ + infiniStatus_t mallocDevice(void **p_ptr, size_t size) IMPL; \ + infiniStatus_t mallocHost(void **p_ptr, size_t size) IMPL; \ + infiniStatus_t freeDevice(void *ptr) IMPL; \ + infiniStatus_t freeHost(void *ptr) IMPL; \ + \ + infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) IMPL; \ + infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) IMPL; \ + \ + infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ + infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ + infiniStatus_t getMemGranularityMinimum(size_t *granularity) IMPL; \ + infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) IMPL; \ + infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) IMPL; \ + infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) IMPL; \ + infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem) IMPL; \ + infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) IMPL; \ + infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) IMPL; #define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, ) #define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, {*count = 0; return INFINI_STATUS_SUCCESS; }) From 349d98657b9fc9a4129265950617e65062b7d96a Mon Sep 17 00:00:00 2001 From: Ceng <441651826@qq.com> Date: Wed, 6 Aug 2025 17:17:57 +0800 Subject: [PATCH 3/4] fix format test Signed-off-by: Ceng <441651826@qq.com> --- src/infinirt-test/test.cc | 6 ++---- src/infinirt/infinirt_impl.h | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/infinirt-test/test.cc b/src/infinirt-test/test.cc index a40d8d742..2c3ac9834 100644 --- a/src/infinirt-test/test.cc +++ b/src/infinirt-test/test.cc @@ -152,8 +152,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { // Create virtual memory regions infinirtVirtualMem_t vm1, vm2; size_t vm_len = 10 * min_granularity; - if (infinirtCreateVirtualMem(&vm1, vm_len) != INFINI_STATUS_SUCCESS || - infinirtCreateVirtualMem(&vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { + if (infinirtCreateVirtualMem(&vm1, vm_len) != INFINI_STATUS_SUCCESS || infinirtCreateVirtualMem(&vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to create virtual memory regions" << std::endl; return false; } @@ -167,8 +166,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { // Map physical memory to both virtual memory regions void *mapped_ptr1, *mapped_ptr2; - if (infinirtMapVirtualMem(&mapped_ptr1, vm1, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS || - infinirtMapVirtualMem(&mapped_ptr2, vm2, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS) { + if (infinirtMapVirtualMem(&mapped_ptr1, vm1, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS || infinirtMapVirtualMem(&mapped_ptr2, vm2, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to map virtual memory" << std::endl; return false; } diff --git a/src/infinirt/infinirt_impl.h b/src/infinirt/infinirt_impl.h index 658226603..7c0b063d4 100644 --- a/src/infinirt/infinirt_impl.h +++ b/src/infinirt/infinirt_impl.h @@ -29,9 +29,9 @@ infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ infiniStatus_t getMemGranularityMinimum(size_t *granularity) IMPL; \ - infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) IMPL; \ + infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) IMPL; \ infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) IMPL; \ - infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) IMPL; \ + infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) IMPL; \ infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem) IMPL; \ infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) IMPL; \ infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) IMPL; From 8d8c4745ff755002570eabc7b8f7eafb436f92a4 Mon Sep 17 00:00:00 2001 From: Ceng <441651826@qq.com> Date: Tue, 12 Aug 2025 11:31:13 +0800 Subject: [PATCH 4/4] update interface of virtual memory Signed-off-by: Ceng <441651826@qq.com> --- include/infinirt.h | 17 ++-- src/infinirt-test/test.cc | 91 +++++++++++++++---- src/infinirt/cpu/infinirt_cpu.cc | 12 +-- src/infinirt/cuda/infinirt_cuda.cu | 135 +++++------------------------ src/infinirt/infinirt.cc | 24 ++--- src/infinirt/infinirt_impl.h | 12 +-- 6 files changed, 129 insertions(+), 162 deletions(-) diff --git a/include/infinirt.h b/include/infinirt.h index 0dbcb2606..82507a565 100644 --- a/include/infinirt.h +++ b/include/infinirt.h @@ -54,18 +54,15 @@ __C __export infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infin __C __export infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream); // Virtual memory & physical memory -typedef void *infinirtDeviceptr_t; -typedef void *infinirtAllocationHandle_t; -typedef void *infinirtPhyMem_t; -typedef void *infinirtVirtualMem_t; +typedef void *infinirtPhysicalMemoryHandle_t; __C __export infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity); -__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem_t *phy_mem, size_t len); -__C __export infiniStatus_t infinirtReleasePhysicalMem(infinirtPhyMem_t phy_mem); +__C __export infiniStatus_t infinirtCreatePhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len); +__C __export infiniStatus_t infinirtReleasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle); -__C __export infiniStatus_t infinirtCreateVirtualMem(infinirtVirtualMem_t *vm, size_t len); -__C __export infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem); -__C __export infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMem_t vm, size_t offset); -__C __export infiniStatus_t infinirtReleaseVirtualMem(infinirtVirtualMem_t vm); +__C __export infiniStatus_t infinirtCreateVirtualMem(void **vm, size_t len); +__C __export infiniStatus_t infinirtMapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle); +__C __export infiniStatus_t infinirtUnmapVirtualMem(void *vm, size_t len); +__C __export infiniStatus_t infinirtReleaseVirtualMem(void *vm, size_t len); #endif // __INFINIRT_API_H__ diff --git a/src/infinirt-test/test.cc b/src/infinirt-test/test.cc index 2c3ac9834..c2956a60b 100644 --- a/src/infinirt-test/test.cc +++ b/src/infinirt-test/test.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -111,7 +112,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { // Test 1: Basic virtual memory allocation and release { std::cout << "\nTest 1: Basic virtual memory allocation and release" << std::endl; - infinirtVirtualMem_t vm; + void *vm; size_t vm_len = 10 * min_granularity; if (infinirtCreateVirtualMem(&vm, vm_len) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to reserve virtual memory" << std::endl; @@ -120,7 +121,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { std::cout << "Virtual memory reserved: " << vm_len << " bytes" << std::endl; // Release virtual memory - if (infinirtReleaseVirtualMem(vm) != INFINI_STATUS_SUCCESS) { + if (infinirtReleaseVirtualMem(vm, vm_len) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to release virtual memory" << std::endl; return false; } @@ -130,15 +131,15 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { // Test 2: Physical memory allocation and release { std::cout << "\nTest 2: Physical memory allocation and release" << std::endl; - infinirtPhyMem_t phy_mem; - if (infinirtCreatePhysicalMem(&phy_mem, min_granularity) != INFINI_STATUS_SUCCESS) { + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to create physical memory" << std::endl; return false; } std::cout << "Physical memory created: " << min_granularity << " bytes" << std::endl; // Release physical memory - if (infinirtReleasePhysicalMem(phy_mem) != INFINI_STATUS_SUCCESS) { + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to release physical memory" << std::endl; return false; } @@ -150,7 +151,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { std::cout << "\nTest 3: Virtual memory mapping and data verification" << std::endl; // Create virtual memory regions - infinirtVirtualMem_t vm1, vm2; + void *vm1, *vm2; size_t vm_len = 10 * min_granularity; if (infinirtCreateVirtualMem(&vm1, vm_len) != INFINI_STATUS_SUCCESS || infinirtCreateVirtualMem(&vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to create virtual memory regions" << std::endl; @@ -158,15 +159,14 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { } // Create physical memory - infinirtPhyMem_t phy_mem; - if (infinirtCreatePhysicalMem(&phy_mem, min_granularity) != INFINI_STATUS_SUCCESS) { + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to create physical memory" << std::endl; return false; } // Map physical memory to both virtual memory regions - void *mapped_ptr1, *mapped_ptr2; - if (infinirtMapVirtualMem(&mapped_ptr1, vm1, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS || infinirtMapVirtualMem(&mapped_ptr2, vm2, min_granularity, phy_mem) != INFINI_STATUS_SUCCESS) { + if (infinirtMapVirtualMem(vm1, min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS || infinirtMapVirtualMem(vm2, min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to map virtual memory" << std::endl; return false; } @@ -175,14 +175,14 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { size_t num_elements = min_granularity / sizeof(size_t); std::vector host_data(num_elements); std::iota(host_data.begin(), host_data.end(), 0); - if (infinirtMemcpy(mapped_ptr1, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { + if (infinirtMemcpy(vm1, host_data.data(), min_granularity, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to copy data to device" << std::endl; return false; } // Read data through second mapping std::vector host_data2(num_elements, 0); - if (infinirtMemcpy(host_data2.data(), mapped_ptr2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { + if (infinirtMemcpy(host_data2.data(), vm2, min_granularity, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to copy data from device" << std::endl; return false; } @@ -200,7 +200,7 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { } // Verify memory access fails after unmapping - if (infinirtMemcpy(host_data.data(), mapped_ptr1, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { + if (infinirtMemcpy(host_data.data(), vm1, min_granularity, INFINIRT_MEMCPY_D2H) == INFINI_STATUS_SUCCESS) { std::cerr << "Memory access after unmap should fail" << std::endl; return false; } @@ -215,17 +215,17 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { } // Release physical memory - if (infinirtReleasePhysicalMem(phy_mem) != INFINI_STATUS_SUCCESS) { + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to release physical memory" << std::endl; return false; } // Release virtual memory regions - if (infinirtReleaseVirtualMem(vm1) != INFINI_STATUS_SUCCESS) { + if (infinirtReleaseVirtualMem(vm1, vm_len) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to release first virtual memory" << std::endl; return false; } - if (infinirtReleaseVirtualMem(vm2) != INFINI_STATUS_SUCCESS) { + if (infinirtReleaseVirtualMem(vm2, 2 * min_granularity) != INFINI_STATUS_SUCCESS) { std::cerr << "Failed to release second virtual memory" << std::endl; return false; } @@ -233,6 +233,65 @@ bool testVirtualMem(infiniDevice_t device, int deviceId) { std::cout << "All resources cleaned up successfully" << std::endl; } + // Test 4: Release virtual memory without unmapping + { + std::cout << "\nTest 4: Release virtual memory without unmapping" << std::endl; + + // Create virtual memory + void *vm; + size_t vm_len = 2 * min_granularity; + if (infinirtCreateVirtualMem(&vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create virtual memory" << std::endl; + return false; + } + + // Create physical memory + infinirtPhysicalMemoryHandle_t pm_handle; + if (infinirtCreatePhysicalMem(&pm_handle, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to create physical memory" << std::endl; + infinirtReleaseVirtualMem(vm, vm_len); + return false; + } + + // Map virtual memory to physical memory + if (infinirtMapVirtualMem(vm, min_granularity, 0, pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to map virtual memory" << std::endl; + infinirtReleasePhysicalMem(pm_handle); + infinirtReleaseVirtualMem(vm, vm_len); + return false; + } + + std::cout << "Attempting to release virtual memory without unmapping first..." << std::endl; + // Try to release virtual memory without unmapping - this should fail + if (infinirtReleaseVirtualMem(vm, vm_len) == INFINI_STATUS_SUCCESS) { + std::cerr << "ERROR: Virtual memory release succeeded without unmapping first!" << std::endl; + // Clean up anyway + infinirtUnmapVirtualMem(vm, min_granularity); + infinirtReleasePhysicalMem(pm_handle); + return false; + } + std::cout << "As expected, virtual memory release failed when mapped" << std::endl; + + // Clean up properly + if (infinirtUnmapVirtualMem(vm, min_granularity) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to unmap virtual memory during cleanup" << std::endl; + infinirtReleasePhysicalMem(pm_handle); + return false; + } + + if (infinirtReleasePhysicalMem(pm_handle) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release physical memory during cleanup" << std::endl; + return false; + } + + // Now release should succeed + if (infinirtReleaseVirtualMem(vm, vm_len) != INFINI_STATUS_SUCCESS) { + std::cerr << "Failed to release virtual memory after unmapping" << std::endl; + return false; + } + std::cout << "Successfully released virtual memory after proper unmapping" << std::endl; + } + std::cout << "\nAll virtual memory tests PASSED!" << std::endl; return true; } diff --git a/src/infinirt/cpu/infinirt_cpu.cc b/src/infinirt/cpu/infinirt_cpu.cc index a7486ea53..196ac144e 100644 --- a/src/infinirt/cpu/infinirt_cpu.cc +++ b/src/infinirt/cpu/infinirt_cpu.cc @@ -92,27 +92,27 @@ infiniStatus_t getMemGranularityMinimum(size_t *granularity) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { +infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) { +infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) { +infiniStatus_t createVirtualMem(void **vm, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) { +infiniStatus_t releaseVirtualMem(void *vm, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem) { +infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle) { return INFINI_STATUS_NOT_IMPLEMENTED; } -infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { +infiniStatus_t unmapVirtualMem(void *vm, size_t len) { return INFINI_STATUS_NOT_IMPLEMENTED; } } // namespace infinirt::cpu diff --git a/src/infinirt/cuda/infinirt_cuda.cu b/src/infinirt/cuda/infinirt_cuda.cu index e197f9b81..700738808 100644 --- a/src/infinirt/cuda/infinirt_cuda.cu +++ b/src/infinirt/cuda/infinirt_cuda.cu @@ -2,30 +2,11 @@ #include "infinirt_cuda.cuh" #include #include -#include -#include #include -#include #define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess) namespace infinirt::cuda { -// Internal struct definitions for opaque pointers -struct PhyMemImpl { - infinirtAllocationHandle_t handle; - size_t len; - CUmemAllocationProp *prop; -}; - -using VacantRegion = size_t; -using MappedRegion = std::shared_ptr; -using PhyRegion = std::variant; - -struct VirtualMemManagerImpl { - infinirtDeviceptr_t device_ptr; - size_t len; - std::map map; -}; infiniStatus_t getDeviceCount(int *count) { CHECK_CUDART(cudaGetDeviceCount(count)); @@ -176,123 +157,53 @@ infiniStatus_t getMemGranularityMinimum(size_t *granularity) { return INFINI_STATUS_SUCCESS; } -infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { +infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { CUmemGenericAllocationHandle handle; CUmemAllocationProp *cuda_prop = getMemProp(); CHECK_CUDART(cuMemCreate(&handle, len, cuda_prop, 0)); - PhyMemImpl *impl = new PhyMemImpl; - impl->handle = (infinirtAllocationHandle_t)handle; - impl->len = len; - impl->prop = cuda_prop; - - *phy_mem = (infinirtPhyMem_t)impl; + *pm_handle = (infinirtPhysicalMemoryHandle_t)handle; return INFINI_STATUS_SUCCESS; } -infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) { - PhyMemImpl *impl = (PhyMemImpl *)phy_mem; - CHECK_CUDART(cuMemRelease((CUmemGenericAllocationHandle)impl->handle)); - delete impl->prop; - delete impl; +infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { + CHECK_CUDART(cuMemRelease((CUmemGenericAllocationHandle)pm_handle)); return INFINI_STATUS_SUCCESS; } -infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) { +infiniStatus_t createVirtualMem(void **vm, size_t len) { CUdeviceptr device_ptr; CHECK_CUDART(cuMemAddressReserve(&device_ptr, len, 0, (CUdeviceptr)0, 0)); - VirtualMemManagerImpl *impl = new VirtualMemManagerImpl; - impl->device_ptr = (infinirtDeviceptr_t)device_ptr; - impl->len = len; - impl->map.clear(); - impl->map[0] = VacantRegion(len); + *vm = (void *)device_ptr; + return INFINI_STATUS_SUCCESS; +} - *vm = (infinirtVirtualMem_t)impl; +infiniStatus_t releaseVirtualMem(void *vm, size_t len) { + CHECK_CUDART(cuMemAddressFree((CUdeviceptr)vm, len)); return INFINI_STATUS_SUCCESS; } -infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) { - VirtualMemManagerImpl *impl = (VirtualMemManagerImpl *)vm; +infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, + infinirtPhysicalMemoryHandle_t pm_handle) { - // First unmap all mapped regions - for (auto &[offset, region] : impl->map) { - if (auto *mapped = std::get_if(®ion)) { - CUdeviceptr ptr = (CUdeviceptr)impl->device_ptr + offset; - CHECK_CUDART(cuMemUnmap(ptr, (*mapped)->len)); - } - } + CUdeviceptr ptr = (CUdeviceptr)vm + offset; + CHECK_CUDART(cuMemMap(ptr, len, 0, (CUmemGenericAllocationHandle)pm_handle, 0)); - // Then free the virtual address space - CHECK_CUDART(cuMemAddressFree((CUdeviceptr)impl->device_ptr, impl->len)); + CUmemAllocationProp *cuda_prop = getMemProp(); + CUmemAccessDesc desc = {}; + desc.location = cuda_prop->location; + desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; + CHECK_CUDART(cuMemSetAccess(ptr, len, &desc, 1)); - delete impl; return INFINI_STATUS_SUCCESS; } -infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, - infinirtPhyMem_t phy_mem) { - VirtualMemManagerImpl *vm_impl = (VirtualMemManagerImpl *)vm; - PhyMemImpl *phy_impl = (PhyMemImpl *)phy_mem; - - if (offset > vm_impl->len || offset + phy_impl->len > vm_impl->len) { - std::cerr << "Offset is out of range" - << " offset: " << offset << " phy_mem->len: " << phy_impl->len << " vm->len: " << vm_impl->len << std::endl; - return INFINI_STATUS_BAD_PARAM; - } - auto it = vm_impl->map.upper_bound(offset); - --it; - auto &[head, region] = *it; - - if (auto *vacant = std::get_if(®ion)) { - if (phy_impl->len > *vacant) { - std::cerr << "Physical memory length is greater than the vacant region length" << std::endl; - return INFINI_STATUS_BAD_PARAM; - } - - CUdeviceptr ptr = (CUdeviceptr)vm_impl->device_ptr + offset; - CHECK_CUDART(cuMemMap(ptr, phy_impl->len, 0, (CUmemGenericAllocationHandle)phy_impl->handle, 0)); - CUmemAccessDesc desc = {}; - desc.location = phy_impl->prop->location; - desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; - CHECK_CUDART(cuMemSetAccess(ptr, phy_impl->len, &desc, 1)); - - vm_impl->map.erase(it); - vm_impl->map[offset] = std::make_shared(*phy_impl); - auto head_len = offset - head; - auto tail_len = *vacant - head_len - phy_impl->len; - if (head_len > 0) { - vm_impl->map[head] = head_len; - } - if (tail_len > 0) { - vm_impl->map[head + head_len + phy_impl->len] = tail_len; - } - - *mapped_ptr = (void *)ptr; - return INFINI_STATUS_SUCCESS; - } else { - std::cerr << "Virtual memory already mapped at offset: " << offset << std::endl; - return INFINI_STATUS_INTERNAL_ERROR; - } -} - -infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { - VirtualMemManagerImpl *vm_impl = (VirtualMemManagerImpl *)vm; - auto it = vm_impl->map.find(offset); - if (it == vm_impl->map.end()) { - return INFINI_STATUS_BAD_PARAM; - } - - if (auto *mapped = std::get_if(&it->second)) { - auto phy_mem = *mapped; - auto ptr = (CUdeviceptr)vm_impl->device_ptr + offset; - CHECK_CUDART(cuMemUnmap(ptr, phy_mem->len)); +infiniStatus_t unmapVirtualMem(void *vm, size_t len) { + CUdeviceptr ptr = (CUdeviceptr)vm; + CHECK_CUDART(cuMemUnmap(ptr, len)); - it->second = phy_mem->len; - return INFINI_STATUS_SUCCESS; - } else { - return INFINI_STATUS_BAD_PARAM; - } + return INFINI_STATUS_SUCCESS; } } // namespace infinirt::cuda diff --git a/src/infinirt/infinirt.cc b/src/infinirt/infinirt.cc index 8783a81a2..29edf01ad 100644 --- a/src/infinirt/infinirt.cc +++ b/src/infinirt/infinirt.cc @@ -175,27 +175,27 @@ __C infiniStatus_t infinirtGetMemGranularityMinimum(size_t *granularity) { INFINIRT_CALL_DEVICE_API(getMemGranularityMinimum, (granularity)); } -__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) { - INFINIRT_CALL_DEVICE_API(createPhysicalMem, (phy_mem, len)); +__C infiniStatus_t infinirtCreatePhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) { + INFINIRT_CALL_DEVICE_API(createPhysicalMem, (pm_handle, len)); } -__C infiniStatus_t infinirtReleasePhysicalMem(infinirtPhyMem_t phy_mem) { - INFINIRT_CALL_DEVICE_API(releasePhysicalMem, (phy_mem)); +__C infiniStatus_t infinirtReleasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) { + INFINIRT_CALL_DEVICE_API(releasePhysicalMem, (pm_handle)); } -__C infiniStatus_t infinirtCreateVirtualMem(infinirtVirtualMem_t *vm, size_t len) { +__C infiniStatus_t infinirtCreateVirtualMem(void **vm, size_t len) { INFINIRT_CALL_DEVICE_API(createVirtualMem, (vm, len)); } -__C infiniStatus_t infinirtMapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, - infinirtPhyMem_t phy_mem) { - INFINIRT_CALL_DEVICE_API(mapVirtualMem, (mapped_ptr, vm, offset, phy_mem)); +__C infiniStatus_t infinirtMapVirtualMem(void *vm, size_t len, size_t offset, + infinirtPhysicalMemoryHandle_t pm_handle) { + INFINIRT_CALL_DEVICE_API(mapVirtualMem, (vm, len, offset, pm_handle)); } -__C infiniStatus_t infinirtUnmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) { - INFINIRT_CALL_DEVICE_API(unmapVirtualMem, (vm, offset)); +__C infiniStatus_t infinirtUnmapVirtualMem(void *vm, size_t len) { + INFINIRT_CALL_DEVICE_API(unmapVirtualMem, (vm, len)); } -__C infiniStatus_t infinirtReleaseVirtualMem(infinirtVirtualMem_t vm) { - INFINIRT_CALL_DEVICE_API(releaseVirtualMem, (vm)); +__C infiniStatus_t infinirtReleaseVirtualMem(void *vm, size_t len) { + INFINIRT_CALL_DEVICE_API(releaseVirtualMem, (vm, len)); } diff --git a/src/infinirt/infinirt_impl.h b/src/infinirt/infinirt_impl.h index 7c0b063d4..5c38a8084 100644 --- a/src/infinirt/infinirt_impl.h +++ b/src/infinirt/infinirt_impl.h @@ -29,12 +29,12 @@ infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) IMPL; \ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) IMPL; \ infiniStatus_t getMemGranularityMinimum(size_t *granularity) IMPL; \ - infiniStatus_t createPhysicalMem(infinirtPhyMem_t *phy_mem, size_t len) IMPL; \ - infiniStatus_t releasePhysicalMem(infinirtPhyMem_t phy_mem) IMPL; \ - infiniStatus_t createVirtualMem(infinirtVirtualMem_t *vm, size_t len) IMPL; \ - infiniStatus_t mapVirtualMem(void **mapped_ptr, infinirtVirtualMem_t vm, size_t offset, infinirtPhyMem_t phy_mem) IMPL; \ - infiniStatus_t unmapVirtualMem(infinirtVirtualMem_t vm, size_t offset) IMPL; \ - infiniStatus_t releaseVirtualMem(infinirtVirtualMem_t vm) IMPL; + infiniStatus_t createPhysicalMem(infinirtPhysicalMemoryHandle_t *pm_handle, size_t len) IMPL; \ + infiniStatus_t releasePhysicalMem(infinirtPhysicalMemoryHandle_t pm_handle) IMPL; \ + infiniStatus_t createVirtualMem(void **vm, size_t len) IMPL; \ + infiniStatus_t mapVirtualMem(void *vm, size_t len, size_t offset, infinirtPhysicalMemoryHandle_t pm_handle) IMPL; \ + infiniStatus_t unmapVirtualMem(void *vm, size_t len) IMPL; \ + infiniStatus_t releaseVirtualMem(void *vm, size_t len) IMPL; #define INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API(, ) #define INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API({ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; }, {*count = 0; return INFINI_STATUS_SUCCESS; })