Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
{ "name": "remoting_frontend", "hidden": true, "cacheVariables": { "GGML_REMOTING_FRONTEND": "ON" } },
{ "name": "remoting_backend", "hidden": true, "cacheVariables": { "GGML_REMOTING_BACKEND": "ON" } },

{
"name": "x64-windows-llvm", "hidden": true,
Expand Down
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)

option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_REMOTING_FRONTEND "ggml: use the API Remoting frontend" OFF)
option(GGML_REMOTING_BACKEND "ggml: use the API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
Expand Down Expand Up @@ -317,6 +319,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
include/ggml-remoting-frontend.h
include/gguf.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
Expand Down
16 changes: 16 additions & 0 deletions ggml/include/ggml-remoting-frontend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// Name string associated with the API Remoting frontend backend.
#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

// Returns the backend registration handle of the API Remoting frontend.
//
// The parameter list is spelled `(void)` on purpose: this header is consumed
// from C as well as C++ (see the extern "C" guard), and in C an empty `()`
// declares a function with unspecified arguments instead of a prototype.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_remoting_frontend_reg(void);

#ifdef __cplusplus
}
#endif
2 changes: 2 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,8 @@ ggml_add_backend(WebGPU)
ggml_add_backend(zDNN)
ggml_add_backend(OpenCL)
ggml_add_backend(Hexagon)
ggml_add_backend(RemotingFrontend)
ggml_add_backend(RemotingBackend)

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@
#include "ggml-cann.h"
#endif

#ifdef GGML_USE_REMOTINGFRONTEND
#include "ggml-remoting-frontend.h"
#endif

// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
Expand Down Expand Up @@ -200,6 +204,10 @@ struct ggml_backend_registry {
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_REMOTINGFRONTEND
register_backend(ggml_backend_remoting_frontend_reg());
#endif

#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
Expand Down Expand Up @@ -604,6 +612,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("remoting_frontend", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-metal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ ggml_add_backend_library(ggml-metal
ggml-metal-common.cpp
ggml-metal-context.m
ggml-metal-ops.cpp
ggml-metal-remoting.cpp
)

target_link_libraries(ggml-metal PRIVATE
Expand Down
28 changes: 28 additions & 0 deletions ggml/src/ggml-metal/ggml-metal-remoting.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"

#include "ggml-metal-device.h"
#include "ggml-metal-impl.h"
#include "ggml-metal-context.h"

extern "C" {

// Prototype of the exported helper. The parameter names match the definition
// below (the last one was previously spelled `use_bfloat` here).
GGML_BACKEND_API void ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
                                                            bool *has_simdgroup_mm,
                                                            bool *has_simdgroup_reduction,
                                                            bool *has_bfloat);

// Copies three capability flags of a Metal device into caller-provided storage.
//
//   dev                      device whose `context` is reinterpreted as a
//                            ggml_metal_device_t
//                            (assumes `dev` is a Metal device - TODO confirm at call sites)
//   has_simdgroup_mm         out: props->has_simdgroup_mm
//   has_simdgroup_reduction  out: props->has_simdgroup_reduction
//   has_bfloat               out: props->has_bfloat
//
// All three output pointers are written unconditionally, so they must be valid.
GGML_BACKEND_API void
ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
                                      bool *has_simdgroup_mm,
                                      bool *has_simdgroup_reduction,
                                      bool *has_bfloat) {
    ggml_metal_device_t dev_ctx = (ggml_metal_device_t)dev->context;

    const struct ggml_metal_device_props *props = ggml_metal_device_get_props(dev_ctx);

    *has_bfloat              = props->has_bfloat;
    *has_simdgroup_reduction = props->has_simdgroup_reduction;
    *has_simdgroup_mm        = props->has_simdgroup_mm;
}

} // extern "C"
21 changes: 21 additions & 0 deletions ggml/src/ggml-remotingbackend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)

message(STATUS "Enable API Remoting backend")

# Backend library for the API Remoting backend side.
# Headers are listed next to the sources so that they show up in IDE projects.
ggml_add_backend_library(ggml-remotingbackend
    backend.cpp
    backend-dispatched.cpp
    backend-dispatched-backend.cpp
    backend-dispatched-device.cpp
    backend-dispatched-buffer.cpp
    backend-dispatched-buffer-type.cpp
    backend-dispatched-metal.cpp
    backend-utils.cpp
    shared/api_remoting.h
    shared/apir_backend.h
    shared/venus_cs.h
    venus_cs_ggml-rpc-back.cpp
)

# Request C++20 through a compile feature instead of a raw `-std=c++20` flag:
# CMake then emits the right switch for the active compiler (the raw flag is
# GCC/Clang specific) and does not end up with two conflicting -std options
# when a project-wide CMAKE_CXX_STANDARD is also in effect.
target_compile_features(ggml-remotingbackend PRIVATE cxx_std_20)
15 changes: 15 additions & 0 deletions ggml/src/ggml-remotingbackend/backend-convert.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Include guard: this header defines functions, so including it twice in one
// translation unit would otherwise produce redefinition errors.
#pragma once

#include "shared/apir_backend.h"

// Shorthand for converting a ggml buffer into the handle sent to the remote side.
#define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name)

// Converts a ggml buffer into its APIR host handle.
static inline apir_buffer_host_handle_t
ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
  // in the backend, the buffer handle is the buffer pointer
  return (apir_buffer_host_handle_t) buffer;
}

// Converts a ggml buffer type into its APIR host handle.
static inline apir_buffer_type_host_handle_t
ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
  // in the backend, the buffer-type handle is the buffer-type pointer
  return (apir_buffer_type_host_handle_t) buft;
}
58 changes: 58 additions & 0 deletions ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <cstdint>
#include "backend-internal.h"
#include "backend-dispatched.h"

#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"

#include "shared/apir_backend.h"

struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"};

// Dispatch handler: decodes a serialized compute graph from shared memory,
// runs it on the backend and encodes the resulting ggml_status into `enc`.
//
// Wire format read from `dec`: the shmem resource id, then the size of the
// serialized graph stored in that shared memory region.
// NOTE(review): `cgraph_size` comes from the peer and is not checked here
// against the real size of the shmem region - confirm it is validated elsewhere.
//
// Always returns 0; failures are reported through the encoded status.
uint32_t
backend_graph_compute(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
  start_timer(&graph_compute_timer);

  uint32_t shmem_res_id;
  vn_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);

  const void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id);
  if (!shmem_data) {
    FATAL("Couldn't get the shmem addr from virgl :/");
  }
  size_t cgraph_size;
  vn_decode_size_t(dec, &cgraph_size);

  // The graph itself lives in shared memory, so it gets its own decoder.
  struct vn_cs_decoder secondary_dec = vn_cs_new_decoder((const char *) shmem_data, cgraph_size);

  ggml_cgraph *cgraph = vn_decode_ggml_cgraph(&secondary_dec, cgraph_size);

  ggml_status status;
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
  // Optional safety net: refuse the whole graph if any node is unsupported.
  for (int idx = 0; idx < cgraph->n_nodes; idx++) {
    ggml_tensor *op = ggml_graph_node(cgraph, idx);
    if (dev->iface.supports_op(dev, op)) {
      continue;
    }
    ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op));

    status = GGML_STATUS_ABORTED;
    vn_encode_ggml_status(enc, &status);

    stop_timer(&graph_compute_timer);
    return 0;
  }
#endif
  status = bck->iface.graph_compute(bck, cgraph);

  // Guard the call: a backend may leave the `synchronize` callback unset
  // (ggml's own wrapper treats it as optional - confirm against ggml-backend),
  // and calling through a null function pointer would crash the host.
  if (bck->iface.synchronize) {
    bck->iface.synchronize(bck);
  }

  vn_encode_ggml_status(enc, &status);

  stop_timer(&graph_compute_timer);

  return 0;
}
81 changes: 81 additions & 0 deletions ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include <cstdint>
#include "backend-internal.h"
#include "backend-dispatched.h"

#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"

// Dispatch handler: replies with the name of the buffer type decoded from `dec`.
// The reply is the array size followed by the characters, terminator included.
uint32_t
backend_buffer_type_get_name(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
  UNUSED(ctx);

  ggml_backend_buffer_type_t buft = vn_decode_ggml_buffer_type(dec);
  const char *name = buft->iface.get_name(buft);

  // +1 so that the trailing NUL travels with the string
  const size_t name_size = strlen(name) + 1;

  vn_encode_array_size(enc, name_size);
  vn_encode_char_array(enc, name, name_size);

  return 0;
}

// Dispatch handler: replies with the alignment reported by the buffer type
// decoded from `dec`, encoded as a size_t.
uint32_t
backend_buffer_type_get_alignment(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
  UNUSED(ctx);

  ggml_backend_buffer_type_t buft = vn_decode_ggml_buffer_type(dec);

  size_t alignment = buft->iface.get_alignment(buft);
  vn_encode_size_t(enc, &alignment);

  return 0;
}

// Dispatch handler: replies with the maximum buffer size of the buffer type
// decoded from `dec`, encoded as a size_t.
//
// The `get_max_size` callback may be unset; in that case SIZE_MAX is reported
// instead of calling through a null function pointer.
// NOTE(review): SIZE_MAX is the default ggml's public wrapper is understood to
// use for an unset callback - confirm against ggml-backend.cpp.
uint32_t
backend_buffer_type_get_max_size(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
  UNUSED(ctx);
  ggml_backend_buffer_type_t buft;
  buft = vn_decode_ggml_buffer_type(dec);

  size_t value = SIZE_MAX;
  if (buft->iface.get_max_size) {
    value = buft->iface.get_max_size(buft);
  }
  vn_encode_size_t(enc, &value);

  return 0;
}

// Dispatch handler: replies with whether the buffer type decoded from `dec`
// is a host buffer type, encoded as a bool.
//
// The `is_host` callback may be unset; in that case `false` is reported
// instead of calling through a null function pointer.
// NOTE(review): `false` is the default ggml's public wrapper is understood to
// use for an unset callback - confirm against ggml-backend.cpp.
uint32_t
backend_buffer_type_is_host(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
  UNUSED(ctx);
  ggml_backend_buffer_type_t buft;
  buft = vn_decode_ggml_buffer_type(dec);

  bool is_host = false;
  if (buft->iface.is_host) {
    is_host = buft->iface.is_host(buft);
  }
  vn_encode_bool_t(enc, &is_host);

  return 0;
}

uint32_t
backend_buffer_type_alloc_buffer(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
UNUSED(ctx);

ggml_backend_buffer_type_t buft;
buft = vn_decode_ggml_buffer_type(dec);

size_t size;
vn_decode_size_t(dec, &size);

ggml_backend_buffer_t buffer;

buffer = buft->iface.alloc_buffer(buft, size);

vn_encode_ggml_buffer(enc, buffer);

if (buffer) {
track_backend_buffer(buffer);
}

return 0;
}
Loading