From 011b2f914f343f4b7edf79cfe589c98c9682a00d Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Sun, 9 Nov 2025 16:20:35 -0800 Subject: [PATCH 1/2] cpu: skip NOPs to avoid barriers --- ggml/src/ggml-cpu/ggml-cpu.c | 44 +++++++++++++++++++++++------------- ggml/src/ggml-cpu/ops.cpp | 40 -------------------------------- ggml/src/ggml-cpu/ops.h | 4 ---- 3 files changed, 28 insertions(+), 60 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index b5466dd703d1d..a6aee688df782 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -1807,22 +1807,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_cont(params, tensor); } break; - case GGML_OP_RESHAPE: - { - ggml_compute_forward_reshape(params, tensor); - } break; - case GGML_OP_VIEW: - { - ggml_compute_forward_view(params, tensor); - } break; - case GGML_OP_PERMUTE: - { - ggml_compute_forward_permute(params, tensor); - } break; - case GGML_OP_TRANSPOSE: - { - ggml_compute_forward_transpose(params, tensor); - } break; case GGML_OP_GET_ROWS: { ggml_compute_forward_get_rows(params, tensor); @@ -2042,6 +2026,22 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { // nop } break; + case GGML_OP_RESHAPE: + { + // nop + } break; + case GGML_OP_PERMUTE: + { + // nop + } break; + case GGML_OP_VIEW: + { + // nop + } break; + case GGML_OP_TRANSPOSE: + { + // nop + } break; case GGML_OP_COUNT: { GGML_ABORT("fatal error"); @@ -2884,6 +2884,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) { struct ggml_tensor * node = cgraph->nodes[node_n]; + // skip NOPs + switch (node->op) { + case GGML_OP_NONE: + case GGML_OP_RESHAPE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + case GGML_OP_TRANSPOSE: + continue; + default: + break; + } + ggml_compute_forward(¶ms, node); if (state->ith == 0 && cplan->abort_callback && diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 8235f69594391..7c42fb78b4a4f 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -4455,46 +4455,6 @@ void ggml_compute_forward_cont( ggml_compute_forward_dup(params, dst); } -// ggml_compute_forward_reshape - -void ggml_compute_forward_reshape( - const ggml_compute_params * params, - ggml_tensor * dst) { - // NOP - GGML_UNUSED(params); - GGML_UNUSED(dst); -} - -// ggml_compute_forward_view - -void ggml_compute_forward_view( - const ggml_compute_params * params, - ggml_tensor * dst) { - // NOP - GGML_UNUSED(params); - GGML_UNUSED(dst); -} - -// ggml_compute_forward_permute - -void ggml_compute_forward_permute( - const ggml_compute_params * params, - ggml_tensor * dst) { - // NOP - GGML_UNUSED(params); - GGML_UNUSED(dst); -} - -// ggml_compute_forward_transpose - -void ggml_compute_forward_transpose( - const ggml_compute_params * params, - ggml_tensor * dst) { - // NOP - GGML_UNUSED(params); - GGML_UNUSED(dst); -} - // ggml_compute_forward_get_rows static void ggml_compute_forward_get_rows_q( diff --git a/ggml/src/ggml-cpu/ops.h b/ggml/src/ggml-cpu/ops.h index 9824a03b45833..2b4127c12b15e 100644 --- a/ggml/src/ggml-cpu/ops.h +++ b/ggml/src/ggml-cpu/ops.h @@ -51,10 +51,6 @@ void ggml_compute_forward_scale(const struct ggml_compute_params * params, struc void ggml_compute_forward_set(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cpy(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cont(const struct ggml_compute_params * params, struct ggml_tensor * dst); -void ggml_compute_forward_reshape(const struct ggml_compute_params * params, struct ggml_tensor * dst); -void ggml_compute_forward_view(const struct ggml_compute_params * params, struct ggml_tensor * dst); -void ggml_compute_forward_permute(const struct ggml_compute_params * params, struct ggml_tensor * dst); -void ggml_compute_forward_transpose(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_get_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_get_rows_back(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_set_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst); From 3b5b5d3443d68d751d0987a0e062bc68e0aa2a83 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Mon, 10 Nov 2025 08:42:02 -0800 Subject: [PATCH 2/2] cpu: use ggml_op_is_empty --- ggml/src/ggml-cpu/ggml-cpu.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index a6aee688df782..086708bae0b29 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -2884,16 +2884,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) { struct ggml_tensor * node = cgraph->nodes[node_n]; - // skip NOPs - switch (node->op) { - case GGML_OP_NONE: - case GGML_OP_RESHAPE: - case GGML_OP_VIEW: - case GGML_OP_PERMUTE: - case GGML_OP_TRANSPOSE: + if (ggml_op_is_empty(node->op)) { + // skip NOPs continue; - default: - break; } ggml_compute_forward(¶ms, node);