From 39b7605d3e301f11c62e260e33fd2d2dac445c8c Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Wed, 5 Nov 2025 09:53:09 -0700 Subject: [PATCH 1/5] feat(tests): Add --verbose | -v flag to test-backend-ops to print tensors Branch: Mamba2Perf Signed-off-by: Gabe Goodhart --- tests/test-backend-ops.cpp | 180 +++++++++++++++++++++++++++++++------ 1 file changed, 155 insertions(+), 25 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 967a53c63d86d..3128cd0edccd4 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -175,6 +175,33 @@ static void init_tensor_kq_mask(ggml_tensor * tensor, float min = -1.0f, float m ggml_backend_tensor_set(tensor, data_f16.data(), 0, data_f16.size()*sizeof(ggml_fp16_t)); } +static std::vector ggml_get_float_value(uint8_t * buf, ggml_type type, size_t i, size_t bs, + bool quantized, std::vector & vq) { + const auto * tt = ggml_get_type_traits(type); + std::vector tv; + if (type == GGML_TYPE_F16) { + tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i])); + } else if (type == GGML_TYPE_BF16) { + tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i])); + } else if (type == GGML_TYPE_F32) { + tv.push_back(*(float *) &buf[i]); + } else if (type == GGML_TYPE_I64) { + tv.push_back((float)*(int64_t *) &buf[i]); + } else if (type == GGML_TYPE_I32) { + tv.push_back((float)*(int32_t *) &buf[i]); + } else if (type == GGML_TYPE_I16) { + tv.push_back((float)*(int16_t *) &buf[i]); + } else if (type == GGML_TYPE_I8) { + tv.push_back((float)*(int8_t *) &buf[i]); + } else if (quantized) { + tt->to_float(&buf[i], vq.data(), bs); + tv.insert(tv.end(), vq.begin(), vq.end()); + } else { + GGML_ABORT("fatal error"); + } + return tv; +} + static std::vector tensor_to_float(const ggml_tensor * t) { std::vector tv; tv.reserve(ggml_nelements(t)); @@ -182,7 +209,6 @@ static std::vector tensor_to_float(const ggml_tensor * t) { std::vector buf(ggml_nbytes(t)); ggml_backend_tensor_get(t, buf.data(), 0, 
ggml_nbytes(t)); - const auto * tt = ggml_get_type_traits(t->type); size_t bs = ggml_blck_size(t->type); std::vector vq(ggml_blck_size(t->type)); bool quantized = ggml_is_quantized(t->type); @@ -193,26 +219,8 @@ static std::vector tensor_to_float(const ggml_tensor * t) { for (int64_t i1 = 0; i1 < t->ne[1]; i1++) { for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) { size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0]; - if (t->type == GGML_TYPE_F16) { - tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i])); - } else if (t->type == GGML_TYPE_BF16) { - tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i])); - } else if (t->type == GGML_TYPE_F32) { - tv.push_back(*(float *) &buf[i]); - } else if (t->type == GGML_TYPE_I64) { - tv.push_back((float)*(int64_t *) &buf[i]); - } else if (t->type == GGML_TYPE_I32) { - tv.push_back((float)*(int32_t *) &buf[i]); - } else if (t->type == GGML_TYPE_I16) { - tv.push_back((float)*(int16_t *) &buf[i]); - } else if (t->type == GGML_TYPE_I8) { - tv.push_back((float)*(int8_t *) &buf[i]); - } else if (quantized) { - tt->to_float(&buf[i], vq.data(), bs); - tv.insert(tv.end(), vq.begin(), vq.end()); - } else { - GGML_ABORT("fatal error"); - } + const auto fvs = ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq); + tv.insert(tv.end(), fvs.begin(), fvs.end()); } } } @@ -221,6 +229,107 @@ static std::vector tensor_to_float(const ggml_tensor * t) { return tv; } +static std::string ggml_ne_string(const ggml_tensor * t) { + std::string str; + for (int i = 0; i < GGML_MAX_DIMS; ++i) { + str += std::to_string(t->ne[i]); + if (i + 1 < GGML_MAX_DIMS) { + str += ", "; + } + } + return str; +} + +static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) { + GGML_ASSERT(t != nullptr); + GGML_ASSERT(n > 0); + + std::stringstream src_ss; + src_ss << "("; + size_t last_src = 0; + for (size_t i = 0; i < GGML_MAX_SRC; ++i) { + if (t->src[i] != nullptr) { + last_src = i; + } + } + for (size_t i = 0; i < GGML_MAX_SRC; ++i) { + 
if (t->src[i] != nullptr) { + src_ss << t->src[i]->name << "{" << ggml_ne_string(t->src[i]) <<"}"; + } + if (i <= last_src) { + src_ss << ", "; + } + } + src_ss << ")"; + + printf("%s: %24s = (%s) %10s%s = {%s}\n", __func__, + t->name, ggml_type_name(t->type), ggml_op_desc(t), + src_ss.str().c_str(), + ggml_ne_string(t).c_str()); + + std::vector tv; + tv.reserve(ggml_nelements(t)); + + std::vector buf(ggml_nbytes(t)); + ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t)); + + size_t bs = ggml_blck_size(t->type); + std::vector vq(ggml_blck_size(t->type)); + bool quantized = ggml_is_quantized(t->type); + + float sum = 0; + for (int64_t i3 = 0; i3 < t->ne[3]; i3++) { + for (int64_t i2 = 0; i2 < t->ne[2]; i2++) { + for (int64_t i1 = 0; i1 < t->ne[1]; i1++) { + for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) { + size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0]; + for (const auto & val : ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq)) { + sum += val; + } + } + } + } + } + for (int64_t i3 = 0; i3 < t->ne[3]; i3++) { + printf(" [\n"); + for (int64_t i2 = 0; i2 < t->ne[2]; i2++) { + if (i2 == n && t->ne[2] > 2*n) { + printf(" ..., \n"); + i2 = t->ne[2] - n; + } + printf(" [\n"); + for (int64_t i1 = 0; i1 < t->ne[1]; i1++) { + if (i1 == n && t->ne[1] > 2*n) { + printf(" ..., \n"); + i1 = t->ne[1] - n; + } + printf(" ["); + for (int64_t i0 = 0; i0 < t->ne[0]; i0++) { + size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0]; + if (i0 == n && t->ne[0] > 2*n) { + printf("..., "); + i0 = t->ne[0] - n; + } + for (const auto & v : ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq)) { + printf("%12.4f", v); + } + if (i0 < t->ne[0] - 1) printf(", "); + } + printf("],\n"); + } + printf(" ],\n"); + } + printf(" ]\n"); + printf(" sum = %f\n", sum); + } + + // TODO: make this abort configurable/optional? 
+ if (std::isnan(sum)) { + printf("encountered NaN - aborting\n"); + exit(0); + } +} + // normalized mean squared error = mse(a, b) / mse(a, 0) static double nmse(const float * a, const float * b, size_t n) { double mse_a_b = 0.0; @@ -993,6 +1102,8 @@ static std::unique_ptr create_printer(output_formats format) { GGML_ABORT("invalid output format"); } +// test case definition + struct test_case { virtual ~test_case() {} @@ -1071,6 +1182,9 @@ struct test_case { std::string current_op_name; + // set to true to print tensors + bool verbose = false; + void add_sentinel(ggml_context * ctx) { if (mode == MODE_PERF || mode == MODE_GRAD || mode == MODE_SUPPORT) { return; @@ -1220,6 +1334,7 @@ struct test_case { // compare struct callback_userdata { bool ok; + bool verbose; double max_err; ggml_backend_t backend1; ggml_backend_t backend2; @@ -1227,6 +1342,7 @@ struct test_case { callback_userdata ud { true, + verbose, max_nmse_err(), backend1, backend2 @@ -1251,6 +1367,11 @@ struct test_case { } } + if (ud->verbose) { + ggml_print_tensor(t1); + ggml_print_tensor(t2); + } + std::vector f1 = tensor_to_float(t1); std::vector f2 = tensor_to_float(t2); @@ -6193,7 +6314,7 @@ static const ggml_type other_types[] = { }; // Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low -static std::vector> make_test_cases_eval() { +static std::vector> make_test_cases_eval(bool verbose = false) { std::vector> test_cases; std::default_random_engine rng(0); @@ -7329,6 +7450,11 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_falcon(2)); #endif + // set verbose on all test cases + for (auto & tc : test_cases) { + tc->verbose = verbose; + } + return test_cases; } @@ -7493,7 +7619,7 @@ static std::vector> make_test_cases_perf() { } static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter, const char * params_filter, - printer * output_printer) { + printer * output_printer, 
bool verbose) { auto filter_test_cases = [](std::vector> & test_cases, const char * params_filter) { if (params_filter == nullptr) { return; @@ -7512,7 +7638,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op }; if (mode == MODE_TEST) { - auto test_cases = make_test_cases_eval(); + auto test_cases = make_test_cases_eval(verbose); filter_test_cases(test_cases, params_filter); ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL); if (backend_cpu == NULL) { @@ -7701,6 +7827,7 @@ static void usage(char ** argv) { printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available GGML operations\n"); printf(" --show-coverage shows test coverage\n"); + printf(" --verbose | -v print tensors during ops\n"); } int main(int argc, char ** argv) { @@ -7709,6 +7836,7 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; + bool verbose = false; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -7756,6 +7884,8 @@ int main(int argc, char ** argv) { } else if (strcmp(argv[i], "--show-coverage") == 0) { show_test_coverage(); return 0; + } else if (strcmp(argv[i], "--verbose") == 0 || strcmp(argv[i], "-v") == 0) { + verbose = true; } else { usage(argv); return 1; @@ -7808,7 +7938,7 @@ int main(int argc, char ** argv) { false, "", ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024, true)); - bool ok = test_backend(backend, mode, op_names_filter, params_filter, output_printer.get()); + bool ok = test_backend(backend, mode, op_names_filter, params_filter, output_printer.get(), verbose); if (ok) { n_ok++; From f27d92674dbd3728c0b30d271c92213960293c2b Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Wed, 5 Nov 2025 09:54:38 -0700 Subject: [PATCH 2/5] test: Allow multiple verbose flags to fully print 
tensors Branch: Mamba2SSD Signed-off-by: Gabe Goodhart --- tests/test-backend-ops.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 3128cd0edccd4..674e8534f0f4f 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1183,7 +1183,7 @@ struct test_case { std::string current_op_name; // set to true to print tensors - bool verbose = false; + bool verbose = 0; void add_sentinel(ggml_context * ctx) { if (mode == MODE_PERF || mode == MODE_GRAD || mode == MODE_SUPPORT) { @@ -1334,7 +1334,7 @@ struct test_case { // compare struct callback_userdata { bool ok; - bool verbose; + int verbose; double max_err; ggml_backend_t backend1; ggml_backend_t backend2; @@ -1368,8 +1368,8 @@ struct test_case { } if (ud->verbose) { - ggml_print_tensor(t1); - ggml_print_tensor(t2); + ggml_print_tensor(t1, ud->verbose >= 2 ? 1e10 : 3); + ggml_print_tensor(t2, ud->verbose >= 2 ? 1e10 : 3); } std::vector f1 = tensor_to_float(t1); @@ -6314,7 +6314,7 @@ static const ggml_type other_types[] = { }; // Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low -static std::vector> make_test_cases_eval(bool verbose = false) { +static std::vector> make_test_cases_eval(int verbose = 0) { std::vector> test_cases; std::default_random_engine rng(0); @@ -7619,7 +7619,7 @@ static std::vector> make_test_cases_perf() { } static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter, const char * params_filter, - printer * output_printer, bool verbose) { + printer * output_printer, int verbose) { auto filter_test_cases = [](std::vector> & test_cases, const char * params_filter) { if (params_filter == nullptr) { return; @@ -7827,7 +7827,7 @@ static void usage(char ** argv) { printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available 
GGML operations\n"); printf(" --show-coverage shows test coverage\n"); - printf(" --verbose | -v print tensors during ops\n"); + printf(" --verbose | -v print tensors during ops (can specify multiple times)\n"); } int main(int argc, char ** argv) { @@ -7836,7 +7836,7 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; - bool verbose = false; + int verbose = 0; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -7885,7 +7885,7 @@ int main(int argc, char ** argv) { show_test_coverage(); return 0; } else if (strcmp(argv[i], "--verbose") == 0 || strcmp(argv[i], "-v") == 0) { - verbose = true; + ++verbose; } else { usage(argv); return 1; From 7085faab46f6d347dedd784fba46d6063e62bc5c Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Wed, 5 Nov 2025 09:55:53 -0700 Subject: [PATCH 3/5] test: More verbose printing for nmse mismatch Branch: Mamba2SSD Signed-off-by: Gabe Goodhart --- tests/test-backend-ops.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 674e8534f0f4f..4fe91915c2967 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1401,11 +1401,12 @@ struct test_case { double err = nmse(f1.data(), f2.data(), f1.size()); if (err > ud->max_err) { printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err); - //for (int i = 0; i < (int) f1.size(); i++) { - // printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]); - //} - //printf("\n"); - //exit(1); + if (ud->verbose) { + for (int i = 0; i < (int) f1.size(); i++) { + printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]); + } + printf("\n"); + } ud->ok = false; } return true; From 826d0f0b02dc6aa999b41b423de6a143fdc2cc71 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Wed, 5 Nov 2025 11:00:00 -0700 Subject: [PATCH 4/5] fix: Use 
std::string for string constants w/ std::stringstream This seems to break on some Windows builds https://github.com/ggml-org/llama.cpp/actions/runs/19109769793/job/54603475583?pr=17029 Branch: TestBackendOpsVerbosity Signed-off-by: Gabe Goodhart --- tests/test-backend-ops.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 4fe91915c2967..a64423cbeef10 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -245,7 +245,7 @@ static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) { GGML_ASSERT(n > 0); std::stringstream src_ss; - src_ss << "("; + src_ss << std::string("("); size_t last_src = 0; for (size_t i = 0; i < GGML_MAX_SRC; ++i) { if (t->src[i] != nullptr) { @@ -254,13 +254,13 @@ static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) { for (size_t i = 0; i < GGML_MAX_SRC; ++i) { if (t->src[i] != nullptr) { - src_ss << t->src[i]->name << "{" << ggml_ne_string(t->src[i]) <<"}"; + src_ss << t->src[i]->name << std::string("{") << ggml_ne_string(t->src[i]) << std::string("}"); } if (i <= last_src) { - src_ss << ", "; + src_ss << std::string(", "); } } - src_ss << ")"; + src_ss << std::string(")"); printf("%s: %24s = (%s) %10s%s = {%s}\n", __func__, t->name, ggml_type_name(t->type), ggml_op_desc(t), From 6826051685cbf73c3fbd4162c3927796e9dd8d7b Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Wed, 5 Nov 2025 13:58:46 -0700 Subject: [PATCH 5/5] fix: Get rid of stringstream use Branch: TestBackendOpsVerbosity Signed-off-by: Gabe Goodhart --- tests/test-backend-ops.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index a64423cbeef10..69211b0175a15 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -244,8 +244,9 @@ static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) { GGML_ASSERT(t != nullptr); GGML_ASSERT(n > 0); - 
std::stringstream src_ss; - src_ss << std::string("("); + printf("%s: %24s = (%s) %10s(", __func__, + t->name, ggml_type_name(t->type), ggml_op_desc(t)); + size_t last_src = 0; for (size_t i = 0; i < GGML_MAX_SRC; ++i) { if (t->src[i] != nullptr) { @@ -254,18 +255,13 @@ static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) { } for (size_t i = 0; i < GGML_MAX_SRC; ++i) { if (t->src[i] != nullptr) { - src_ss << t->src[i]->name << std::string("{") << ggml_ne_string(t->src[i]) << std::string("}"); + printf("%s{%s}", t->src[i]->name, ggml_ne_string(t->src[i]).c_str()); } - if (i <= last_src) { - src_ss << std::string(", "); + if (i < last_src) { + printf(", "); } } - src_ss << std::string(")"); - - printf("%s: %24s = (%s) %10s%s = {%s}\n", __func__, - t->name, ggml_type_name(t->type), ggml_op_desc(t), - src_ss.str().c_str(), - ggml_ne_string(t).c_str()); + printf(") = {%s}\n", ggml_ne_string(t).c_str()); std::vector tv; tv.reserve(ggml_nelements(t));