@@ -4882,60 +4882,6 @@ struct test_topk_moe: public test_case {
48824882 }
48834883};
48844884
// Whole-graph test case: scales a 3D "experts" tensor by a "weights" tensor,
// then sums one 2D view per expert into a single output tensor.
// Parameters (all stored as const members and reported by vars()):
//   n_embd        - first dimension of the experts tensor
//   n_tokens      - third dimension of the experts and weights tensors
//   n_expert_used - second dimension of both tensors; number of views summed
4885- struct test_moe_expert_reduce : public test_case {
4886- const int64_t n_embd;
4887- const int64_t n_tokens;
4888- const int64_t n_expert_used;
4889-
// Defaults: n_embd = 64, n_tokens = 5, n_expert_used = 4.
4890- test_moe_expert_reduce (int64_t n_embd = 64 , int64_t n_tokens = 5 , int64_t n_expert_used = 4 )
4891- : n_embd(n_embd), n_tokens(n_tokens), n_expert_used(n_expert_used) {
// Require at least two experts: the add loop in build_graph starts at index 1,
// so a single expert would produce no ggml_add node at all.
4892- GGML_ASSERT (n_expert_used > 1 );
4893- }
4894-
// Returns the three size parameters formatted through VARS_TO_STR3.
4895- std::string vars () override {
4896- return VARS_TO_STR3 (n_embd, n_tokens, n_expert_used);
4897- }
4898-
// Returns a fixed description string; the tensor argument is ignored.
4899- std::string op_desc (ggml_tensor * t) override {
4900- GGML_UNUSED (t);
4901- return " MOE_EXPERT_REDUCE" ;
4902- }
4903-
// Always returns true for this test case.
4904- bool run_whole_graph () override { return true ; }
4905-
// Builds the graph and returns its final node (named "moe_out" modulo the
// literal as written below).
4906- ggml_tensor * build_graph (ggml_context * ctx) override {
// F32 input of shape (n_embd, n_expert_used, n_tokens).
4907- ggml_tensor * experts = ggml_new_tensor_3d (ctx, GGML_TYPE_F32, n_embd, n_expert_used, n_tokens);
4908- ggml_set_name (experts, " experts" );
4909-
// F32 input of shape (1, n_expert_used, n_tokens).
4910- ggml_tensor * weights = ggml_new_tensor_3d (ctx, GGML_TYPE_F32, 1 , n_expert_used, n_tokens);
4911- ggml_set_name (weights, " weights" );
4912-
// NOTE(review): presumably ggml_mul broadcasts the size-1 first dimension of
// weights across n_embd - confirm against the ggml_mul contract.
4913- ggml_tensor * weighted = ggml_mul (ctx, experts, weights);
4914- ggml_set_name (weighted, " weighted_experts" );
4915-
// One 2D view per expert: n_embd x n_tokens, row stride weighted->nb[2],
// starting at byte offset i * weighted->nb[1].
4916- std::vector<ggml_tensor *> expert_views (n_expert_used);
4917- for (int64_t i = 0 ; i < n_expert_used; ++i) {
4918- expert_views[i] = ggml_view_2d (ctx, weighted, n_embd, n_tokens, weighted->nb [2 ], i * weighted->nb [1 ]);
4919-
4920- std::string name = " expert_view_" + std::to_string (i);
4921- ggml_set_name (expert_views[i], name.c_str ());
// NOTE(review): gf is not declared in this struct - presumably a graph member
// inherited from test_case; verify. Each view is expanded into it explicitly.
4922- ggml_build_forward_expand (gf, expert_views[i]);
4923- }
4924-
// Left-to-right chain of adds: ((view0 + view1) + view2) + ...
4925- ggml_tensor * moe_out = expert_views[0 ];
4926- for (int64_t i = 1 ; i < n_expert_used; ++i) {
4927- moe_out = ggml_add (ctx, moe_out, expert_views[i]);
4928-
4929- std::string name = " expert_add_" + std::to_string (i - 1 );
4930- ggml_set_name (moe_out, name.c_str ());
4931- }
4932-
// Renames the last add node, replacing the expert_add_* name set in the loop.
4933- ggml_set_name (moe_out, " moe_out" );
4934-
4935- return moe_out;
4936- }
4937- };
4938-
49394885struct test_mul_mat_vec_fusion : public test_case {
49404886 const ggml_type type;
49414887 const ggml_glu_op glu_op;
@@ -7415,10 +7361,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
74157361 test_cases.emplace_back (new test_topk_moe ({ 8 , 22 , 1 , 1 }, 4 , /* with_norm*/ false , /* delayed_softmax*/ true ));
74167362 test_cases.emplace_back (new test_topk_moe ({ 32 , 22 , 1 , 1 }, 8 , /* with_norm*/ false , /* delayed_softmax*/ true ));
74177363
7418- test_cases.emplace_back (new test_moe_expert_reduce (1024 , 5 , 4 ));
7419- test_cases.emplace_back (new test_moe_expert_reduce (80 , 3 , 6 ));
7420- test_cases.emplace_back (new test_moe_expert_reduce (80 , 3 , 7 ));
7421-
74227364#if 0
74237365 // these tests are disabled to save execution time, but they can be handy for debugging
74247366 test_cases.emplace_back(new test_llama(2, true));
0 commit comments