@@ -4432,6 +4432,17 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
44324432 SRV_DBG (" response: %s\n " , res.body .c_str ());
44334433}
44344434
4435+ static void res_error (httplib::Response & res, const json & error_data) {
4436+ json final_response {{" error" , error_data}};
4437+ res.set_content (safe_json_to_str (final_response), MIMETYPE_JSON);
4438+ res.status = json_value (error_data, " code" , 500 );
4439+ }
4440+
4441+ static void res_ok (httplib::Response & res, const json & data) {
4442+ res.set_content (safe_json_to_str (data), MIMETYPE_JSON);
4443+ res.status = 200 ;
4444+ }
4445+
// Process-wide callback taking an int; empty until something assigns it.
// NOTE(review): presumably invoked with the signal number when the process is
// asked to stop — confirm against the code that assigns and calls it.
std::function<void(int)> shutdown_handler;

// Flag that starts in the clear state.
// NOTE(review): presumably set once termination has begun so that a repeated
// shutdown request can be detected — confirm against the signal handler.
std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;
44374448
@@ -4501,19 +4512,7 @@ int main(int argc, char ** argv) {
45014512
45024513 svr->set_default_headers ({{" Server" , " llama.cpp" }});
45034514 svr->set_logger (log_server_request);
4504-
4505- auto res_error = [](httplib::Response & res, const json & error_data) {
4506- json final_response {{" error" , error_data}};
4507- res.set_content (safe_json_to_str (final_response), MIMETYPE_JSON);
4508- res.status = json_value (error_data, " code" , 500 );
4509- };
4510-
4511- auto res_ok = [](httplib::Response & res, const json & data) {
4512- res.set_content (safe_json_to_str (data), MIMETYPE_JSON);
4513- res.status = 200 ;
4514- };
4515-
4516- svr->set_exception_handler ([&res_error](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
4515+ svr->set_exception_handler ([](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
45174516 std::string message;
45184517 try {
45194518 std::rethrow_exception (ep);
@@ -4532,7 +4531,7 @@ int main(int argc, char ** argv) {
45324531 }
45334532 });
45344533
4535- svr->set_error_handler ([&res_error ](const httplib::Request &, httplib::Response & res) {
4534+ svr->set_error_handler ([](const httplib::Request &, httplib::Response & res) {
45364535 if (res.status == 404 ) {
45374536 res_error (res, format_error_response (" File Not Found" , ERROR_TYPE_NOT_FOUND));
45384537 }
@@ -4562,7 +4561,7 @@ int main(int argc, char ** argv) {
45624561 // Middlewares
45634562 //
45644563
4565- auto middleware_validate_api_key = [¶ms, &res_error ](const httplib::Request & req, httplib::Response & res) {
4564+ auto middleware_validate_api_key = [¶ms](const httplib::Request & req, httplib::Response & res) {
45664565 static const std::unordered_set<std::string> public_endpoints = {
45674566 " /health" ,
45684567 " /v1/health" ,
@@ -4600,7 +4599,7 @@ int main(int argc, char ** argv) {
46004599 return false ;
46014600 };
46024601
4603- auto middleware_server_state = [&res_error, & state](const httplib::Request & req, httplib::Response & res) {
4602+ auto middleware_server_state = [&state](const httplib::Request & req, httplib::Response & res) {
46044603 server_state current_state = state.load ();
46054604 if (current_state == SERVER_STATE_LOADING_MODEL) {
46064605 auto tmp = string_split<std::string>(req.path , ' .' );
@@ -4788,7 +4787,7 @@ int main(int argc, char ** argv) {
47884787 res.status = 200 ; // HTTP OK
47894788 };
47904789
4791- const auto handle_slots_save = [&ctx_server, &res_error, &res_ok, & params](const httplib::Request & req, httplib::Response & res, int id_slot) {
4790+ const auto handle_slots_save = [&ctx_server, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
47924791 json request_data = json::parse (req.body );
47934792 std::string filename = request_data.at (" filename" );
47944793 if (!fs_validate_filename (filename)) {
@@ -4820,7 +4819,7 @@ int main(int argc, char ** argv) {
48204819 res_ok (res, result->to_json ());
48214820 };
48224821
4823- const auto handle_slots_restore = [&ctx_server, &res_error, &res_ok, & params](const httplib::Request & req, httplib::Response & res, int id_slot) {
4822+ const auto handle_slots_restore = [&ctx_server, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
48244823 json request_data = json::parse (req.body );
48254824 std::string filename = request_data.at (" filename" );
48264825 if (!fs_validate_filename (filename)) {
@@ -4853,7 +4852,7 @@ int main(int argc, char ** argv) {
48534852 res_ok (res, result->to_json ());
48544853 };
48554854
4856- const auto handle_slots_erase = [&ctx_server, &res_error, &res_ok ](const httplib::Request & /* req */ , httplib::Response & res, int id_slot) {
4855+ const auto handle_slots_erase = [&ctx_server](const httplib::Request & /* req */ , httplib::Response & res, int id_slot) {
48574856 int task_id = ctx_server.queue_tasks .get_new_id ();
48584857 {
48594858 server_task task (SERVER_TASK_TYPE_SLOT_ERASE);
@@ -4876,7 +4875,7 @@ int main(int argc, char ** argv) {
48764875 res_ok (res, result->to_json ());
48774876 };
48784877
4879- const auto handle_slots_action = [¶ms, &res_error, & handle_slots_save, &handle_slots_restore, &handle_slots_erase](const httplib::Request & req, httplib::Response & res) {
4878+ const auto handle_slots_action = [¶ms, &handle_slots_save, &handle_slots_restore, &handle_slots_erase](const httplib::Request & req, httplib::Response & res) {
48804879 if (params.slot_save_path .empty ()) {
48814880 res_error (res, format_error_response (" This server does not support slots action. Start it with `--slot-save-path`" , ERROR_TYPE_NOT_SUPPORTED));
48824881 return ;
@@ -4905,7 +4904,7 @@ int main(int argc, char ** argv) {
49054904 }
49064905 };
49074906
4908- const auto handle_props = [¶ms, &ctx_server, &res_ok ](const httplib::Request &, httplib::Response & res) {
4907+ const auto handle_props = [¶ms, &ctx_server](const httplib::Request &, httplib::Response & res) {
49094908 json default_generation_settings_for_props;
49104909
49114910 {
@@ -4947,7 +4946,7 @@ int main(int argc, char ** argv) {
49474946 res_ok (res, data);
49484947 };
49494948
4950- const auto handle_props_change = [&ctx_server, &res_error, &res_ok ](const httplib::Request & req, httplib::Response & res) {
4949+ const auto handle_props_change = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
49514950 if (!ctx_server.params_base .endpoint_props ) {
49524951 res_error (res, format_error_response (" This server does not support changing global properties. Start it with `--props`" , ERROR_TYPE_NOT_SUPPORTED));
49534952 return ;
@@ -4960,7 +4959,7 @@ int main(int argc, char ** argv) {
49604959 res_ok (res, {{ " success" , true }});
49614960 };
49624961
4963- const auto handle_api_show = [&ctx_server, &res_ok ](const httplib::Request &, httplib::Response & res) {
4962+ const auto handle_api_show = [&ctx_server](const httplib::Request &, httplib::Response & res) {
49644963 bool has_mtmd = ctx_server.mctx != nullptr ;
49654964 json data = {
49664965 {
@@ -4991,7 +4990,7 @@ int main(int argc, char ** argv) {
49914990
49924991 // handle completion-like requests (completion, chat, infill)
49934992 // we can optionally provide a custom format for partial results and final results
4994- const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok ](
4993+ const auto handle_completions_impl = [&ctx_server](
49954994 server_task_type type,
49964995 json & data,
49974996 const std::vector<raw_buffer> & files,
@@ -5139,7 +5138,7 @@ int main(int argc, char ** argv) {
51395138 OAICOMPAT_TYPE_COMPLETION);
51405139 };
51415140
5142- const auto handle_infill = [&ctx_server, &res_error, & handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
5141+ const auto handle_infill = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
51435142 // check model compatibility
51445143 std::string err;
51455144 if (llama_vocab_fim_pre (ctx_server.vocab ) == LLAMA_TOKEN_NULL) {
@@ -5238,7 +5237,7 @@ int main(int argc, char ** argv) {
52385237 };
52395238
52405239 // same with handle_chat_completions, but without inference part
5241- const auto handle_apply_template = [&ctx_server, &res_ok ](const httplib::Request & req, httplib::Response & res) {
5240+ const auto handle_apply_template = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
52425241 auto body = json::parse (req.body );
52435242 std::vector<raw_buffer> files; // dummy, unused
52445243 json data = oaicompat_chat_params_parse (
@@ -5248,7 +5247,7 @@ int main(int argc, char ** argv) {
52485247 res_ok (res, {{ " prompt" , std::move (data.at (" prompt" )) }});
52495248 };
52505249
5251- const auto handle_models = [¶ms, &ctx_server, &state, &res_ok ](const httplib::Request &, httplib::Response & res) {
5250+ const auto handle_models = [¶ms, &ctx_server, &state](const httplib::Request &, httplib::Response & res) {
52525251 server_state current_state = state.load ();
52535252 json model_meta = nullptr ;
52545253 if (current_state == SERVER_STATE_READY) {
@@ -5293,7 +5292,7 @@ int main(int argc, char ** argv) {
52935292 res_ok (res, models);
52945293 };
52955294
5296- const auto handle_tokenize = [&ctx_server, &res_ok ](const httplib::Request & req, httplib::Response & res) {
5295+ const auto handle_tokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
52975296 const json body = json::parse (req.body );
52985297
52995298 json tokens_response = json::array ();
@@ -5334,7 +5333,7 @@ int main(int argc, char ** argv) {
53345333 res_ok (res, data);
53355334 };
53365335
5337- const auto handle_detokenize = [&ctx_server, &res_ok ](const httplib::Request & req, httplib::Response & res) {
5336+ const auto handle_detokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
53385337 const json body = json::parse (req.body );
53395338
53405339 std::string content;
@@ -5347,7 +5346,7 @@ int main(int argc, char ** argv) {
53475346 res_ok (res, data);
53485347 };
53495348
5350- const auto handle_embeddings_impl = [&ctx_server, &res_error, &res_ok ](const httplib::Request & req, httplib::Response & res, oaicompat_type oaicompat) {
5349+ const auto handle_embeddings_impl = [&ctx_server](const httplib::Request & req, httplib::Response & res, oaicompat_type oaicompat) {
53515350 if (!ctx_server.params_base .embedding ) {
53525351 res_error (res, format_error_response (" This server does not support embeddings. Start it with `--embeddings`" , ERROR_TYPE_NOT_SUPPORTED));
53535352 return ;
@@ -5457,7 +5456,7 @@ int main(int argc, char ** argv) {
54575456 handle_embeddings_impl (req, res, OAICOMPAT_TYPE_EMBEDDING);
54585457 };
54595458
5460- const auto handle_rerank = [&ctx_server, &res_error, &res_ok ](const httplib::Request & req, httplib::Response & res) {
5459+ const auto handle_rerank = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
54615460 if (!ctx_server.params_base .embedding || ctx_server.params_base .pooling_type != LLAMA_POOLING_TYPE_RANK) {
54625461 res_error (res, format_error_response (" This server does not support reranking. Start it with `--reranking`" , ERROR_TYPE_NOT_SUPPORTED));
54635462 return ;
0 commit comments