11#ifndef TGI_BACKEND_TRTLLM_FFI
22#define TGI_BACKEND_TRTLLM_FFI
33
4+ #include < chrono>
45#include < exception>
56#include < memory>
67#include < thread>
@@ -52,7 +53,7 @@ namespace huggingface::tgi::backends::trtllm {
5253 }
5354 }
5455
55- static auto as_generation_step = [](const tle::Response &r) {
56+ static auto as_generation_step = [](const tle::Response &r, const std::chrono::time_point<std::chrono::steady_clock> created ) {
5657 const auto reqId = r.getRequestId ();
5758 if (!r.hasError ()) [[likely]] {
5859 const auto result = r.getResult ();
@@ -66,14 +67,23 @@ namespace huggingface::tgi::backends::trtllm {
6667 log_prob = result.logProbs .value ()[0 ].back ();
6768 }
6869
70+ std::optional<int64_t > first_scheduled_time_ns = std::nullopt ;
71+ if (result.requestPerfMetrics ) {
72+ const auto &t = result.requestPerfMetrics ->timingMetrics ;
73+ const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(t.firstScheduledTime - created).count ();
74+ first_scheduled_time_ns = static_cast <int64_t >(ns);
75+ }
76+
6977 return generation_step_t {
7078 reqId,
7179 token_id.value_or (0 ),
7280 log_prob.value_or (0.0 ),
81+ first_scheduled_time_ns.value_or (0 ),
7382 result.isFinal ,
7483 as_finish_reason_t (result.finishReasons [0 ]),
7584 token_id.has_value (),
7685 log_prob.has_value (),
86+ first_scheduled_time_ns.has_value (),
7787 false ,
7888 std::string ()
7989 };
@@ -82,10 +92,12 @@ namespace huggingface::tgi::backends::trtllm {
8292 reqId,
8393 0 ,
8494 0.0 ,
95+ 0 ,
8596 true ,
8697 finish_reason_t ::kNOT_FINISHED ,
8798 false ,
8899 false ,
100+ false ,
89101 true ,
90102 std::move (r.getErrorMsg ())
91103 };
@@ -97,9 +109,16 @@ namespace huggingface::tgi::backends::trtllm {
97109 private:
98110 backend_t inner_;
99111
// m_created_time is the steady_clock reference point from which executor timings are
// measured, so that a C++ time_point can be converted to a Rust Instant.
114+ std::chrono::time_point<std::chrono::steady_clock> m_created_time;
115+
116+
100117 public:
101- tensorrt_llm_backend_t (std::filesystem::path &&engine_folder, std::filesystem::path &&executor_worker_path)
102- : inner_(engine_folder, executor_worker_path) {}
118+ tensorrt_llm_backend_t (std::filesystem::path &&engine_folder, std::filesystem::path &&executor_worker_path, const std::chrono::time_point<std::chrono::steady_clock>& created_time)
119+ : inner_(engine_folder, executor_worker_path),
120+ m_created_time {created_time}
121+ {}
103122
104123 size_t num_tokens_ready () const noexcept { return inner_.num_tokens_ready (); }
105124
@@ -139,13 +158,16 @@ namespace huggingface::tgi::backends::trtllm {
139158
140159 SPDLOG_TRACE (" [FFI] Successfully pulled out {:d} responses from executor" , responses.size ());
141160
161+ auto f = [this ](const tle::Response &r){
162+ return as_generation_step (r, m_created_time);
163+ };
142164 // Transform tle::Response to generation_step_t
143165#ifdef __cpp_lib_ranges_to_container
144- auto steps = responses | std::views::transform (as_generation_step ) | std::ranges::to<std::vector>();
166+ auto steps = responses | std::views::transform (f ) | std::ranges::to<std::vector>();
145167#else
146168 auto steps = std::vector<generation_step_t >();
147169 steps.reserve (responses.size ());
148- std::transform (responses.begin (), responses.end (), std::back_inserter (steps), as_generation_step );
170+ std::transform (responses.begin (), responses.end (), std::back_inserter (steps), f );
149171#endif
150172 return std::make_unique<std::vector<generation_step_t >>(steps);
151173
@@ -197,12 +219,14 @@ namespace huggingface::tgi::backends::trtllm {
197219
198220 std::unique_ptr<tensorrt_llm_backend_t >
199221 create_backend_from_engine_folder (const rust::Str engines_folder, const rust::Str executor_worker_path) {
222+ const auto created_time = std::chrono::steady_clock::now ();
200223 std::call_once (backend_initialized_flag, initialize_tensorrt_llm_backend);
201224 return std::make_unique<tensorrt_llm_backend_t >(
202225 std::filesystem::path (std::string_view (engines_folder.begin (), engines_folder.end ()),
203226 std::filesystem::path::format::auto_format),
204227 std::filesystem::path (std::string_view (executor_worker_path.begin (), executor_worker_path.end ()),
205- std::filesystem::path::format::auto_format)
228+ std::filesystem::path::format::auto_format),
229+ created_time
206230 );
207231 }
208232}
0 commit comments