@@ -29,6 +29,8 @@ limitations under the License.
2929#include " tensorflow_serving/apis/inference.pb.h"
3030#include " tensorflow_serving/apis/predict.pb.h"
3131#include " tensorflow_serving/apis/regression.pb.h"
32+ #include " tensorflow_serving/servables/tensorflow/predict_response_tensor_serialization_option.h"
33+ #include " tensorflow_serving/servables/tensorflow/thread_pool_factory.h"
3234
3335namespace tensorflow {
3436namespace serving {
@@ -48,13 +50,27 @@ class Servable {
4850 // Returns the version associated with this servable.
4951 int64_t version () const { return version_; }
5052
51- virtual absl::Status Classify (const ClassificationRequest& request,
53+ // RunOptions group the configuration for individual inference executions.
54+ // The per-request configuration (e.g. deadline) can be passed here.
55+ struct RunOptions {
56+ // Priority of the request. Some thread pool implementation will schedule
57+ // ops based on the priority number. Larger number means higher
58+ // priority.
59+ int64_t priority = 1 ;
60+ // The deadline for this request.
61+ absl::Time deadline = absl::InfiniteFuture();
62+ };
63+
64+ virtual absl::Status Classify (const RunOptions& run_options,
65+ const ClassificationRequest& request,
5266 ClassificationResponse* response) = 0;
5367
54- virtual absl::Status Regress (const RegressionRequest& request,
68+ virtual absl::Status Regress (const RunOptions& run_options,
69+ const RegressionRequest& request,
5570 RegressionResponse* response) = 0;
5671
57- virtual absl::Status Predict (const PredictRequest& request,
72+ virtual absl::Status Predict (const RunOptions& run_options,
73+ const PredictRequest& request,
5874 PredictResponse* response) = 0;
5975
6076 // Streamed version of `Predict`. Experimental API that is not yet part of the
@@ -67,10 +83,11 @@ class Servable {
6783 // callback invocation to be delayed. The implementation guarantees that the
6884 // callback is never called after the `PredictStreamed` method returns.
6985 virtual absl::Status PredictStreamed (
70- const PredictRequest& request,
86+ const RunOptions& run_options, const PredictRequest& request,
7187 absl::AnyInvocable<void (PredictResponse)> response_callback) = 0;
7288
73- virtual absl::Status MultiInference (const MultiInferenceRequest& request,
89+ virtual absl::Status MultiInference (const RunOptions& run_options,
90+ const MultiInferenceRequest& request,
7491 MultiInferenceResponse* response) = 0;
7592
7693 virtual absl::Status GetModelMetadata (const GetModelMetadataRequest& request,
@@ -95,28 +112,32 @@ class EmptyServable : public Servable {
95112 public:
96113 EmptyServable ();
97114
98- absl::Status Classify (const ClassificationRequest& request,
115+ absl::Status Classify (const RunOptions& run_options,
116+ const ClassificationRequest& request,
99117 ClassificationResponse* response) override {
100118 return error_;
101119 }
102120
103- absl::Status Regress (const RegressionRequest& request,
121+ absl::Status Regress (const RunOptions& run_options,
122+ const RegressionRequest& request,
104123 RegressionResponse* response) override {
105124 return error_;
106125 }
107126
108- absl::Status Predict (const PredictRequest& request,
127+ absl::Status Predict (const RunOptions& run_options,
128+ const PredictRequest& request,
109129 PredictResponse* response) override {
110130 return error_;
111131 }
112132
113133 absl::Status PredictStreamed (
114- const PredictRequest& request,
134+ const RunOptions& run_options, const PredictRequest& request,
115135 absl::AnyInvocable<void (PredictResponse)> response_callback) override {
116136 return error_;
117137 }
118138
119- absl::Status MultiInference (const MultiInferenceRequest& request,
139+ absl::Status MultiInference (const RunOptions& run_options,
140+ const MultiInferenceRequest& request,
120141 MultiInferenceResponse* response) override {
121142 return error_;
122143 }
0 commit comments