@@ -11,23 +11,40 @@ enum DataPayloadEncoding {
1111 DATA_PAYLOAD_ENCODING_UTF8_TEXT = 2 ;
1212 DATA_PAYLOAD_ENCODING_BINARY_PICKLE = 3 ;
1313 DATA_PAYLOAD_ENCODING_BINARY_ZIP = 4 ;
14+ DATA_PAYLOAD_ENCODING_RAW = 5 ;
1415}
1516
1617message DataPayload {
17- optional uint64 size = 2 ;
18- optional string sha256_hash = 3 ;
1918 // URI of the BLOB where the data is stored.
2019 // S3 URI if the data is stored in S3.
2120 // Starts with "file://"" prefix if the data is stored on a local file system.
22- optional string uri = 4 ;
23- optional DataPayloadEncoding encoding = 5 ;
21+ optional string uri = 1 ;
22+ optional DataPayloadEncoding encoding = 2 ;
2423 // Not set and ignored by Server right now.
25- optional uint64 encoding_version = 6 ;
26- // Offset inside the BLOB.
27- optional uint64 offset = 7 ;
24+ optional uint64 encoding_version = 3 ;
25+
26+ optional string content_type = 4 ;
27+
28+ // DataPayload metadata is stored at the head of the binary
29+ // blob. This metadata is used optionally to store additional
30+ // metadata for the type of the object that is stored
31+ // so that we can deserialize the object correctly.
32+ optional uint64 metadata_size = 5 ;
33+
34+ optional uint64 offset = 6 ;
35+ // Includes data and metadata.
36+ optional uint64 size = 7 ;
37+ optional string sha256_hash = 8 ;
38+
39+ // ID of the function call that returned this data payload as it's return value.
40+ // Payloads which are not a function call return value, don't have this field set.
41+ optional string source_function_call_id = 9 ;
42+
43+ // Opaque ID. Generated by Server if not set by Executor.
44+ optional string id = 10 ;
45+
2846}
2947
30- // ===== report_executor_state RPC =====
3148enum GPUModel {
3249 GPU_MODEL_UNKNOWN = 0 ;
3350 GPU_MODEL_NVIDIA_A100_40GB = 1 ;
@@ -56,17 +73,17 @@ message HostResources {
5673// Specification of a single function that is allowed to be run on the Executor.
5774message AllowedFunction {
5875 optional string namespace = 1 ;
59- optional string graph_name = 2 ;
76+ optional string application_name = 2 ;
6077 optional string function_name = 3 ;
61- // If none then any version of the graph is allowed to run on the Executor.
62- optional string graph_version = 4 ;
78+ // If none then any version of the application is allowed to run on the Executor.
79+ optional string application_version = 4 ;
6380}
6481
6582enum FunctionExecutorStatus {
6683 FUNCTION_EXECUTOR_STATUS_UNKNOWN = 0 ;
6784 // FE is being created.
6885 FUNCTION_EXECUTOR_STATUS_PENDING = 1 ;
69- // FE is running and ready to accept tasks .
86+ // FE is running and ready to accept task allocations .
7087 FUNCTION_EXECUTOR_STATUS_RUNNING = 2 ;
7188 // FE is terminated, all resources are freed.
7289 FUNCTION_EXECUTOR_STATUS_TERMINATED = 3 ;
@@ -106,23 +123,29 @@ message FunctionExecutorResources {
106123 optional GPUResources gpu = 4 ;
107124}
108125
126+ message FunctionRef {
127+ optional string namespace = 1 ;
128+ optional string application_name = 2 ;
129+ optional string function_name = 3 ;
130+ optional string application_version = 4 ;
131+ }
132+
109133message FunctionExecutorDescription {
110134 optional string id = 1 ;
111- optional string namespace = 2 ;
112- optional string graph_name = 3 ;
113- optional string graph_version = 4 ;
114- optional string function_name = 5 ;
115- repeated string secret_names = 7 ;
135+ optional FunctionRef function = 2 ;
136+ repeated string secret_names = 3 ;
116137 // Timeout for customer code duration during FE creation.
117- optional uint32 customer_code_timeout_ms = 9 ;
118- optional DataPayload graph = 10 ;
119- optional FunctionExecutorResources resources = 11 ;
138+ optional uint32 initialization_timeout_ms = 4 ;
139+ optional DataPayload application = 5 ;
140+ optional FunctionExecutorResources resources = 6 ;
120141 // URI prefix for the startup output payloads.
121142 // S3 URI if the data is stored in S3.
122143 // Starts with "file://"" prefix followed by an absolute directory path if the data is stored on a local file system.
123144 // Deprecated: most probably going to be removed once external FE logs ingestion pipeline gets implemented.
124- optional string output_payload_uri_prefix = 12 ;
125- optional uint32 max_concurrency = 13 ;
145+ optional string output_payload_uri_prefix = 7 ;
146+ optional uint32 max_concurrency = 8 ;
147+ // Timeout for running task allocations on FE.
148+ optional uint32 allocation_timeout_ms = 9 ;
126149}
127150
128151message FunctionExecutorState {
@@ -132,13 +155,6 @@ message FunctionExecutorState {
132155 repeated string allocation_ids_caused_termination = 4 ;
133156}
134157
135- // Deprecated: most probably going to be removed once external FE logs ingestion pipeline gets implemented.
136- message FunctionExecutorUpdate {
137- optional FunctionExecutorDescription description = 1 ;
138- optional DataPayload startup_stdout = 2 ;
139- optional DataPayload startup_stderr = 3 ;
140- }
141-
142158enum ExecutorStatus {
143159 EXECUTOR_STATUS_UNKNOWN = 0 ;
144160 // Executor is starting up, not ready to accept task allocations and FEs.
@@ -175,8 +191,7 @@ message ExecutorState {
175191// then the updates from it won't be included in the next RPC.
176192message ExecutorUpdate {
177193 optional string executor_id = 1 ;
178- repeated TaskResult task_results = 2 ;
179- repeated FunctionExecutorUpdate function_executor_updates = 3 ;
194+ repeated AllocationResult allocation_results = 2 ;
180195}
181196
182197message ReportExecutorStateRequest {
@@ -188,38 +203,23 @@ message ReportExecutorStateRequest {
188203message ReportExecutorStateResponse {
189204}
190205
191- // ===== get_desired_executor_states RPC =====
192- message TaskRetryPolicy {
193- optional uint32 max_retries = 1 ;
194- optional uint32 initial_delay_ms = 2 ;
195- optional uint32 max_delay_ms = 3 ;
196- // The multiplier value is 1000x of the actual value to avoid working with floating point.
197- optional uint32 delay_multiplier = 4 ;
198- }
199-
200- message Task {
201- optional string id = 1 ;
202- optional string namespace = 2 ;
203- optional string graph_name = 3 ;
204- optional string graph_version = 4 ;
205- optional string function_name = 5 ;
206- optional string graph_invocation_id = 6 ;
207- optional uint32 timeout_ms = 10 ;
208- optional DataPayload input = 11 ;
209- optional DataPayload reducer_input = 12 ;
206+ message TaskAllocation {
207+ // Version is not set in function ref.
208+ optional FunctionRef function = 1 ;
209+ optional string allocation_id = 2 ;
210+ optional string task_id = 3 ;
211+ optional string request_id = 4 ;
212+ repeated DataPayload args = 5 ;
210213 // URI prefix for the output payloads.
211214 // S3 URI if the data is stored in S3.
212215 // Starts with "file://"" prefix followed by an absolute directory path if the data is stored on a local file system.
213- optional string output_payload_uri_prefix = 13 ;
214- optional TaskRetryPolicy retry_policy = 14 ;
215- // BLOB URI prefix for the invocation error payloads.
216- optional string invocation_error_payload_uri_prefix = 15 ;
217- }
216+ optional string output_payload_uri_prefix = 6 ;
217+ // BLOB URI prefix for the request error payloads.
218+ optional string request_error_payload_uri_prefix = 7 ;
219+ optional string function_executor_id = 8 ;
218220
219- message TaskAllocation {
220- optional string function_executor_id = 1 ;
221- optional Task task = 2 ;
222- optional string allocation_id = 3 ;
221+ // Always set. An empty bytes array if no metadata, e.g. for API function call.
222+ optional bytes function_call_metadata = 9 ;
223223}
224224
225225// A message sent by Executor to Server to open the stream of desired Executor States for the Executor.
@@ -237,55 +237,125 @@ message DesiredExecutorState {
237237 optional uint64 clock = 3 ;
238238}
239239
240- // ===== report_task_outcome RPC =====
241- enum TaskOutcomeCode {
242- TASK_OUTCOME_CODE_UNKNOWN = 0 ;
243- TASK_OUTCOME_CODE_SUCCESS = 1 ;
244- TASK_OUTCOME_CODE_FAILURE = 2 ;
240+ enum AllocationOutcomeCode {
241+ ALLOCATION_OUTCOME_CODE_UNKNOWN = 0 ;
242+ ALLOCATION_OUTCOME_CODE_SUCCESS = 1 ;
243+ ALLOCATION_OUTCOME_CODE_FAILURE = 2 ;
245244}
246245
247- enum TaskFailureReason {
248- TASK_FAILURE_REASON_UNKNOWN = 0 ;
246+ enum AllocationFailureReason {
247+ ALLOCATION_FAILURE_REASON_UNKNOWN = 0 ;
249248 // Internal error on Executor aka platform error.
250- TASK_FAILURE_REASON_INTERNAL_ERROR = 1 ;
249+ ALLOCATION_FAILURE_REASON_INTERNAL_ERROR = 1 ;
251250 // Clear function code failure typically by raising an exception from the function code.
252251 // Also a grey failure where we can't determine the exact cause. We attribute these to
253252 // functions to prevent service abuse but not billed intenionally failing functions.
254- TASK_FAILURE_REASON_FUNCTION_ERROR = 2 ;
253+ ALLOCATION_FAILURE_REASON_FUNCTION_ERROR = 2 ;
255254 // Function code run time exceeded its configured timeout.
256- TASK_FAILURE_REASON_FUNCTION_TIMEOUT = 3 ;
257- // Function code raised InvocationError to mark the invocation as permanently failed.
258- TASK_FAILURE_REASON_INVOCATION_ERROR = 4 ;
255+ ALLOCATION_FAILURE_REASON_FUNCTION_TIMEOUT = 3 ;
256+ // Function code raised RequestException to mark the request as permanently failed.
257+ ALLOCATION_FAILURE_REASON_REQUEST_ERROR = 4 ;
259258 // Server removed the task allocation from Executor desired state. The task allocation didn't finish before the removal.
260- TASK_FAILURE_REASON_TASK_CANCELLED = 5 ;
259+ ALLOCATION_FAILURE_REASON_ALLOCATION_CANCELLED = 5 ;
261260 // Function Executor terminated - can't run the task allocation on it anymore.
262- TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED = 6 ;
261+ ALLOCATION_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED = 6 ;
263262}
264263
265- message TaskResult {
266- optional string task_id = 1 ;
264+ message ExecutionPlanUpdate {
265+ oneof op {
266+ FunctionCall function_call = 10 ;
267+ ReduceOp reduce = 12 ;
268+ }
269+ }
270+
271+ message FunctionCall {
272+ // The ID of the function call.
273+ // When all the dependencies of the function call are resolved, the
274+ // resulting DataPayload will have the same ID.
275+ // This allows resolving datapayloads of resolved functions calls when it's upstream is called again.
276+ // The ID is unique within the scope of the request.
277+ optional string id = 1 ;
278+ optional FunctionRef target = 2 ;
279+ repeated FunctionArg args = 3 ;
280+
281+ // This required metadata allows SDK to restore original function call from the arguments.
282+ optional bytes call_metadata = 5 ;
283+ }
284+
285+ message FunctionArg {
286+ oneof source {
287+ string function_call_id = 1 ;
288+ DataPayload inline_data = 2 ;
289+ }
290+ }
291+
292+ message ReduceOp {
293+ // The DataPayload of the last function call in the reducer will have the function call id set
294+ // as the id of the ReduceOp.
295+ optional string id = 1 ;
296+ // Contains at least two items.
297+ repeated FunctionArg collection = 2 ;
298+ optional FunctionRef reducer = 3 ;
299+ // This required metadata allows SDK to restore original function call from the arguments.
300+ optional bytes call_metadata = 4 ;
301+ }
302+
303+ message ExecutionPlanUpdates {
304+ repeated ExecutionPlanUpdate updates = 1 ;
305+ optional string root_function_call_id = 2 ;
306+ }
307+
308+ message AllocationResult {
309+ optional FunctionRef function = 1 ;
267310 optional string allocation_id = 2 ;
268- optional string namespace = 3 ;
269- optional string graph_name = 4 ;
270- optional string graph_version = 5 ;
271- optional string function_name = 6 ;
272- optional string graph_invocation_id = 7 ;
273- optional TaskOutcomeCode outcome_code = 9 ;
274- optional TaskFailureReason failure_reason = 10 ;
275- // Edges that the function wants the invocation to be routed to.
276- repeated string next_functions = 11 ;
277- repeated DataPayload function_outputs = 12 ;
278- // Standard output and error streams of the function.
279- // Deprecated: most probably going to be removed once external FE logs ingestion pipeline gets implemented.
280- optional DataPayload stdout = 13 ;
281- optional DataPayload stderr = 14 ;
282- // User payload for invocation error if task failed with invocation error.
283- optional DataPayload invocation_error_output = 15 ;
284- optional uint64 execution_duration_ms = 16 ;
311+ optional string task_id = 3 ;
312+ optional string request_id = 4 ;
313+ optional AllocationOutcomeCode outcome_code = 5 ;
314+ optional AllocationFailureReason failure_reason = 6 ;
315+
316+ oneof return_value {
317+ DataPayload value = 7 ;
318+ ExecutionPlanUpdates updates = 8 ;
319+ }
320+ // User payload for request error if allocation failed with request error.
321+ optional DataPayload request_error = 9 ;
322+ optional uint64 execution_duration_ms = 10 ;
285323}
286324
287- // Internal API for scheduling and running tasks on Executors. Executors are acting as clients of this API.
288- // Server is responsible for scheduling tasks on Executors and Executors are responsible for running the tasks.
325+ message FunctionCallRequest {
326+ // ID of the parent request.
327+ // Should be set by the Executor for safety.
328+ optional string parent_request_id = 1 ;
329+ // Reference of the target function.
330+ optional FunctionRef function = 2 ;
331+
332+ // Inputs to the function.
333+ repeated DataPayload inputs = 3 ;
334+
335+ // Metadata for the function call.
336+ optional bytes call_metadata = 4 ;
337+
338+ // ID of the source function call.
339+ // Should be set by the Executor for safety.
340+ optional string source_function_call_id = 5 ;
341+ }
342+
343+ message FunctionCallResult {
344+ oneof result {
345+ DataPayload output = 1 ;
346+ string exception = 2 ;
347+ }
348+ }
349+
350+ message FunctionCallResponse {
351+ oneof event {
352+ string update = 1 ;
353+ FunctionCallResult result = 2 ;
354+ }
355+ }
356+
357+ // Internal API for scheduling and running task allocations on Executors. Executors are acting as clients of this API.
358+ // Server is responsible for scheduling allocations on Executors and Executors are responsible for running the allocations.
289359//
290360// Rename with caution. Existing clients won't find the service if the service name changes. A HTTP2 ingress proxy
291361// might use the service name in it HTTP2 path based routing rules. See how gRPC uses service names in its HTTP2 paths
@@ -299,4 +369,8 @@ service ExecutorAPI {
299369 // Called by Executor to open a stream of its desired states. When Server wants Executor to change something
300370 // it puts a message on the stream with the new desired state of the Executor.
301371 rpc get_desired_executor_states (GetDesiredExecutorStatesRequest ) returns (stream DesiredExecutorState ) {}
372+
373+ // Called by the user code to invoke a blocking function call.
374+ // returns a stream until the function call is completed.
375+ rpc invoke_function (FunctionCallRequest ) returns (stream FunctionCallResponse ) {}
302376}
0 commit comments