You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard
Expand all lines: docs/deployments/api-configuration.md
+21 -21 Lines changed: 21 additions & 21 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -10,25 +10,25 @@ Reference the section below which corresponds to your Predictor type: [Python](#
10
10
11
11
```yaml
12
12
- name: <string> # API name (required)
13
-
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
14
-
local_port: <int> # specify the port for API (local only) (default: 8888)
15
13
predictor:
16
14
type: python
17
15
path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
18
16
config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
19
17
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
20
18
image: <string> # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
21
19
env: <string: string> # dictionary of environment variables
22
-
monitoring: # (aws only)
23
-
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
24
-
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
20
+
networking:
21
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
22
+
local_port: <int> # specify the port for API (local only) (default: 8888)
23
+
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
25
24
compute:
26
25
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
27
26
gpu: <int> # GPU request per replica (default: 0)
28
27
inf: <int> # Inferentia ASIC request per replica (default: 0)
29
28
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
30
-
networking:
31
-
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
29
+
monitoring: # (aws only)
30
+
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
31
+
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
32
32
autoscaling: # (aws only)
33
33
min_replicas: <int> # minimum number of replicas (default: 1)
34
34
max_replicas: <int> # maximum number of replicas (default: 100)
@@ -55,8 +55,6 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
55
55
56
56
```yaml
57
57
- name: <string> # API name (required)
58
-
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
59
-
local_port: <int> # specify the port for API (local only) (default: 8888)
60
58
predictor:
61
59
type: tensorflow
62
60
path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -72,16 +70,18 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
72
70
image: <string> # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
73
71
tensorflow_serving_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
74
72
env: <string: string> # dictionary of environment variables
75
-
monitoring: # (aws only)
76
-
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
77
-
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
73
+
networking:
74
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
75
+
local_port: <int> # specify the port for API (local only) (default: 8888)
76
+
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
78
77
compute:
79
78
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
80
79
gpu: <int> # GPU request per replica (default: 0)
81
80
inf: <int> # Inferentia ASIC request per replica (default: 0)
82
81
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
83
-
networking:
84
-
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
82
+
monitoring: # (aws only)
83
+
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
84
+
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
85
85
autoscaling: # (aws only)
86
86
min_replicas: <int> # minimum number of replicas (default: 1)
87
87
max_replicas: <int> # maximum number of replicas (default: 100)
@@ -108,8 +108,6 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
108
108
109
109
```yaml
110
110
- name: <string> # API name (required)
111
-
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
112
-
local_port: <int> # specify the port for API (local only) (default: 8888)
113
111
predictor:
114
112
type: onnx
115
113
path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -123,15 +121,17 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
123
121
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
124
122
image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
125
123
env: <string: string> # dictionary of environment variables
126
-
monitoring: # (aws only)
127
-
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
128
-
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
124
+
networking:
125
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
126
+
local_port: <int> # specify the port for API (local only) (default: 8888)
127
+
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
129
128
compute:
130
129
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
131
130
gpu: <int> # GPU request per replica (default: 0)
132
131
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
133
-
networking:
134
-
api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
132
+
monitoring: # (aws only)
133
+
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
134
+
key: <string> # the JSON key in the response payload of the value to monitor (required if the response payload is a JSON object)
135
135
autoscaling: # (aws only)
136
136
min_replicas: <int> # minimum number of replicas (default: 1)
137
137
max_replicas: <int> # maximum number of replicas (default: 100)
0 commit comments