|
40 | 40 | from ads.model.deployment.model_deployment_runtime import ( |
41 | 41 | ModelDeploymentCondaRuntime, |
42 | 42 | ModelDeploymentContainerRuntime, |
| 43 | + ModelDeploymentMode, |
43 | 44 | ModelDeploymentRuntime, |
44 | 45 | ModelDeploymentRuntimeType, |
45 | 46 | OCIModelDeploymentRuntimeType, |
@@ -80,11 +81,6 @@ class ModelDeploymentLogType: |
80 | 81 | ACCESS = "access" |
81 | 82 |
|
82 | 83 |
|
83 | | -class ModelDeploymentMode: |
84 | | - HTTPS = "HTTPS_ONLY" |
85 | | - STREAM = "STREAM_ONLY" |
86 | | - |
87 | | - |
# Exception subclass with an empty body: it adds no attributes or behavior
# beyond `Exception`. The trailing `# pragma: no cover` is the conventional
# coverage.py marker for excluding a definition from coverage reports.
# NOTE(review): presumably raised when a model deployment has no log
# configured; the raising sites are not visible here, so confirm against callers.
88 | 85 | class LogNotConfiguredError(Exception): # pragma: no cover |
89 | 85 | pass |
90 | 86 |
|
@@ -911,48 +907,59 @@ def predict( |
911 | 907 | "`data` and `json_input` are both provided. You can only use one of them." |
912 | 908 | ) |
913 | 909 |
|
914 | | - if auto_serialize_data: |
915 | | - data = data or json_input |
916 | | - serialized_data = serializer.serialize(data=data) |
917 | | - return send_request( |
918 | | - data=serialized_data, |
919 | | - endpoint=endpoint, |
920 | | - is_json_payload=_is_json_serializable(serialized_data), |
921 | | - header=header, |
922 | | - ) |
| 910 | + try: |
| 911 | + if auto_serialize_data: |
| 912 | + data = data or json_input |
| 913 | + serialized_data = serializer.serialize(data=data) |
| 914 | + return send_request( |
| 915 | + data=serialized_data, |
| 916 | + endpoint=endpoint, |
| 917 | + is_json_payload=_is_json_serializable(serialized_data), |
| 918 | + header=header, |
| 919 | + ) |
923 | 920 |
|
924 | | - if json_input is not None: |
925 | | - if not _is_json_serializable(json_input): |
926 | | - raise ValueError( |
927 | | - "`json_input` must be json serializable. " |
928 | | - "Set `auto_serialize_data` to True, or serialize the provided input data first," |
929 | | - "or using `data` to pass binary data." |
| 921 | + if json_input is not None: |
| 922 | + if not _is_json_serializable(json_input): |
| 923 | + raise ValueError( |
| 924 | + "`json_input` must be json serializable. " |
| 925 | + "Set `auto_serialize_data` to True, or serialize the provided input data first," |
| 926 | + "or using `data` to pass binary data." |
| 927 | + ) |
| 928 | + utils.get_logger().warning( |
| 929 | + "The `json_input` argument of `predict()` will be deprecated soon. " |
| 930 | + "Please use `data` argument. " |
930 | 931 | ) |
931 | | - utils.get_logger().warning( |
932 | | - "The `json_input` argument of `predict()` will be deprecated soon. " |
933 | | - "Please use `data` argument. " |
934 | | - ) |
935 | | - data = json_input |
| 932 | + data = json_input |
936 | 933 |
|
937 | | - is_json_payload = _is_json_serializable(data) |
938 | | - if not isinstance(data, bytes) and not is_json_payload: |
939 | | - raise TypeError( |
940 | | - "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
941 | | - ) |
942 | | - if model_name and model_version: |
943 | | - header["model-name"] = model_name |
944 | | - header["model-version"] = model_version |
945 | | - elif bool(model_version) ^ bool(model_name): |
946 | | - raise ValueError( |
947 | | - "`model_name` and `model_version` have to be provided together." |
| 934 | + is_json_payload = _is_json_serializable(data) |
| 935 | + if not isinstance(data, bytes) and not is_json_payload: |
| 936 | + raise TypeError( |
| 937 | + "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
| 938 | + ) |
| 939 | + if model_name and model_version: |
| 940 | + header["model-name"] = model_name |
| 941 | + header["model-version"] = model_version |
| 942 | + elif bool(model_version) ^ bool(model_name): |
| 943 | + raise ValueError( |
| 944 | + "`model_name` and `model_version` have to be provided together." |
| 945 | + ) |
| 946 | + prediction = send_request( |
| 947 | + data=data, |
| 948 | + endpoint=endpoint, |
| 949 | + is_json_payload=is_json_payload, |
| 950 | + header=header, |
948 | 951 | ) |
949 | | - prediction = send_request( |
950 | | - data=data, |
951 | | - endpoint=endpoint, |
952 | | - is_json_payload=is_json_payload, |
953 | | - header=header, |
954 | | - ) |
955 | | - return prediction |
| 952 | + return prediction |
| 953 | + except oci.exceptions.ServiceError as ex: |
| 954 | + # When bandwidth exceeds the allocated value, TooManyRequests error (429) will be raised by oci backend. |
| 955 | + if ex.status == 429: |
| 956 | + bandwidth_mbps = self.infrastructure.bandwidth_mbps or MODEL_DEPLOYMENT_BANDWIDTH_MBPS |
| 957 | + utils.get_logger().warning( |
| 958 | + f"Load balancer bandwidth exceeds the allocated {bandwidth_mbps} Mbps." |
| 959 | + "To estimate the actual bandwidth, use formula: (payload size in KB) * (estimated requests per second) * 8 / 1024." |
| 960 | + "To resolve the issue, try sizing down the payload, slowing down the request rate or increasing the allocated bandwidth." |
| 961 | + ) |
| 962 | + raise |
956 | 963 |
|
957 | 964 | def activate( |
958 | 965 | self, |
|
0 commit comments