@@ -16,7 +16,12 @@ class RequestRecommend(BaseModel):
1616 A request to recommend compute shapes and parameters for a given model.
1717 """
1818
19- model_ocid : str = Field (..., description = "The OCID of the model to recommend feasible compute shapes." )
19+ model_ocid : str = Field (
20+ ..., description = "The OCID of the model to recommend feasible compute shapes."
21+ )
22+
23+ class Config :
24+ protected_namespaces = ()
2025
2126
2227class DeploymentParams (BaseModel ): # noqa: N801
@@ -42,6 +47,9 @@ class ModelDetail(BaseModel):
4247 ..., description = "Total size of model and cache in GB."
4348 )
4449
50+ class Config :
51+ protected_namespaces = ()
52+
4553
4654class ModelConfig (BaseModel ):
4755 """
@@ -54,8 +62,13 @@ class ModelConfig(BaseModel):
5462 )
5563 recommendation : str = Field (..., description = "GPU recommendation for the model." )
5664
65+ class Config :
66+ protected_namespaces = ()
67+
5768 @classmethod
58- def constuct_model_config (cls , estimator : MemoryEstimator , allowed_gpu_memory : float ) -> "ModelConfig" :
69+ def constuct_model_config (
70+ cls , estimator : MemoryEstimator , allowed_gpu_memory : float
71+ ) -> "ModelConfig" :
5972 """
6073 Assembles a complete ModelConfig, including model details, deployment parameters (vLLM), and recommendations.
6174
@@ -78,32 +91,33 @@ def constuct_model_config(cls, estimator: MemoryEstimator, allowed_gpu_memory: f
7891 """
7992 deployment_params = DeploymentParams (
8093 quantization = getattr (estimator .llm_config , "quantization" , None ),
81- max_model_len = getattr (estimator , "seq_len" , None )
94+ max_model_len = getattr (estimator , "seq_len" , None ),
8295 )
8396 model_detail = ModelDetail (
8497 model_size_gb = round (getattr (estimator , "model_memory" , 0.0 ), 2 ),
8598 kv_cache_size_gb = round (getattr (estimator , "kv_cache_memory" , 0.0 ), 2 ),
86- total_model_gb = round (getattr (estimator , "total_memory" , 0.0 ), 2 )
99+ total_model_gb = round (getattr (estimator , "total_memory" , 0.0 ), 2 ),
87100 )
88101 return ModelConfig (
89102 model_details = model_detail ,
90103 deployment_params = deployment_params ,
91- recommendation = estimator .limiting_factor (allowed_gpu_memory )
104+ recommendation = estimator .limiting_factor (allowed_gpu_memory ),
92105 )
93106
94107
95108class ShapeReport (BaseModel ):
96109 """
97110 The feasible deployment configurations for the model per shape.
98111 """
99- shape_details : 'ComputeShapeSummary' = Field (
112+
113+ shape_details : "ComputeShapeSummary" = Field (
100114 ..., description = "Details about the compute shape (ex. VM.GPU.A10.2)."
101115 )
102- configurations : List [' ModelConfig' ] = Field (
116+ configurations : List [" ModelConfig" ] = Field (
103117 default_factory = list , description = "List of model configurations."
104118 )
105119
106- def is_dominated (self , others : List [' ShapeReport' ]) -> bool :
120+ def is_dominated (self , others : List [" ShapeReport" ]) -> bool :
107121 """
108122 Determines whether this shape is dominated by any other shape in a Pareto sense.
109123
@@ -128,31 +142,35 @@ def is_dominated(self, others: List['ShapeReport']) -> bool:
128142
129143 cand_cost = self .shape_details .gpu_specs .ranking .cost
130144 cand_perf = self .shape_details .gpu_specs .ranking .performance
131- cand_quant = QUANT_MAPPING .get (self .configurations [0 ].deployment_params .quantization , 0 )
145+ cand_quant = QUANT_MAPPING .get (
146+ self .configurations [0 ].deployment_params .quantization , 0
147+ )
132148 cand_maxlen = self .configurations [0 ].deployment_params .max_model_len
133149
134150 for other in others :
135151 other_cost = other .shape_details .gpu_specs .ranking .cost
136152 other_perf = other .shape_details .gpu_specs .ranking .performance
137- other_quant = QUANT_MAPPING .get (other .configurations [0 ].deployment_params .quantization , 0 )
153+ other_quant = QUANT_MAPPING .get (
154+ other .configurations [0 ].deployment_params .quantization , 0
155+ )
138156 other_maxlen = other .configurations [0 ].deployment_params .max_model_len
139157 if (
140- other_cost <= cand_cost and
141- other_perf >= cand_perf and
142- other_quant >= cand_quant and
143- other_maxlen >= cand_maxlen and
144- (
145- other_cost < cand_cost or
146- other_perf > cand_perf or
147- other_quant > cand_quant or
148- other_maxlen > cand_maxlen
158+ other_cost <= cand_cost
159+ and other_perf >= cand_perf
160+ and other_quant >= cand_quant
161+ and other_maxlen >= cand_maxlen
162+ and (
163+ other_cost < cand_cost
164+ or other_perf > cand_perf
165+ or other_quant > cand_quant
166+ or other_maxlen > cand_maxlen
149167 )
150168 ):
151169 return True
152170 return False
153171
154172 @classmethod
155- def pareto_front (cls , shapes : List [' ShapeReport' ]) -> List [' ShapeReport' ]:
173+ def pareto_front (cls , shapes : List [" ShapeReport" ]) -> List [" ShapeReport" ]:
156174 """
157175 Filters a list of shapes/configurations to those on the Pareto frontier.
158176
@@ -171,7 +189,11 @@ def pareto_front(cls, shapes: List['ShapeReport']) -> List['ShapeReport']:
171189 The returned set contains non-dominated deployments for maximizing
172190 performance, quantization, and model length, while minimizing cost.
173191 """
174- return [shape for shape in shapes if not shape .is_dominated ([s for s in shapes if s != shape ])]
192+ return [
193+ shape
194+ for shape in shapes
195+ if not shape .is_dominated ([s for s in shapes if s != shape ])
196+ ]
175197
176198
177199class ShapeRecommendationReport (BaseModel ):
@@ -184,7 +206,8 @@ class ShapeRecommendationReport(BaseModel):
184206 troubleshoot (Optional[TroubleshootShapeSummary]): Troubleshooting information
185207 if no valid deployment shapes are available.
186208 """
187- model_name : Optional [str ] = Field (
209+
210+ display_name : Optional [str ] = Field (
188211 "" , description = "Name of the model used for recommendations."
189212 )
190213 recommendations : List [ShapeReport ] = Field (
0 commit comments