 
 from .embedding import FakeQuantizedEmbedding
 from .fake_quantize_config import (
-    FakeQuantizeConfig,  # noqa: F401, for BC
     FakeQuantizeConfigBase,
     IntxFakeQuantizeConfig,
     _infer_fake_quantize_configs,
 )
 from .linear import FakeQuantizedLinear
-from .utils import _log_deprecation_warning
 
 
 class QATStep(str, Enum):
@@ -288,119 +286,6 @@ def _qat_config_transform( |
     return module
 
 
-@dataclass
-class IntXQuantizationAwareTrainingConfig(AOBaseConfig):
-    """
-    (Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.
-
-    Config for applying fake quantization to a `torch.nn.Module`.
-    to be used with :func:`~torchao.quantization.quant_api.quantize_`.
-
-    Example usage::
-
-        from torchao.quantization import quantize_
-        from torchao.quantization.qat import IntxFakeQuantizeConfig
-        activation_config = IntxFakeQuantizeConfig(
-            torch.int8, "per_token", is_symmetric=False,
-        )
-        weight_config = IntxFakeQuantizeConfig(
-            torch.int4, group_size=32, is_symmetric=True,
-        )
-        quantize_(
-            model,
-            IntXQuantizationAwareTrainingConfig(activation_config, weight_config),
-        )
-
-    Note: If the config is applied on a module that is not
-    `torch.nn.Linear` or `torch.nn.Embedding`, or it is applied on
-    `torch.nn.Embedding` with an activation config, then we will raise
-    ValueError as these are not supported.
-    """
-
-    activation_config: Optional[FakeQuantizeConfigBase] = None
-    weight_config: Optional[FakeQuantizeConfigBase] = None
-
-    def __post_init__(self):
-        _log_deprecation_warning(self)
-
-
-# for BC
-class intx_quantization_aware_training(IntXQuantizationAwareTrainingConfig):
-    pass
-
-
-@register_quantize_module_handler(IntXQuantizationAwareTrainingConfig)
-def _intx_quantization_aware_training_transform(
-    module: torch.nn.Module,
-    config: IntXQuantizationAwareTrainingConfig,
-) -> torch.nn.Module:
-    mod = module
-    activation_config = config.activation_config
-    weight_config = config.weight_config
-
-    if isinstance(mod, torch.nn.Linear):
-        return FakeQuantizedLinear.from_linear(
-            mod,
-            activation_config,
-            weight_config,
-        )
-    elif isinstance(mod, torch.nn.Embedding):
-        if activation_config is not None:
-            raise ValueError(
-                "Activation fake quantization is not supported for embedding"
-            )
-        return FakeQuantizedEmbedding.from_embedding(mod, weight_config)
-    else:
-        raise ValueError("Module of type '%s' does not have QAT support" % type(mod))
-
-
-@dataclass
-class FromIntXQuantizationAwareTrainingConfig(AOBaseConfig):
-    """
-    (Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.
-
-    Config for converting a model with fake quantized modules,
-    such as :func:`~torchao.quantization.qat.linear.FakeQuantizedLinear`
-    and :func:`~torchao.quantization.qat.linear.FakeQuantizedEmbedding`,
-    back to model with the original, corresponding modules without
-    fake quantization. This should be used with
-    :func:`~torchao.quantization.quant_api.quantize_`.
-
-    Example usage::
-
-        from torchao.quantization import quantize_
-        quantize_(
-            model_with_fake_quantized_linears,
-            FromIntXQuantizationAwareTrainingConfig(),
-        )
-    """
-
-    def __post_init__(self):
-        _log_deprecation_warning(self)
-
-
-# for BC
-class from_intx_quantization_aware_training(FromIntXQuantizationAwareTrainingConfig):
-    pass
-
-
-@register_quantize_module_handler(FromIntXQuantizationAwareTrainingConfig)
-def _from_intx_quantization_aware_training_transform(
-    mod: torch.nn.Module,
-    config: FromIntXQuantizationAwareTrainingConfig,
-) -> torch.nn.Module:
-    """
-    If the given module is a fake quantized module, return the original
-    corresponding version of the module without fake quantization.
-    """
-    if isinstance(mod, FakeQuantizedLinear):
-        return mod.to_linear()
-    elif isinstance(mod, FakeQuantizedEmbedding):
-        return mod.to_embedding()
-    else:
-        return mod
-
-
 class ComposableQATQuantizer(TwoStepQuantizer):
     """
     Composable quantizer that users can use to apply multiple QAT quantizers easily.
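Migration note: the deprecation messages in the removed docstrings point to `QATConfig`. Below is a minimal sketch of the prepare step that the deleted `IntXQuantizationAwareTrainingConfig` example maps onto, assuming the `QATConfig(activation_config=..., weight_config=..., step=...)` signature on current torchao main and a `model` you have already built:

```python
import torch
from torchao.quantization import quantize_
from torchao.quantization.qat import IntxFakeQuantizeConfig, QATConfig

# Same fake-quantization settings as the removed docstring example:
# asymmetric int8 per-token activations, symmetric int4 grouped weights.
activation_config = IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
weight_config = IntxFakeQuantizeConfig(torch.int4, group_size=32, is_symmetric=True)

# step="prepare" swaps nn.Linear / nn.Embedding for their fake-quantized
# counterparts, like the old IntXQuantizationAwareTrainingConfig did.
quantize_(
    model,  # assumed: your float model to be trained with QAT
    QATConfig(
        activation_config=activation_config,
        weight_config=weight_config,
        step="prepare",
    ),
)
```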
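The convert step replaces `FromIntXQuantizationAwareTrainingConfig`, and in the new API it is usually fused with real post-training quantization through `base_config`. A sketch under the same assumptions; the particular PTQ config here is just an illustrative choice:

```python
from torchao.quantization import Int8DynamicActivationInt4WeightConfig, quantize_
from torchao.quantization.qat import QATConfig

# After training, step="convert" swaps FakeQuantizedLinear/-Embedding back
# to plain modules and then applies the real quantization described by
# base_config (illustrative choice; pick whatever matches your target).
base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
quantize_(model, QATConfig(base_config, step="convert"))
```

Whether convert with no `base_config` still does a bare swap-back, the way the deleted `_from_intx_quantization_aware_training_transform` did, is worth verifying against `_qat_config_transform` in this file.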