|
@@ -97,7 +97,6 @@
     to_weight_tensor_with_linear_activation_quantization_metadata,
 )
 from torchao.utils import (
-    _ConfigDeprecationWrapper,
     is_MI300,
     is_sm_at_least_89,
     is_sm_at_least_90,
|
146 | 145 | "autoquant", |
147 | 146 | "_get_subclass_inserter", |
148 | 147 | "quantize_", |
149 | | - "int8_dynamic_activation_int4_weight", |
150 | | - "int8_dynamic_activation_int8_weight", |
151 | | - "int8_dynamic_activation_int8_semi_sparse_weight", |
152 | | - "int4_weight_only", |
153 | | - "int8_weight_only", |
154 | 148 | "intx_quantization_aware_training", |
155 | | - "float8_weight_only", |
156 | | - "uintx_weight_only", |
157 | | - "fpx_weight_only", |
158 | | - "gemlite_uintx_weight_only", |
159 | | - "float8_dynamic_activation_float8_weight", |
160 | | - "float8_static_activation_float8_weight", |
161 | 149 | "Int8DynActInt4WeightQuantizer", |
162 | 150 | "Float8DynamicActivationFloat8SemiSparseWeightConfig", |
163 | 151 | "ModuleFqnToConfig", |
@@ -464,7 +452,7 @@ def quantize_( |
         # Int8DynamicActivationInt8WeightConfig (optimized with int8 mm op and torch.compile)
         # Int4WeightOnlyConfig (optimized with int4 tinygemm kernel and torch.compile)
         # Int8WeightOnlyConfig (optimized with int8 mm op and torch.compile)
-        from torchao.quantization.quant_api import int4_weight_only
+        from torchao.quantization.quant_api import Int4WeightOnlyConfig
 
         m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32))
         quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1))
@@ -599,12 +587,6 @@ def __post_init__(self): |
         )
 
 
-# for BC
-int8_dynamic_activation_int4_weight = _ConfigDeprecationWrapper(
-    "int8_dynamic_activation_int4_weight", Int8DynamicActivationInt4WeightConfig
-)
-
-
 @register_quantize_module_handler(Int8DynamicActivationInt4WeightConfig)
 def _int8_dynamic_activation_int4_weight_transform(
     module: torch.nn.Module,
@@ -973,12 +955,6 @@ def __post_init__(self): |
         )
 
 
-# for bc
-int4_dynamic_activation_int4_weight = _ConfigDeprecationWrapper(
-    "int4_dynamic_activation_int4_weight", Int4DynamicActivationInt4WeightConfig
-)
-
-
 @register_quantize_module_handler(Int4DynamicActivationInt4WeightConfig)
 def _int4_dynamic_activation_int4_weight_transform(
     module: torch.nn.Module, config: Int4DynamicActivationInt4WeightConfig
@@ -1039,12 +1015,6 @@ def __post_init__(self): |
         )
 
 
-# for BC
-gemlite_uintx_weight_only = _ConfigDeprecationWrapper(
-    "gemlite_uintx_weight_only", GemliteUIntXWeightOnlyConfig
-)
-
-
 @register_quantize_module_handler(GemliteUIntXWeightOnlyConfig)
 def _gemlite_uintx_weight_only_transform(
     module: torch.nn.Module, config: GemliteUIntXWeightOnlyConfig
@@ -1122,11 +1092,6 @@ def __post_init__(self): |
         torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig")
 
 
-# for BC
-# TODO maybe change other callsites
-int4_weight_only = _ConfigDeprecationWrapper("int4_weight_only", Int4WeightOnlyConfig)
-
-
 def _int4_weight_only_quantize_tensor(weight, config):
     # TODO(future PR): perhaps move this logic to a different file, to keep the API
     # file clean of implementation details
@@ -1338,10 +1303,6 @@ def __post_init__(self): |
         torch._C._log_api_usage_once("torchao.quantization.Int8WeightOnlyConfig")
 
 
-# for BC
-int8_weight_only = _ConfigDeprecationWrapper("int8_weight_only", Int8WeightOnlyConfig)
-
-
 def _int8_weight_only_quantize_tensor(weight, config):
     mapping_type = MappingType.SYMMETRIC
     target_dtype = torch.int8
@@ -1506,12 +1467,6 @@ def __post_init__(self): |
         )
 
 
-# for BC
-int8_dynamic_activation_int8_weight = _ConfigDeprecationWrapper(
-    "int8_dynamic_activation_int8_weight", Int8DynamicActivationInt8WeightConfig
-)
-
-
 def _int8_dynamic_activation_int8_weight_quantize_tensor(weight, config):
     layout = config.layout
     act_mapping_type = config.act_mapping_type
@@ -1617,12 +1572,6 @@ def __post_init__(self): |
         torch._C._log_api_usage_once("torchao.quantization.Float8WeightOnlyConfig")
 
 
-# for BC
-float8_weight_only = _ConfigDeprecationWrapper(
-    "float8_weight_only", Float8WeightOnlyConfig
-)
-
-
 def _float8_weight_only_quant_tensor(weight, config):
     if config.version == 1:
         warnings.warn(
@@ -1800,12 +1749,6 @@ def __post_init__(self): |
         self.mm_config = Float8MMConfig(use_fast_accum=default_use_fast_accum)
 
 
-# for bc
-float8_dynamic_activation_float8_weight = _ConfigDeprecationWrapper(
-    "float8_dynamic_activation_float8_weight", Float8DynamicActivationFloat8WeightConfig
-)
-
-
 def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config):
     activation_dtype = config.activation_dtype
     weight_dtype = config.weight_dtype
@@ -1995,12 +1938,6 @@ def __post_init__(self): |
         )
 
 
-# for bc
-float8_static_activation_float8_weight = _ConfigDeprecationWrapper(
-    "float8_static_activation_float8_weight", Float8StaticActivationFloat8WeightConfig
-)
-
-
 @register_quantize_module_handler(Float8StaticActivationFloat8WeightConfig)
 def _float8_static_activation_float8_weight_transform(
     module: torch.nn.Module, config: Float8StaticActivationFloat8WeightConfig
@@ -2086,12 +2023,6 @@ def __post_init__(self): |
         )
 
 
-# for BC
-uintx_weight_only = _ConfigDeprecationWrapper(
-    "uintx_weight_only", UIntXWeightOnlyConfig
-)
-
-
 @register_quantize_module_handler(UIntXWeightOnlyConfig)
 def _uintx_weight_only_transform(
     module: torch.nn.Module, config: UIntXWeightOnlyConfig
@@ -2373,10 +2304,6 @@ def __post_init__(self): |
         )
 
 
-# for BC
-fpx_weight_only = _ConfigDeprecationWrapper("fpx_weight_only", FPXWeightOnlyConfig)
-
-
 @register_quantize_module_handler(FPXWeightOnlyConfig)
 def _fpx_weight_only_transform(
     module: torch.nn.Module, config: FPXWeightOnlyConfig
|
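Each deprecated function-style alias removed above has a one-to-one Config-class replacement that stays exported, so callers migrate by swapping the alias for the class. A minimal sketch of the updated call pattern, adapted from the quantize_ docstring example in this diff (group_size=32 and version=1 are the values that example happens to use, not required arguments):

import torch.nn as nn

from torchao.quantization.quant_api import Int4WeightOnlyConfig, quantize_

m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32))

# Before this change: quantize_(m, int4_weight_only(group_size=32)), where the
# alias was a _ConfigDeprecationWrapper around Int4WeightOnlyConfig.
# Now the Config instance is passed directly:
quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1))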