|
96 | 96 | to_weight_tensor_with_linear_activation_quantization_metadata, |
97 | 97 | ) |
98 | 98 | from torchao.utils import ( |
99 | | - _ConfigDeprecationWrapper, |
100 | 99 | is_MI300, |
101 | 100 | is_sm_at_least_89, |
102 | 101 | is_sm_at_least_90, |
|
148 | 147 | "autoquant", |
149 | 148 | "_get_subclass_inserter", |
150 | 149 | "quantize_", |
151 | | - "int8_dynamic_activation_int4_weight", |
152 | | - "int8_dynamic_activation_int8_weight", |
153 | | - "int8_dynamic_activation_int8_semi_sparse_weight", |
154 | | - "int4_weight_only", |
155 | | - "int8_weight_only", |
156 | 150 | "intx_quantization_aware_training", |
157 | | - "float8_weight_only", |
158 | | - "uintx_weight_only", |
159 | | - "fpx_weight_only", |
160 | | - "gemlite_uintx_weight_only", |
161 | | - "float8_dynamic_activation_float8_weight", |
162 | | - "float8_static_activation_float8_weight", |
163 | 151 | "Int8DynActInt4WeightQuantizer", |
164 | 152 | "Float8DynamicActivationFloat8SemiSparseWeightConfig", |
165 | 153 | "ModuleFqnToConfig", |
@@ -519,7 +507,7 @@ def quantize_( |
519 | 507 | # Int8DynamicActivationInt8WeightConfig (optimized with int8 mm op and torch.compile) |
520 | 508 | # Int4WeightOnlyConfig (optimized with int4 tinygemm kernel and torch.compile) |
521 | 509 | # Int8WeightOnlyConfig (optimized with int8 mm op and torch.compile)
522 | | - from torchao.quantization.quant_api import int4_weight_only |
| 510 | + from torchao.quantization.quant_api import Int4WeightOnlyConfig |
523 | 511 |
|
524 | 512 | m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32)) |
525 | 513 | quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1)) |
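
The docstring fix above mirrors the migration path for downstream users: each deprecated function-style config is replaced one-for-one by its `Config` class. A minimal before/after sketch (the model and `group_size` values are illustrative, taken from the docstring example):

```python
import torch.nn as nn

from torchao.quantization import quantize_
from torchao.quantization.quant_api import Int4WeightOnlyConfig

m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32))

# Before this change (deprecated alias, now removed):
#   from torchao.quantization.quant_api import int4_weight_only
#   quantize_(m, int4_weight_only(group_size=32))

# After: construct the config class directly.
quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1))
```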
@@ -641,12 +629,6 @@ def __post_init__(self): |
641 | 629 | ) |
642 | 630 |
|
643 | 631 |
|
644 | | -# for BC |
645 | | -int8_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
646 | | - "int8_dynamic_activation_int4_weight", Int8DynamicActivationInt4WeightConfig |
647 | | -) |
648 | | - |
649 | | - |
650 | 632 | @register_quantize_module_handler(Int8DynamicActivationInt4WeightConfig) |
651 | 633 | def _int8_dynamic_activation_int4_weight_transform( |
652 | 634 | module: torch.nn.Module, |
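
Every removed alias in this commit follows the same pattern: `_ConfigDeprecationWrapper(old_name, NewConfig)` kept the old callable name importable while steering callers to the config class. The wrapper's implementation is not shown in this diff; a plausible sketch of the shim being retired (the warning text and exact behavior are assumptions):

```python
import warnings


class _ConfigDeprecationWrapper:
    """Sketch of a BC shim: calling the old function name emits a
    deprecation warning, then constructs the new config class."""

    def __init__(self, deprecated_name: str, config_cls: type):
        self.deprecated_name = deprecated_name
        self.config_cls = config_cls

    def __call__(self, *args, **kwargs):
        warnings.warn(
            f"`{self.deprecated_name}` is deprecated; "
            f"use `{self.config_cls.__name__}` instead.",
            DeprecationWarning,
        )
        return self.config_cls(*args, **kwargs)
```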
@@ -1012,12 +994,6 @@ def __post_init__(self): |
1012 | 994 | ) |
1013 | 995 |
|
1014 | 996 |
|
1015 | | -# for bc |
1016 | | -int4_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
1017 | | - "int4_dynamic_activation_int4_weight", Int4DynamicActivationInt4WeightConfig |
1018 | | -) |
1019 | | - |
1020 | | - |
1021 | 997 | @register_quantize_module_handler(Int4DynamicActivationInt4WeightConfig) |
1022 | 998 | def _int4_dynamic_activation_int4_weight_transform( |
1023 | 999 | module: torch.nn.Module, config: Int4DynamicActivationInt4WeightConfig |
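
Note that the `@register_quantize_module_handler(SomeConfig)` decorators are untouched by this commit: they are the dispatch mechanism through which `quantize_` finds the transform registered for a config's type and applies it module by module. A simplified sketch of that registry pattern (not torchao's actual implementation):

```python
from typing import Callable, Dict, Type

import torch

# Populated at import time: config type -> transform function.
_HANDLERS: Dict[Type, Callable] = {}


def register_quantize_module_handler(config_cls: Type) -> Callable:
    """Decorator registering a module transform for one config class."""

    def decorator(transform: Callable) -> Callable:
        _HANDLERS[config_cls] = transform
        return transform

    return decorator


def apply_config(module: torch.nn.Module, config) -> torch.nn.Module:
    # Dispatch on the config's concrete type.
    return _HANDLERS[type(config)](module, config)
```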
@@ -1075,12 +1051,6 @@ def __post_init__(self): |
1075 | 1051 | ) |
1076 | 1052 |
|
1077 | 1053 |
|
1078 | | -# for BC |
1079 | | -gemlite_uintx_weight_only = _ConfigDeprecationWrapper( |
1080 | | - "gemlite_uintx_weight_only", GemliteUIntXWeightOnlyConfig |
1081 | | -) |
1082 | | - |
1083 | | - |
1084 | 1054 | @register_quantize_module_handler(GemliteUIntXWeightOnlyConfig) |
1085 | 1055 | def _gemlite_uintx_weight_only_transform( |
1086 | 1056 | module: torch.nn.Module, config: GemliteUIntXWeightOnlyConfig |
@@ -1158,11 +1128,6 @@ def __post_init__(self): |
1158 | 1128 | torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig") |
1159 | 1129 |
|
1160 | 1130 |
|
1161 | | -# for BC |
1162 | | -# TODO maybe change other callsites |
1163 | | -int4_weight_only = _ConfigDeprecationWrapper("int4_weight_only", Int4WeightOnlyConfig) |
1164 | | - |
1165 | | - |
1166 | 1131 | def _int4_weight_only_quantize_tensor(weight, config): |
1167 | 1132 | # TODO(future PR): perhaps move this logic to a different file, to keep the API |
1168 | 1133 | # file clean of implementation details |
@@ -1374,10 +1339,6 @@ def __post_init__(self): |
1374 | 1339 | torch._C._log_api_usage_once("torchao.quantization.Int8WeightOnlyConfig") |
1375 | 1340 |
|
1376 | 1341 |
|
1377 | | -# for BC |
1378 | | -int8_weight_only = _ConfigDeprecationWrapper("int8_weight_only", Int8WeightOnlyConfig) |
1379 | | - |
1380 | | - |
1381 | 1342 | def _int8_weight_only_quantize_tensor(weight, config): |
1382 | 1343 | mapping_type = MappingType.SYMMETRIC |
1383 | 1344 | target_dtype = torch.int8 |
@@ -1535,12 +1496,6 @@ def __post_init__(self): |
1535 | 1496 | ) |
1536 | 1497 |
|
1537 | 1498 |
|
1538 | | -# for BC |
1539 | | -int8_dynamic_activation_int8_weight = _ConfigDeprecationWrapper( |
1540 | | - "int8_dynamic_activation_int8_weight", Int8DynamicActivationInt8WeightConfig |
1541 | | -) |
1542 | | - |
1543 | | - |
1544 | 1499 | def _int8_dynamic_activation_int8_weight_quantize_tensor(weight, config): |
1545 | 1500 | layout = config.layout |
1546 | 1501 | act_mapping_type = config.act_mapping_type |
@@ -1646,12 +1601,6 @@ def __post_init__(self): |
1646 | 1601 | torch._C._log_api_usage_once("torchao.quantization.Float8WeightOnlyConfig") |
1647 | 1602 |
|
1648 | 1603 |
|
1649 | | -# for BC |
1650 | | -float8_weight_only = _ConfigDeprecationWrapper( |
1651 | | - "float8_weight_only", Float8WeightOnlyConfig |
1652 | | -) |
1653 | | - |
1654 | | - |
1655 | 1604 | def _float8_weight_only_quant_tensor(weight, config): |
1656 | 1605 | if config.version == 1: |
1657 | 1606 | warnings.warn( |
@@ -1806,12 +1755,6 @@ def __post_init__(self): |
1806 | 1755 | self.granularity = [activation_granularity, weight_granularity] |
1807 | 1756 |
|
1808 | 1757 |
|
1809 | | -# for bc |
1810 | | -float8_dynamic_activation_float8_weight = _ConfigDeprecationWrapper( |
1811 | | - "float8_dynamic_activation_float8_weight", Float8DynamicActivationFloat8WeightConfig |
1812 | | -) |
1813 | | - |
1814 | | - |
1815 | 1758 | def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config): |
1816 | 1759 | activation_dtype = config.activation_dtype |
1817 | 1760 | weight_dtype = config.weight_dtype |
@@ -1981,12 +1924,6 @@ def __post_init__(self): |
1981 | 1924 | ) |
1982 | 1925 |
|
1983 | 1926 |
|
1984 | | -# for bc |
1985 | | -float8_static_activation_float8_weight = _ConfigDeprecationWrapper( |
1986 | | - "float8_static_activation_float8_weight", Float8StaticActivationFloat8WeightConfig |
1987 | | -) |
1988 | | - |
1989 | | - |
1990 | 1927 | @register_quantize_module_handler(Float8StaticActivationFloat8WeightConfig) |
1991 | 1928 | def _float8_static_activation_float8_weight_transform( |
1992 | 1929 | module: torch.nn.Module, config: Float8StaticActivationFloat8WeightConfig |
@@ -2066,12 +2003,6 @@ def __post_init__(self): |
2066 | 2003 | torch._C._log_api_usage_once("torchao.quantization.UIntXWeightOnlyConfig") |
2067 | 2004 |
|
2068 | 2005 |
|
2069 | | -# for BC |
2070 | | -uintx_weight_only = _ConfigDeprecationWrapper( |
2071 | | - "uintx_weight_only", UIntXWeightOnlyConfig |
2072 | | -) |
2073 | | - |
2074 | | - |
2075 | 2006 | @register_quantize_module_handler(UIntXWeightOnlyConfig) |
2076 | 2007 | def _uintx_weight_only_transform( |
2077 | 2008 | module: torch.nn.Module, config: UIntXWeightOnlyConfig |
@@ -2350,10 +2281,6 @@ def __post_init__(self): |
2350 | 2281 | torch._C._log_api_usage_once("torchao.quantization.FPXWeightOnlyConfig") |
2351 | 2282 |
|
2352 | 2283 |
|
2353 | | -# for BC |
2354 | | -fpx_weight_only = _ConfigDeprecationWrapper("fpx_weight_only", FPXWeightOnlyConfig) |
2355 | | - |
2356 | | - |
2357 | 2284 | @register_quantize_module_handler(FPXWeightOnlyConfig) |
2358 | 2285 | def _fpx_weight_only_transform( |
2359 | 2286 | module: torch.nn.Module, config: FPXWeightOnlyConfig |
|