|
96 | 96 | to_weight_tensor_with_linear_activation_quantization_metadata, |
97 | 97 | ) |
98 | 98 | from torchao.utils import ( |
99 | | - _ConfigDeprecationWrapper, |
100 | 99 | is_MI300, |
101 | 100 | is_sm_at_least_89, |
102 | 101 | is_sm_at_least_90, |
|
148 | 147 | "autoquant", |
149 | 148 | "_get_subclass_inserter", |
150 | 149 | "quantize_", |
151 | | - "int8_dynamic_activation_int4_weight", |
152 | | - "int8_dynamic_activation_int8_weight", |
153 | | - "int8_dynamic_activation_int8_semi_sparse_weight", |
154 | | - "int4_weight_only", |
155 | | - "int8_weight_only", |
156 | 150 | "intx_quantization_aware_training", |
157 | | - "float8_weight_only", |
158 | | - "uintx_weight_only", |
159 | | - "fpx_weight_only", |
160 | | - "gemlite_uintx_weight_only", |
161 | | - "float8_dynamic_activation_float8_weight", |
162 | | - "float8_static_activation_float8_weight", |
163 | 151 | "Int8DynActInt4WeightQuantizer", |
164 | 152 | "Float8DynamicActivationFloat8SemiSparseWeightConfig", |
165 | 153 | "ModuleFqnToConfig", |
@@ -507,7 +495,7 @@ def quantize_( |
507 | 495 | # Int8DynamicActivationInt8WeightConfig (optimized with int8 mm op and torch.compile) |
508 | 496 | # Int4WeightOnlyConfig (optimized with int4 tinygemm kernel and torch.compile) |
509 | 497 | # Int8WeightOnlyConfig (optimized with int8 mm op and torch.compile) |
510 | | - from torchao.quantization.quant_api import int4_weight_only |
| 498 | + from torchao.quantization.quant_api import Int4WeightOnlyConfig |
511 | 499 |
|
512 | 500 | m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32)) |
513 | 501 | quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1)) |
@@ -629,12 +617,6 @@ def __post_init__(self): |
629 | 617 | ) |
630 | 618 |
|
631 | 619 |
|
632 | | -# for BC |
633 | | -int8_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
634 | | - "int8_dynamic_activation_int4_weight", Int8DynamicActivationInt4WeightConfig |
635 | | -) |
636 | | - |
637 | | - |
638 | 620 | @register_quantize_module_handler(Int8DynamicActivationInt4WeightConfig) |
639 | 621 | def _int8_dynamic_activation_int4_weight_transform( |
640 | 622 | module: torch.nn.Module, |
@@ -1000,12 +982,6 @@ def __post_init__(self): |
1000 | 982 | ) |
1001 | 983 |
|
1002 | 984 |
|
1003 | | -# for bc |
1004 | | -int4_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
1005 | | - "int4_dynamic_activation_int4_weight", Int4DynamicActivationInt4WeightConfig |
1006 | | -) |
1007 | | - |
1008 | | - |
1009 | 985 | @register_quantize_module_handler(Int4DynamicActivationInt4WeightConfig) |
1010 | 986 | def _int4_dynamic_activation_int4_weight_transform( |
1011 | 987 | module: torch.nn.Module, config: Int4DynamicActivationInt4WeightConfig |
@@ -1063,12 +1039,6 @@ def __post_init__(self): |
1063 | 1039 | ) |
1064 | 1040 |
|
1065 | 1041 |
|
1066 | | -# for BC |
1067 | | -gemlite_uintx_weight_only = _ConfigDeprecationWrapper( |
1068 | | - "gemlite_uintx_weight_only", GemliteUIntXWeightOnlyConfig |
1069 | | -) |
1070 | | - |
1071 | | - |
1072 | 1042 | @register_quantize_module_handler(GemliteUIntXWeightOnlyConfig) |
1073 | 1043 | def _gemlite_uintx_weight_only_transform( |
1074 | 1044 | module: torch.nn.Module, config: GemliteUIntXWeightOnlyConfig |
@@ -1146,11 +1116,6 @@ def __post_init__(self): |
1146 | 1116 | torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig") |
1147 | 1117 |
|
1148 | 1118 |
|
1149 | | -# for BC |
1150 | | -# TODO maybe change other callsites |
1151 | | -int4_weight_only = _ConfigDeprecationWrapper("int4_weight_only", Int4WeightOnlyConfig) |
1152 | | - |
1153 | | - |
1154 | 1119 | def _int4_weight_only_quantize_tensor(weight, config): |
1155 | 1120 | # TODO(future PR): perhaps move this logic to a different file, to keep the API |
1156 | 1121 | # file clean of implementation details |
@@ -1362,10 +1327,6 @@ def __post_init__(self): |
1362 | 1327 | torch._C._log_api_usage_once("torchao.quantization.Int8WeightOnlyConfig") |
1363 | 1328 |
|
1364 | 1329 |
|
1365 | | -# for BC |
1366 | | -int8_weight_only = _ConfigDeprecationWrapper("int8_weight_only", Int8WeightOnlyConfig) |
1367 | | - |
1368 | | - |
1369 | 1330 | def _int8_weight_only_quantize_tensor(weight, config): |
1370 | 1331 | mapping_type = MappingType.SYMMETRIC |
1371 | 1332 | target_dtype = torch.int8 |
@@ -1523,12 +1484,6 @@ def __post_init__(self): |
1523 | 1484 | ) |
1524 | 1485 |
|
1525 | 1486 |
|
1526 | | -# for BC |
1527 | | -int8_dynamic_activation_int8_weight = _ConfigDeprecationWrapper( |
1528 | | - "int8_dynamic_activation_int8_weight", Int8DynamicActivationInt8WeightConfig |
1529 | | -) |
1530 | | - |
1531 | | - |
1532 | 1487 | def _int8_dynamic_activation_int8_weight_quantize_tensor(weight, config): |
1533 | 1488 | layout = config.layout |
1534 | 1489 | act_mapping_type = config.act_mapping_type |
@@ -1634,12 +1589,6 @@ def __post_init__(self): |
1634 | 1589 | torch._C._log_api_usage_once("torchao.quantization.Float8WeightOnlyConfig") |
1635 | 1590 |
|
1636 | 1591 |
|
1637 | | -# for BC |
1638 | | -float8_weight_only = _ConfigDeprecationWrapper( |
1639 | | - "float8_weight_only", Float8WeightOnlyConfig |
1640 | | -) |
1641 | | - |
1642 | | - |
1643 | 1592 | def _float8_weight_only_quant_tensor(weight, config): |
1644 | 1593 | if config.version == 1: |
1645 | 1594 | warnings.warn( |
@@ -1798,12 +1747,6 @@ def __post_init__(self): |
1798 | 1747 | self.granularity = [activation_granularity, weight_granularity] |
1799 | 1748 |
|
1800 | 1749 |
|
1801 | | -# for bc |
1802 | | -float8_dynamic_activation_float8_weight = _ConfigDeprecationWrapper( |
1803 | | - "float8_dynamic_activation_float8_weight", Float8DynamicActivationFloat8WeightConfig |
1804 | | -) |
1805 | | - |
1806 | | - |
1807 | 1750 | def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config): |
1808 | 1751 | activation_dtype = config.activation_dtype |
1809 | 1752 | weight_dtype = config.weight_dtype |
@@ -1979,12 +1922,6 @@ def __post_init__(self): |
1979 | 1922 | ) |
1980 | 1923 |
|
1981 | 1924 |
|
1982 | | -# for bc |
1983 | | -float8_static_activation_float8_weight = _ConfigDeprecationWrapper( |
1984 | | - "float8_static_activation_float8_weight", Float8StaticActivationFloat8WeightConfig |
1985 | | -) |
1986 | | - |
1987 | | - |
1988 | 1925 | @register_quantize_module_handler(Float8StaticActivationFloat8WeightConfig) |
1989 | 1926 | def _float8_static_activation_float8_weight_transform( |
1990 | 1927 | module: torch.nn.Module, config: Float8StaticActivationFloat8WeightConfig |
@@ -2067,12 +2004,6 @@ def __post_init__(self): |
2067 | 2004 | torch._C._log_api_usage_once("torchao.quantization.UIntXWeightOnlyConfig") |
2068 | 2005 |
|
2069 | 2006 |
|
2070 | | -# for BC |
2071 | | -uintx_weight_only = _ConfigDeprecationWrapper( |
2072 | | - "uintx_weight_only", UIntXWeightOnlyConfig |
2073 | | -) |
2074 | | - |
2075 | | - |
2076 | 2007 | @register_quantize_module_handler(UIntXWeightOnlyConfig) |
2077 | 2008 | def _uintx_weight_only_transform( |
2078 | 2009 | module: torch.nn.Module, config: UIntXWeightOnlyConfig |
@@ -2351,10 +2282,6 @@ def __post_init__(self): |
2351 | 2282 | torch._C._log_api_usage_once("torchao.quantization.FPXWeightOnlyConfig") |
2352 | 2283 |
|
2353 | 2284 |
|
2354 | | -# for BC |
2355 | | -fpx_weight_only = _ConfigDeprecationWrapper("fpx_weight_only", FPXWeightOnlyConfig) |
2356 | | - |
2357 | | - |
2358 | 2285 | @register_quantize_module_handler(FPXWeightOnlyConfig) |
2359 | 2286 | def _fpx_weight_only_transform( |
2360 | 2287 | module: torch.nn.Module, config: FPXWeightOnlyConfig |
|
0 commit comments