pytorch
diff --git a/benchmarks/microbenchmarks/utils.py b/benchmarks/microbenchmarks/utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/api_ref_dtypes.rst b/docs/source/api_ref_dtypes.rst
Lines changed: 2 additions & 2 deletions
diff --git a/test/dtypes/test_uintx.py b/test/dtypes/test_uintx.py
Lines changed: 1 addition & 0 deletions
diff --git a/test/quantization/test_marlin_qqq.py b/test/quantization/test_marlin_qqq.py
Lines changed: 1 addition & 1 deletion
diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py
Lines changed: 1 addition & 1 deletion
diff --git a/torchao/dtypes/__init__.py b/torchao/dtypes/__init__.py
Lines changed: 5 additions & 3 deletions
diff --git a/torchao/dtypes/affine_quantized_tensor_ops.py b/torchao/dtypes/affine_quantized_tensor_ops.py
Lines changed: 4 additions & 4 deletions
@@ -218,7 +218,7 @@ def string_to_config(
         )
     if "marlin" in quantization:
         if "qqq" in quantization:
-            from torchao.dtypes import MarlinQQQLayout
+            from torchao.prototype.dtypes import MarlinQQQLayout
 
             return Int8DynamicActivationInt4WeightConfig(
                 group_size=128,
 
@@ -23,8 +23,6 @@ Layouts and Tensor Subclasses
     FloatxTensorCoreLayout
     MarlinSparseLayout
     UintxLayout
-    MarlinQQQTensor
-    MarlinQQQLayout
     Int4CPULayout
     CutlassSemiSparseLayout
 
@@ -53,6 +51,8 @@ Prototype
     BlockSparseLayout
     CutlassInt4PackedLayout
     Int8DynamicActInt4WeightCPULayout
+    MarlinQQQTensor
+    MarlinQQQLayout
 
 ..
   _NF4Tensor - add after fixing torchao/dtypes/nf4tensor.py:docstring
 
@@ -182,6 +182,7 @@ def test_uintx_api_deprecation():
         ),
         ("CutlassInt4PackedLayout", "torchao.dtypes.uintx.cutlass_int4_packed_layout"),
         ("BlockSparseLayout", "torchao.dtypes.uintx.block_sparse_layout"),
+        ("MarlinQQQLayout", "torchao.dtypes.uintx.marlin_qqq_tensor"),
     ]
 
     for api_name, module_path in deprecated_apis:
 
@@ -10,7 +10,7 @@
 from torch import nn
 from torch.testing._internal.common_utils import TestCase, run_tests
 
-from torchao.dtypes import MarlinQQQLayout
+from torchao.prototype.dtypes import MarlinQQQLayout
 from torchao.quantization.marlin_qqq import (
     pack_to_marlin_qqq,
     unpack_from_marlin_qqq,
 
@@ -460,7 +460,7 @@ def ffn_or_attn_only(mod, fqn):
                 )
         if "marlin" in quantization:
             if "qqq" in quantization:
-                from torchao.dtypes import MarlinQQQLayout
+                from torchao.prototype.dtypes import MarlinQQQLayout
 
                 quantize_(
                     model,
 
@@ -16,19 +16,21 @@
 from .uintx import (
     Int4CPULayout,
     Int4XPULayout,
-    MarlinQQQLayout,
-    MarlinQQQTensor,
     MarlinSparseLayout,
     PackedLinearInt8DynamicActivationIntxWeightLayout,
     QDQLayout,
     SemiSparseLayout,
     TensorCoreTiledLayout,
     UintxLayout,
-    to_marlinqqq_quantized_intx,
 )
 from .uintx.block_sparse_layout import BlockSparseLayout
 from .uintx.cutlass_int4_packed_layout import CutlassInt4PackedLayout
 from .uintx.dyn_int8_act_int4_wei_cpu_layout import Int8DynamicActInt4WeightCPULayout
+from .uintx.marlin_qqq_tensor import (
+    MarlinQQQLayout,
+    MarlinQQQTensor,
+    to_marlinqqq_quantized_intx,
+)
 from .utils import (
     Layout,
     PlainLayout,
 
@@ -39,10 +39,6 @@
     _linear_fp_act_uint4_weight_int8_zero_check,
     _linear_fp_act_uint4_weight_int8_zero_impl,
 )
-from torchao.dtypes.uintx.marlin_qqq_tensor import (
-    _linear_int8_act_int4_weight_marlin_qqq_check,
-    _linear_int8_act_int4_weight_marlin_qqq_impl,
-)
 from torchao.dtypes.uintx.marlin_sparse_layout import (
     _linear_fp_act_int4_weight_sparse_marlin_check,
     _linear_fp_act_int4_weight_sparse_marlin_impl,
@@ -94,6 +90,10 @@
     _linear_int8_act_int4_weight_cpu_check,
     _linear_int8_act_int4_weight_cpu_impl,
 )
+from torchao.prototype.dtypes.uintx.marlin_qqq_tensor import (
+    _linear_int8_act_int4_weight_marlin_qqq_check,
+    _linear_int8_act_int4_weight_marlin_qqq_impl,
+)
 from torchao.quantization.quant_primitives import (
     ZeroPointDomain,
     _dequantize_affine_no_zero_point,
Original file line number	Diff line number	Diff line change
`@@ -218,7 +218,7 @@ def string_to_config(`
`218`	`218`	`)`
`219`	`219`	`if "marlin" in quantization:`
`220`	`220`	`if "qqq" in quantization:`
`221`		`- from torchao.dtypes import MarlinQQQLayout`
	`221`	`+ from torchao.prototype.dtypes import MarlinQQQLayout`
`222`	`222`
`223`	`223`	`return Int8DynamicActivationInt4WeightConfig(`
`224`	`224`	`group_size=128,`
Original file line number	Diff line number	Diff line change
`@@ -182,6 +182,7 @@ def test_uintx_api_deprecation():`
`182`	`182`	`),`
`183`	`183`	`("CutlassInt4PackedLayout", "torchao.dtypes.uintx.cutlass_int4_packed_layout"),`
`184`	`184`	`("BlockSparseLayout", "torchao.dtypes.uintx.block_sparse_layout"),`
	`185`	`+ ("MarlinQQQLayout", "torchao.dtypes.uintx.marlin_qqq_tensor"),`
`185`	`186`	`]`
`186`	`187`
`187`	`188`	`for api_name, module_path in deprecated_apis:`
Original file line number	Diff line number	Diff line change
`@@ -460,7 +460,7 @@ def ffn_or_attn_only(mod, fqn):`
`460`	`460`	`)`
`461`	`461`	`if "marlin" in quantization:`
`462`	`462`	`if "qqq" in quantization:`
`463`		`- from torchao.dtypes import MarlinQQQLayout`
	`463`	`+ from torchao.prototype.dtypes import MarlinQQQLayout`
`464`	`464`
`465`	`465`	`quantize_(`
`466`	`466`	`model,`