@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 from torch.fx.node import Argument, Node, Target
+from torch_tensorrt._utils import is_tensorrt_version_supported
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion import impl
@@ -620,40 +621,41 @@ def aten_ops_quantize_op(
     )


-try:
-    import modelopt.torch.quantization as mtq  # noqa: F401
+if is_tensorrt_version_supported("10.8.0"):
+    try:
+        import modelopt.torch.quantization as mtq  # noqa: F401

-    assert torch.ops.tensorrt.dynamic_block_quantize_op.default
-except Exception as e:
-    _LOGGER.warning(
-        "Unable to import quantize op. Please install modelopt library (https://github.com/NVIDIA/TensorRT-Model-Optimizer?tab=readme-ov-file#installation) to add support for compiling quantized models"
-    )
-else:
+        assert torch.ops.tensorrt.dynamic_block_quantize_op.default
+    except Exception as e:
+        _LOGGER.warning(
+            "Unable to import quantize op. Please install modelopt library (https://github.com/NVIDIA/TensorRT-Model-Optimizer?tab=readme-ov-file#installation) to add support for compiling quantized models"
+        )
+    else:

-    @dynamo_tensorrt_converter(
-        torch.ops.tensorrt.dynamic_block_quantize_op.default,
-        supports_dynamic_shapes=True,
-    )
-    def aten_ops_dynamic_block_quantize_op(
-        ctx: ConversionContext,
-        target: Target,
-        args: Tuple[Argument, ...],
-        kwargs: Dict[str, Argument],
-        name: str,
-    ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-        return impl.dynamic_block_quantize.quantize(
-            ctx,
-            target,
-            SourceIR.ATEN,
-            name,
-            args[0],
-            args[1],
-            args[2],
-            args[3],
-            args[4],
-            args[5],
-            args[6],
+        @dynamo_tensorrt_converter(
+            torch.ops.tensorrt.dynamic_block_quantize_op.default,
+            supports_dynamic_shapes=True,
         )
+        def aten_ops_dynamic_block_quantize_op(
+            ctx: ConversionContext,
+            target: Target,
+            args: Tuple[Argument, ...],
+            kwargs: Dict[str, Argument],
+            name: str,
+        ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+            return impl.dynamic_block_quantize.quantize(
+                ctx,
+                target,
+                SourceIR.ATEN,
+                name,
+                args[0],
+                args[1],
+                args[2],
+                args[3],
+                args[4],
+                args[5],
+                args[6],
+            )


 @dynamo_tensorrt_converter(torch.ops.aten.squeeze.dim, supports_dynamic_shapes=True)
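For context, the new guard keys converter registration off the installed TensorRT version, since lowering `torch.ops.tensorrt.dynamic_block_quantize_op` relies on TensorRT 10.8.0 or newer. Below is a minimal sketch of the semantics assumed for `is_tensorrt_version_supported`; the actual helper in `torch_tensorrt._utils` may be implemented differently, and the `_sketch` name and the use of `packaging` here are illustrative assumptions, not the library's API.

```python
# Sketch only: assumed behavior of a TensorRT version gate, not the actual
# torch_tensorrt._utils implementation.
from packaging.version import Version


def is_tensorrt_version_supported_sketch(min_version: str) -> bool:
    try:
        import tensorrt as trt  # TensorRT Python bindings
    except ImportError:
        # Without TensorRT installed there is nothing to gate on;
        # treat the feature as unsupported.
        return False
    # tensorrt exposes its version via trt.__version__ (e.g. "10.8.0").
    return Version(trt.__version__) >= Version(min_version)


# Usage mirroring the diff: only attempt the modelopt import and converter
# registration when the installed TensorRT is new enough.
if is_tensorrt_version_supported_sketch("10.8.0"):
    pass  # register the dynamic_block_quantize_op converter here
```

Gating at registration time means that on older TensorRT builds the converter is simply never registered, instead of failing when the converter module is imported.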