diff --git a/QEfficient/__init__.py b/QEfficient/__init__.py
index 33c6f5588..7f63b34ca 100644
--- a/QEfficient/__init__.py
+++ b/QEfficient/__init__.py
@@ -9,19 +9,49 @@
 import warnings
 
 import QEfficient.utils.model_registery  # noqa: F401
+from QEfficient.base import (
+    QEFFAutoModel,
+    QEFFAutoModelForCausalLM,
+    QEFFAutoModelForCTC,
+    QEFFAutoModelForImageTextToText,
+    QEFFAutoModelForSpeechSeq2Seq,
+    QEFFCommonLoader,
+)
+from QEfficient.compile.compile_helper import compile
+from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
+from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
+from QEfficient.peft import QEffAutoPeftModelForCausalLM
+from QEfficient.transformers.transform import transform
 from QEfficient.utils import custom_format_warning
 from QEfficient.utils.logging_utils import logger
 
+# Users can use QEfficient.export for exporting models to ONNX
+export = qualcomm_efficient_converter
+__all__ = [
+    "transform",
+    "export",
+    "compile",
+    "cloud_ai_100_exec_kv",
+    "QEFFAutoModel",
+    "QEFFAutoModelForCausalLM",
+    "QEFFAutoModelForCTC",
+    "QEffAutoPeftModelForCausalLM",
+    "QEFFAutoModelForImageTextToText",
+    "QEFFAutoModelForSpeechSeq2Seq",
+    "QEFFCommonLoader",
+]
 # For faster downloads via hf_transfer
 # This code is put above import statements as this needs to be executed before
 # hf_transfer is imported (will happen on line 15 via leading imports)
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
 # Placeholder for all non-transformer models registered in QEfficient
-
 # custom warning for the better logging experience
 warnings.formatwarning = custom_format_warning
 
+# Conditionally import QAIC-related modules if the SDK is installed
+__version__ = "0.0.1.dev0"
+
 
 def check_qaic_sdk():
     """Check if QAIC SDK is installed"""
@@ -37,40 +67,5 @@ def check_qaic_sdk():
         return False
 
 
-# Conditionally import QAIC-related modules if the SDK is installed
-__version__ = "0.0.1.dev0"
-
-if check_qaic_sdk():
-    from QEfficient.base import (
-        QEFFAutoModel,
-        QEFFAutoModelForCausalLM,
-        QEFFAutoModelForCTC,
-        QEFFAutoModelForImageTextToText,
-        QEFFAutoModelForSpeechSeq2Seq,
-        QEFFCommonLoader,
-    )
-    from QEfficient.compile.compile_helper import compile
-    from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
-    from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
-    from QEfficient.peft import QEffAutoPeftModelForCausalLM
-    from QEfficient.transformers.transform import transform
-
-    # Users can use QEfficient.export for exporting models to ONNX
-    export = qualcomm_efficient_converter
-
-    __all__ = [
-        "transform",
-        "export",
-        "compile",
-        "cloud_ai_100_exec_kv",
-        "QEFFAutoModel",
-        "QEFFAutoModelForCausalLM",
-        "QEFFAutoModelForCTC",
-        "QEffAutoPeftModelForCausalLM",
-        "QEFFAutoModelForImageTextToText",
-        "QEFFAutoModelForSpeechSeq2Seq",
-        "QEFFCommonLoader",
-    ]
-
-else:
+if not check_qaic_sdk():
     logger.warning("QAIC SDK is not installed, eager mode features won't be available!")
diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py
index 5068c174e..652a641e2 100644
--- a/QEfficient/generation/cloud_infer.py
+++ b/QEfficient/generation/cloud_infer.py
@@ -5,6 +5,8 @@
 #
 # -----------------------------------------------------------------------------
 
+import platform
+import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 from warnings import warn
@@ -13,32 +15,29 @@
 
 try:
     import qaicrt
+
+    is_qaicrt_imported = True
 except ImportError:
-    import platform
-    import sys
+    try:
+        sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
+        import qaicrt
 
-    sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
-    import qaicrt
+        is_qaicrt_imported = True
+    except ImportError:
+        is_qaicrt_imported = False
 
 try:
     import QAicApi_pb2 as aicapi
-except ImportError:
-    import sys
 
-    sys.path.append("/opt/qti-aic/dev/python")
-    import QAicApi_pb2 as aicapi
+    is_aicapi_imported = True
+except ImportError:
+    try:
+        sys.path.append("/opt/qti-aic/dev/python")
+        import QAicApi_pb2 as aicapi
 
-aic_to_np_dtype_mapping = {
-    aicapi.FLOAT_TYPE: np.dtype(np.float32),
-    aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
-    aicapi.INT8_Q_TYPE: np.dtype(np.int8),
-    aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
-    aicapi.INT16_Q_TYPE: np.dtype(np.int16),
-    aicapi.INT32_Q_TYPE: np.dtype(np.int32),
-    aicapi.INT32_I_TYPE: np.dtype(np.int32),
-    aicapi.INT64_I_TYPE: np.dtype(np.int64),
-    aicapi.INT8_TYPE: np.dtype(np.int8),
-}
+        is_aicapi_imported = True
+    except ImportError:
+        is_aicapi_imported = False
 
 
 class QAICInferenceSession:
@@ -58,6 +57,25 @@ def __init__(
         :activate: bool. If false, activation will be disabled. Default=True.
         :enable_debug_logs: bool. If True, It will enable debug logs. Default=False.
         """
+        if not (is_qaicrt_imported and is_aicapi_imported):
+            raise ImportError(
+                "Unable to import `qaicrt` and/or `QAicApi_pb2` libraries required for executing QPC files on the CLOUD AI platform.\n"
+                "Please ensure that the QAIC platform SDK and apps SDK are installed correctly."
+            )
+
+        # Build dtype mapping once (depends on aicapi constants)
+        self.aic_to_np_dtype_mapping = {
+            aicapi.FLOAT_TYPE: np.dtype(np.float32),
+            aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
+            aicapi.INT8_Q_TYPE: np.dtype(np.int8),
+            aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
+            aicapi.INT16_Q_TYPE: np.dtype(np.int16),
+            aicapi.INT32_Q_TYPE: np.dtype(np.int32),
+            aicapi.INT32_I_TYPE: np.dtype(np.int32),
+            aicapi.INT64_I_TYPE: np.dtype(np.int64),
+            aicapi.INT8_TYPE: np.dtype(np.int8),
+        }
+
         # Load QPC
         if device_ids is not None:
             devices = qaicrt.QIDList(device_ids)
@@ -77,7 +95,7 @@ def __init__(
             raise RuntimeError("Failed to getIoDescriptor")
         iodesc.ParseFromString(bytes(iodesc_data))
         self.allowed_shapes = [
-            [(aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
+            [(self.aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
             for allowed_shape in iodesc.allowed_shapes
         ]
         self.bindings = iodesc.selected_set.bindings
@@ -97,7 +115,7 @@ def __init__(
         # Create input qbuffers and buf_dims
         self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings]
         self.buf_dims = qaicrt.BufferDimensionsVecRef(
-            [(aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
+            [(self.aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
         )
 
     @property
@@ -205,6 +223,6 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
                 continue
             outputs[output_name] = np.frombuffer(
                 bytes(output_qbuffers[buffer_index]),
-                aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
+                self.aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
             ).reshape(self.buf_dims[buffer_index][1])
         return outputs