Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 32 additions & 37 deletions QEfficient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,49 @@
import warnings

import QEfficient.utils.model_registery # noqa: F401
from QEfficient.base import (
QEFFAutoModel,
QEFFAutoModelForCausalLM,
QEFFAutoModelForCTC,
QEFFAutoModelForImageTextToText,
QEFFAutoModelForSpeechSeq2Seq,
QEFFCommonLoader,
)
from QEfficient.compile.compile_helper import compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.peft import QEffAutoPeftModelForCausalLM
from QEfficient.transformers.transform import transform
from QEfficient.utils import custom_format_warning
from QEfficient.utils.logging_utils import logger

# `QEfficient.export` is an alias of `qualcomm_efficient_converter`, so users
# can call it to export models to ONNX.
export = qualcomm_efficient_converter

# Names made available by `from QEfficient import *`.
__all__ = [
    "transform",
    "export",
    "compile",
    "cloud_ai_100_exec_kv",
    "QEFFAutoModel",
    "QEFFAutoModelForCausalLM",
    "QEFFAutoModelForCTC",
    "QEffAutoPeftModelForCausalLM",
    "QEFFAutoModelForImageTextToText",
    "QEFFAutoModelForSpeechSeq2Seq",
    "QEFFCommonLoader",
]
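# Enable faster Hugging Face Hub downloads via hf_transfer.
# This assignment needs to be executed before hf_transfer is imported, which
# can happen indirectly through the package imports in this file.
# NOTE(review): imports now appear above this line - confirm the setting still takes effect.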
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Placeholder for all non-transformer models registered in QEfficient


# Use a custom warning format for a better logging experience
warnings.formatwarning = custom_format_warning

# Conditionally import QAIC-related modules if the SDK is installed
__version__ = "0.0.1.dev0"


def check_qaic_sdk():
"""Check if QAIC SDK is installed"""
Expand All @@ -37,40 +67,5 @@ def check_qaic_sdk():
return False


# Conditionally import QAIC-related modules if the SDK is installed
__version__ = "0.0.1.dev0"

if check_qaic_sdk():
from QEfficient.base import (
QEFFAutoModel,
QEFFAutoModelForCausalLM,
QEFFAutoModelForCTC,
QEFFAutoModelForImageTextToText,
QEFFAutoModelForSpeechSeq2Seq,
QEFFCommonLoader,
)
from QEfficient.compile.compile_helper import compile
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.peft import QEffAutoPeftModelForCausalLM
from QEfficient.transformers.transform import transform

# Users can use QEfficient.export for exporting models to ONNX
export = qualcomm_efficient_converter

__all__ = [
"transform",
"export",
"compile",
"cloud_ai_100_exec_kv",
"QEFFAutoModel",
"QEFFAutoModelForCausalLM",
"QEFFAutoModelForCTC",
"QEffAutoPeftModelForCausalLM",
"QEFFAutoModelForImageTextToText",
"QEFFAutoModelForSpeechSeq2Seq",
"QEFFCommonLoader",
]

else:
if not check_qaic_sdk():
logger.warning("QAIC SDK is not installed, eager mode features won't be available!")
62 changes: 40 additions & 22 deletions QEfficient/generation/cloud_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#
# -----------------------------------------------------------------------------

import platform
import sys
from pathlib import Path
from typing import Dict, List, Optional, Union
from warnings import warn
Expand All @@ -13,32 +15,29 @@

# The QAIC runtime bindings are optional. Each module is first imported from
# the regular search path; if that fails, the SDK's default install directory
# is appended to sys.path and the import is retried. The outcome is recorded in
# `is_qaicrt_imported` / `is_aicapi_imported` so that loading this module never
# raises ImportError by itself.
try:
    import qaicrt

    is_qaicrt_imported = True
except ImportError:
    try:
        sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
        import qaicrt

        is_qaicrt_imported = True
    except ImportError:
        is_qaicrt_imported = False

try:
    import QAicApi_pb2 as aicapi

    is_aicapi_imported = True
except ImportError:
    try:
        sys.path.append("/opt/qti-aic/dev/python")
        import QAicApi_pb2 as aicapi

        is_aicapi_imported = True
    except ImportError:
        # Record the failure on the aicapi flag (not the qaicrt one); otherwise
        # `is_aicapi_imported` stays undefined and later availability checks
        # fail with NameError, and a successful qaicrt import gets overwritten.
        is_aicapi_imported = False

# Map AIC data-type constants to NumPy dtypes. The keys are aicapi constants,
# so the table can only be filled when QAicApi_pb2 was imported; it is left
# empty otherwise so the name is always defined at module level.
if is_aicapi_imported:
    aic_to_np_dtype_mapping = {
        aicapi.FLOAT_TYPE: np.dtype(np.float32),
        aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
        aicapi.INT8_Q_TYPE: np.dtype(np.int8),
        aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
        aicapi.INT16_Q_TYPE: np.dtype(np.int16),
        aicapi.INT32_Q_TYPE: np.dtype(np.int32),
        aicapi.INT32_I_TYPE: np.dtype(np.int32),
        aicapi.INT64_I_TYPE: np.dtype(np.int64),
        aicapi.INT8_TYPE: np.dtype(np.int8),
    }
else:
    aic_to_np_dtype_mapping = {}


class QAICInferenceSession:
Expand All @@ -58,6 +57,25 @@ def __init__(
:activate: bool. If false, activation will be disabled. Default=True.
:enable_debug_logs: bool. If True, It will enable debug logs. Default=False.
"""
if not (is_qaicrt_imported and is_aicapi_imported):
raise ImportError(
"Unable to import `qaicrt` and/or `QAicApi_pb2` libraries required for executing QPC files on the CLOUD AI platform.\n"
"Please ensure that the QAIC platform SDK and apps SDK are installed correctly."
)

# Build the AIC-to-NumPy dtype mapping for this session (uses aicapi constants, so it must follow the import check above)
self.aic_to_np_dtype_mapping = {
aicapi.FLOAT_TYPE: np.dtype(np.float32),
aicapi.FLOAT_16_TYPE: np.dtype(np.float16),
aicapi.INT8_Q_TYPE: np.dtype(np.int8),
aicapi.UINT8_Q_TYPE: np.dtype(np.uint8),
aicapi.INT16_Q_TYPE: np.dtype(np.int16),
aicapi.INT32_Q_TYPE: np.dtype(np.int32),
aicapi.INT32_I_TYPE: np.dtype(np.int32),
aicapi.INT64_I_TYPE: np.dtype(np.int64),
aicapi.INT8_TYPE: np.dtype(np.int8),
}

# Load QPC
if device_ids is not None:
devices = qaicrt.QIDList(device_ids)
Expand All @@ -77,7 +95,7 @@ def __init__(
raise RuntimeError("Failed to getIoDescriptor")
iodesc.ParseFromString(bytes(iodesc_data))
self.allowed_shapes = [
[(aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
[(self.aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes]
for allowed_shape in iodesc.allowed_shapes
]
self.bindings = iodesc.selected_set.bindings
Expand All @@ -97,7 +115,7 @@ def __init__(
# Create input qbuffers and buf_dims
self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings]
self.buf_dims = qaicrt.BufferDimensionsVecRef(
[(aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
[(self.aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings]
)

@property
Expand Down Expand Up @@ -205,6 +223,6 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
continue
outputs[output_name] = np.frombuffer(
bytes(output_qbuffers[buffer_index]),
aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
self.aic_to_np_dtype_mapping[self.bindings[buffer_index].type],
).reshape(self.buf_dims[buffer_index][1])
return outputs
Loading