Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 20ae003

Browse files
[Neural Speed]load model from modelscope (#1382)
Co-authored-by: Wenxin Zhang <wenxin.zhang@intel.com>
1 parent 584ed50 commit 20ae003

File tree

9 files changed

+54
-11
lines changed

9 files changed

+54
-11
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,22 @@ model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
216216
outputs = model.generate(inputs)
217217
```
218218

219+
You can also load a PyTorch model from ModelScope:
220+
> **Note**: This requires the `modelscope` package to be installed.
221+
```python
222+
from transformers import TextStreamer
223+
from modelscope import AutoTokenizer
224+
from intel_extension_for_transformers.transformers import AutoModelForCausalLM
225+
model_name = "qwen/Qwen-7B" # Modelscope model_id or local model
226+
prompt = "Once upon a time, there existed a little girl,"
227+
228+
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, model_hub="modelscope")
229+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
230+
inputs = tokenizer(prompt, return_tensors="pt").input_ids
231+
streamer = TextStreamer(tokenizer)
232+
outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
233+
```
234+
219235
You can also load a low-bit model quantized by the GPTQ, AWQ, RTN, or AutoRound algorithm.
220236
```python
221237
from transformers import AutoTokenizer

examples/huggingface/neural_speed/perplexity/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ tiktoken
1313
py-cpuinfo
1414
cmake
1515
gguf
16-
neural-speed
16+
neural-speed==1.0a0

examples/huggingface/neural_speed/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
intel_extension_for_transformers
2-
neural-speed
2+
neural-speed==1.0a0
33
git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2
44
sentencepiece
55
gguf
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
--extra-index-url https://download.pytorch.org/whl/cpu
2+
accelerate
3+
auto-gptq
4+
cmake
5+
datasets
6+
einops
7+
gguf
8+
neural-speed==1.0a0
9+
numpy
10+
peft
11+
protobuf<3.20
12+
py-cpuinfo
13+
sentencepiece
14+
tiktoken
15+
torch==2.2.0+cpu
16+
transformers
17+
transformers_stream_generator

intel_extension_for_transformers/neural_chat/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2
88
huggingface_hub
99
intel_extension_for_pytorch==2.2.0
1010
neural-compressor
11-
neural_speed
11+
neural_speed==1.0a0
1212
numpy==1.23.5
1313
onnx>=1.15.0
1414
optimum

intel_extension_for_transformers/neural_chat/requirements_cpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2
77
huggingface_hub
88
intel_extension_for_pytorch==2.2.0
99
neural-compressor
10-
neural_speed
10+
neural_speed==1.0a0
1111
numpy==1.23.5
1212
optimum
1313
optimum-intel

intel_extension_for_transformers/neural_chat/tests/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ langid
4040
librosa
4141
markdown
4242
neural-compressor
43-
neural_speed
43+
neural_speed==1.0a0
4444
num2words
4545
numba
4646
numpy==1.23.5

intel_extension_for_transformers/transformers/modeling/modeling_auto.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ class _BaseQBitsAutoModelClass:
281281
"qwen",
282282
"phi",
283283
"whisper",
284+
"qwen2",
285+
"gemma",
284286
]
285287

286288
model_type_list_for_gptq = [
@@ -361,12 +363,19 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
361363
)
362364

363365
config = kwargs.pop("config", None)
366+
model_hub = kwargs.pop("model_hub", "huggingface")
364367

365368
if not isinstance(config, PretrainedConfig):
366-
config, _ = AutoConfig.from_pretrained(
367-
pretrained_model_name_or_path,
368-
return_unused_kwargs=True,
369-
**kwargs,
369+
if model_hub == "modelscope":
370+
import modelscope # pylint: disable=E0401
371+
config = modelscope.AutoConfig.from_pretrained(pretrained_model_name_or_path,
372+
trust_remote_code=True)
373+
else:
374+
config, _ = AutoConfig.from_pretrained(
375+
pretrained_model_name_or_path,
376+
return_unused_kwargs=True,
377+
**kwargs,
378+
370379
)
371380

372381
quantization_config = kwargs.pop("quantization_config", None)
@@ -541,7 +550,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
541550
from neural_speed import Model
542551

543552
model = Model()
544-
model.init(
553+
model.init( # pylint: disable=E1123
545554
pretrained_model_name_or_path,
546555
weight_dtype=quantization_config.weight_dtype,
547556
alg=quantization_config.scheme,
@@ -557,6 +566,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
557566
use_gptq=quantization_config.quant_method.value == "gptq"
558567
or quantization_config.quant_method.value == "autoround",
559568
use_awq=quantization_config.quant_method.value == "awq",
569+
model_hub=model_hub,
560570
)
561571
model.quantization_config = quantization_config
562572
return model

tests/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ git+https://github.com/intel/neural-compressor.git
1111
intel-extension-for-pytorch==2.2.0
1212
intel-tensorflow==2.14.0
1313
mlflow
14-
neural-speed
14+
neural-speed==1.0a0
1515
nlpaug==1.1.9
1616
onnx==1.15.0
1717
onnxruntime==1.17.1

0 commit comments

Comments
 (0)