
Commit 79d412c

load tokenizer method (#30)
make all tokenizers left-padded
1 parent 776c77c · commit 79d412c
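For context on the commit message: decoder-only models generate by extending the last token of each prompt, so in a batched call every sequence's real tokens must sit flush against the generation boundary. Below is a minimal sketch of the difference between padding sides; it is not part of this commit, and the `gpt2` checkpoint is an arbitrary causal-LM example.

```python
# Illustrative only: why left padding matters for batched generation with
# decoder-only models. Assumes the `transformers` library; "gpt2" is an
# arbitrary checkpoint chosen for the example, not taken from this repo.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 ships without a pad token

prompts = ["hello", "a much longer prompt"]

tokenizer.padding_side = "right"
right = tokenizer(prompts, padding=True)["input_ids"]
# right-padded: the short prompt ends in pad ids, so generate() would have
# to continue from padding rather than the prompt's last real token

tokenizer.padding_side = "left"
left = tokenizer(prompts, padding=True)["input_ids"]
# left-padded: every row ends with its prompt's final real token, which is
# exactly what batched generation needs
```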

File tree

6 files changed: +20 −10 lines changed

inference_server/model_handler/deployment.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -13,7 +13,7 @@
 from transformers import AutoTokenizer
 
 from ..constants import DS_INFERENCE, DS_ZERO
-from ..models import get_downloaded_model_path, get_model_class
+from ..models import get_downloaded_model_path, get_model_class, load_tokenizer
 from ..utils import (
     GenerateResponse,
     TokenizeRequest,
@@ -32,7 +32,7 @@ def __init__(self, args: argparse.Namespace, use_grpc_server: bool = False, cuda
         self.use_grpc_server = use_grpc_server
 
         if self.use_grpc_server:
-            self.tokenizer = AutoTokenizer.from_pretrained(get_downloaded_model_path(args.model_name))
+            self.tokenizer = load_tokenizer(get_downloaded_model_path(args.model_name))
 
         self.initialize_ports()
 
```

inference_server/models/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,5 +1,5 @@
 from ..constants import DS_INFERENCE, DS_ZERO, HF_ACCELERATE
-from .model import Model, get_downloaded_model_path
+from .model import Model, get_downloaded_model_path, load_tokenizer
 
 
 def get_model_class(deployment_framework: str):
```

inference_server/models/ds_inference.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -12,7 +12,7 @@
 from transformers import AutoConfig, AutoTokenizer
 
 from ..utils import print_rank_n, run_rank_n
-from .model import Model, get_downloaded_model_path, get_hf_model_class
+from .model import Model, get_downloaded_model_path, get_hf_model_class, load_tokenizer
 
 
 # basic DeepSpeed inference model class for benchmarking
@@ -26,7 +26,7 @@ def __init__(self, args: Namespace) -> None:
 
         downloaded_model_path = get_downloaded_model_path(args.model_name)
 
-        self.tokenizer = AutoTokenizer.from_pretrained(downloaded_model_path)
+        self.tokenizer = load_tokenizer(downloaded_model_path)
         self.pad = self.tokenizer.pad_token_id
 
         # create dummy tensors for allocating space which will be filled with
```

inference_server/models/ds_zero.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -9,7 +9,7 @@
 from transformers.deepspeed import HfDeepSpeedConfig
 
 from ..utils import print_rank_n
-from .model import Model, get_downloaded_model_path, get_hf_model_class
+from .model import Model, get_downloaded_model_path, get_hf_model_class, load_tokenizer
 
 
 class DSZeROModel(Model):
@@ -54,7 +54,7 @@ def __init__(self, args: Namespace) -> None:
         # this tells from_pretrained to instantiate directly on gpus
         dschf = HfDeepSpeedConfig(ds_config)
 
-        self.tokenizer = AutoTokenizer.from_pretrained(downloaded_model_path)
+        self.tokenizer = load_tokenizer(downloaded_model_path)
         self.pad = self.tokenizer.pad_token_id
 
         self.model = get_hf_model_class(args.model_class).from_pretrained(
```

inference_server/models/hf_accelerate.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -5,7 +5,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from ..utils import print_rank_n
-from .model import Model, get_downloaded_model_path, get_hf_model_class
+from .model import Model, get_downloaded_model_path, get_hf_model_class, load_tokenizer
 
 
 class HFAccelerateModel(Model):
@@ -16,7 +16,7 @@ def __init__(self, args: Namespace) -> None:
 
         downloaded_model_path = get_downloaded_model_path(args.model_name)
 
-        self.tokenizer = AutoTokenizer.from_pretrained(downloaded_model_path)
+        self.tokenizer = load_tokenizer(downloaded_model_path)
         self.pad = self.tokenizer.pad_token_id
 
         kwargs = {"pretrained_model_name_or_path": downloaded_model_path, "device_map": "auto"}
```

inference_server/models/model.py

Lines changed: 11 additions & 1 deletion

```diff
@@ -7,7 +7,7 @@
 
 import transformers
 from huggingface_hub import snapshot_download
-from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM
+from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from transformers.utils import is_offline_mode
 
 from ..utils import GenerateRequest, GenerateResponse, GenerationMixin, TokenizeRequest, TokenizeResponse, run_rank_n
@@ -115,3 +115,13 @@ def check_batch_size(batch_size: int, max_batch_size: int) -> None:
 # this is a hack for now
 def get_hf_model_class(model_class: str) -> Union[AutoModelForCausalLM, AutoModelForSeq2SeqLM]:
     return getattr(transformers, model_class)
+
+
+def load_tokenizer(model_name: str) -> AutoTokenizer:
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    if tokenizer.pad_token_id is None:
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+
+    tokenizer.padding_side = "left"
+    return tokenizer
```
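Taken together, the commit routes every tokenizer load through this helper. A hedged usage sketch follows; the checkpoint name is an assumption for illustration, not something the diff pins down.

```python
# Usage sketch for the load_tokenizer helper added above; the checkpoint
# name below is illustrative, not taken from the diff.
from inference_server.models import load_tokenizer

tokenizer = load_tokenizer("bigscience/bloom-560m")
assert tokenizer.padding_side == "left"
assert tokenizer.pad_token_id is not None  # "[PAD]" was added only if missing

batch = tokenizer(["short", "a much longer prompt"], padding=True, return_tensors="pt")
# pad ids sit on the left, so batch["input_ids"][:, -1] is each prompt's
# real last token, ready for model.generate()
```

One caveat worth noting: when `add_special_tokens` actually introduces a new `[PAD]` token, the vocabulary grows, and callers that feed these ids to a model generally need `model.resize_token_embeddings(len(tokenizer))`; the diff does not show that step.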
