@@ -215,7 +215,7 @@ public enum ChatTemplateArgument {
215215///
216216/// This is the main protocol that defines all tokenizer operations, including text processing,
217217/// chat template application, and special token handling.
218- public protocol Tokenizer {
218+ public protocol Tokenizer : Sendable {
219219 /// Tokenizes the input text into a sequence of tokens.
220220 ///
221221 /// - Parameter text: The input text to tokenize
@@ -451,7 +451,7 @@ let specialTokenAttributes: [String] = [
451451/// This class provides a complete tokenizer implementation that can be initialized from
452452/// Hugging Face Hub configuration files and supports all standard tokenization operations
453453/// including chat template application, normalization, pre-tokenization, and post-processing.
454- public class PreTrainedTokenizer : Tokenizer {
454+ public class PreTrainedTokenizer : @ unchecked Sendable , Tokenizer {
455455 let model : TokenizingModel
456456
457457 public var bosToken : String ? { model. bosToken }
@@ -477,6 +477,9 @@ public class PreTrainedTokenizer: Tokenizer {
477477 /// Cache for compiled Jinja templates keyed by their literal template string
478478 private var compiledChatTemplateCache : [ String : Template ] = [ : ]
479479
480+ /// Lock to protect the compiled chat template cache from concurrent access
481+ private let cacheLock = NSLock ( )
482+
480483 /// Initializes a tokenizer from Hugging Face configuration files.
481484 ///
482485 /// - Parameters:
@@ -531,10 +534,26 @@ public class PreTrainedTokenizer: Tokenizer {
531534 }
532535
/// Returns a compiled Jinja `Template` for the given template string,
/// caching compilations so repeated chat-template applications are cheap.
///
/// Thread safety: `compiledChatTemplateCache` is only read or written while
/// `cacheLock` is held. Compilation itself runs outside the lock, so two
/// racing threads may both compile the same string; the loser's result is
/// discarded in favor of the instance already cached (double-checked insert).
///
/// - Parameter templateString: The literal Jinja template source.
/// - Returns: The compiled template, freshly built or taken from the cache.
/// - Throws: Rethrows any error produced by `Template(templateString)`.
private func compiledTemplate(for templateString: String) throws -> Template {
    // Fast path: hand back a previously compiled template if one exists.
    cacheLock.lock()
    let hit = compiledChatTemplateCache[templateString]
    cacheLock.unlock()
    if let hit { return hit }

    // Slow path: compile without holding the lock (compilation is expensive).
    let freshlyCompiled = try Template(templateString)

    cacheLock.lock()
    defer { cacheLock.unlock() }

    // Another thread may have won the race while we compiled; prefer its
    // cached instance so every caller shares a single Template per string.
    if let winner = compiledChatTemplateCache[templateString] {
        return winner
    }
    compiledChatTemplateCache[templateString] = freshlyCompiled
    return freshlyCompiled
}
@@ -907,7 +926,7 @@ public extension AutoTokenizer {
907926
908927// MARK: - Tokenizer model classes
909928
/// Tokenizer for T5-family models, backed entirely by the Unigram model
/// implementation; adds no behavior of its own.
/// NOTE(review): `@unchecked Sendable` is a manual promise — it relies on
/// `UnigramTokenizer` being safe to share across threads; confirm that audit.
class T5Tokenizer: UnigramTokenizer, @unchecked Sendable {}
911930
912931// MARK: - PreTrainedTokenizer classes
913932
@@ -956,7 +975,7 @@ func maybeUpdatePostProcessor(tokenizerConfig: Config, processorConfig: Config?)
956975}
957976
958977/// See https://github.com/xenova/transformers.js/blob/1a9964fb09b8f54fcbeac46dc6aae8d76795809d/src/tokenizers.js#L3203 for these exceptions
959- class LlamaPreTrainedTokenizer : PreTrainedTokenizer {
978+ class LlamaPreTrainedTokenizer : PreTrainedTokenizer , @ unchecked Sendable {
960979 let isLegacy : Bool
961980
962981 required init ( tokenizerConfig: Config , tokenizerData: Config , strict: Bool = true ) throws {
0 commit comments