Skip to content

Commit 9dad29e

Browse files
kashif and pcuenca authored
lets callers inject a prebuilt Tokenizer in the LanguageModel (#278)
* lets callers inject a prebuilt Tokenizer * add a tokenizerFolder argument * Update Examples/transformers-cli/Sources/transformers-cli/Transformers.swift Co-authored-by: Pedro Cuenca <pedro@huggingface.co> * Update README.md Co-authored-by: Pedro Cuenca <pedro@huggingface.co> * Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca <pedro@huggingface.co> * Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca <pedro@huggingface.co> * Update README.md Co-authored-by: Pedro Cuenca <pedro@huggingface.co> * removed configuration * add tokenizerFromLocalFolder test * add fixtures * simplified test * fix names * Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca <pedro@huggingface.co> --------- Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
1 parent 479eeef commit 9dad29e

File tree

6 files changed

+187
-14
lines changed

6 files changed

+187
-14
lines changed

Examples/transformers-cli/Sources/transformers-cli/Transformers.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ struct TransformersCLI: AsyncParsableCommand {
4949
@Option(help: "Repetition penalty to discourage repeating tokens (typical: 1.0-2.0, 1.0 = no penalty)")
5050
var repetitionPenalty: Float?
5151

52+
@Option(help: "Path to a local folder containing tokenizer_config.json and tokenizer.json")
53+
var tokenizerFolder: String?
54+
5255
func generate(
5356
model: LanguageModel,
5457
config: GenerationConfig,
@@ -104,7 +107,17 @@ struct TransformersCLI: AsyncParsableCommand {
104107
let url = URL(filePath: modelPath)
105108
let compiledURL = try compile(at: url)
106109
print("Loading model \(compiledURL)")
107-
let model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
110+
let model: LanguageModel
111+
if let tokenizerFolder {
112+
let tokenizerURL = URL(filePath: tokenizerFolder, directoryHint: .isDirectory)
113+
model = try LanguageModel.loadCompiled(
114+
url: compiledURL,
115+
tokenizerFolder: tokenizerURL,
116+
computeUnits: computeUnits.asMLComputeUnits
117+
)
118+
} else {
119+
model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
120+
}
108121

109122
var config = model.defaultGenerationConfig
110123
config.doSample = doSample

README.md

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,36 @@ example converting and running Mistral 7B using CoreML [here](https://github.com
8888

8989
The [modernization of Core ML](https://github.com/huggingface/swift-transformers/pull/257) and corresponding examples were primarily contributed by @joshnewnham, @1duo, @alejandro-isaza, @aseemw. Thank you 🙏
9090

91+
### Offline CoreML tokenizers
92+
93+
When you bundle a compiled CoreML model and tokenizer files with your app, you can skip any network requests by injecting
94+
the tokenizer when constructing `LanguageModel`:
95+
96+
```swift
97+
let compiledURL: URL = ... // path to .mlmodelc
98+
let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json
99+
100+
// Construct the tokenizer from local files (inside an async context)
101+
let tokenizer = try await AutoTokenizer.from(modelFolder: tokenizerFolder)
102+
let model = try LanguageModel.loadCompiled(
103+
url: compiledURL,
104+
tokenizer: tokenizer
105+
)
106+
```
107+
108+
Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint or are compatible with the model you use. For the
109+
Mistral example in `Examples/Mistral7B/`, you can fetch the tokenizer like this:
110+
111+
```bash
112+
huggingface-cli download \
113+
mistralai/Mistral-7B-Instruct-v0.3 \
114+
tokenizer.json tokenizer_config.json \
115+
--local-dir Examples/Mistral7B/local-tokenizer
116+
```
117+
118+
If the repo is gated, authenticate with `huggingface-cli login` first. Both initializers reuse the tokenizer
119+
you pass in and never reach out to the Hugging Face Hub.
120+
91121
## Usage via SwiftPM
92122

93123
To use `swift-transformers` with SwiftPM, you can add this to your `Package.swift`:
@@ -139,5 +169,3 @@ To format your code, run `swift format -i --recursive .`.
139169
## License
140170

141171
[Apache 2](LICENSE).
142-
143-

Sources/Models/LanguageModel.swift

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,26 @@ public class LanguageModel {
3333

3434
/// Creates a new language model instance from a CoreML model.
3535
///
36-
/// - Parameter model: The CoreML model to wrap
36+
/// - Parameters:
37+
/// - model: The CoreML model to wrap
38+
/// - tokenizer: Optional preconstructed tokenizer to reuse
3739
/// - Important: Triggers a fatal error if the model doesn't have the expected input shape information
38-
public required init(model: MLModel) {
40+
public required init(
41+
model: MLModel,
42+
tokenizer: Tokenizer? = nil
43+
) {
3944
self.model = model
45+
_tokenizer = tokenizer
4046
(minContextLength, maxContextLength) = Self.contextRange(from: model)
41-
configuration = LanguageModelConfigurationFromHub(modelName: modelName)
47+
if tokenizer == nil {
48+
self.configuration = LanguageModelConfigurationFromHub(modelName: modelName)
49+
} else {
50+
self.configuration = nil
51+
}
52+
}
53+
54+
public convenience required init(model: MLModel) {
55+
self.init(model: model, tokenizer: nil)
4256
}
4357

4458
public func resetState() async {}
@@ -142,15 +156,28 @@ public extension LanguageModel {
142156
/// - Parameters:
143157
/// - url: The URL of the compiled CoreML model file (.mlmodelc)
144158
/// - computeUnits: The compute units to use for model inference
159+
/// - tokenizer: Optional tokenizer instance to reuse
145160
/// - Returns: A configured `LanguageModel` instance
146161
/// - Throws: An error if the model cannot be loaded from the specified URL
147-
static func loadCompiled(url: URL, computeUnits: MLComputeUnits = .cpuAndGPU) throws -> LanguageModel {
162+
static func loadCompiled(
163+
url: URL,
164+
computeUnits: MLComputeUnits = .cpuAndGPU,
165+
tokenizer: Tokenizer? = nil
166+
) throws -> LanguageModel {
148167
let config = MLModelConfiguration()
149168
config.computeUnits = computeUnits
150169
let model = try MLModel(contentsOf: url, configuration: config)
151170
return switch kvCacheAvailability(for: model) {
152-
case .statefulKVCache: LanguageModelWithStatefulKVCache(model: model)
153-
default: LanguageModel(model: model)
171+
case .statefulKVCache:
172+
LanguageModelWithStatefulKVCache(
173+
model: model,
174+
tokenizer: tokenizer
175+
)
176+
default:
177+
LanguageModel(
178+
model: model,
179+
tokenizer: tokenizer
180+
)
154181
}
155182
}
156183
}
@@ -304,7 +331,8 @@ public extension LanguageModel {
304331
/// - Throws: An error if the configuration cannot be loaded
305332
var modelConfig: Config? {
306333
get async throws {
307-
try await configuration!.modelConfig
334+
guard let configuration else { return nil }
335+
return try await configuration.modelConfig
308336
}
309337
}
310338

@@ -314,7 +342,8 @@ public extension LanguageModel {
314342
/// - Throws: An error if the configuration cannot be loaded
315343
var tokenizerConfig: Config? {
316344
get async throws {
317-
try await configuration!.tokenizerConfig
345+
guard let configuration else { return nil }
346+
return try await configuration.tokenizerConfig
318347
}
319348
}
320349

@@ -324,7 +353,10 @@ public extension LanguageModel {
324353
/// - Throws: An error if the tokenizer data cannot be loaded
325354
var tokenizerData: Config {
326355
get async throws {
327-
try await configuration!.tokenizerData
356+
guard let configuration else {
357+
throw TokenizerError.missingConfig
358+
}
359+
return try await configuration.tokenizerData
328360
}
329361
}
330362

@@ -459,8 +491,11 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {
459491

460492
var state: MLState?
461493

462-
public required init(model: MLModel) {
463-
super.init(model: model)
494+
public required init(
495+
model: MLModel,
496+
tokenizer: Tokenizer? = nil
497+
) {
498+
super.init(model: model, tokenizer: tokenizer)
464499
// To support pre-filling and extend, the input must support
465500
// flexible shapes.
466501
guard maxContextLength - minContextLength > 1 else {
@@ -531,11 +566,15 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {
531566
public enum TokenizerError: LocalizedError {
532567
/// The tokenizer configuration file could not be found.
533568
case tokenizerConfigNotFound
569+
/// The language model configuration required to load tokenizer data is missing.
570+
case missingConfig
534571

535572
public var errorDescription: String? {
536573
switch self {
537574
case .tokenizerConfigNotFound:
538575
String(localized: "Tokenizer configuration could not be found. The model may be missing required tokenizer files.", comment: "Error when tokenizer configuration is missing")
576+
case .missingConfig:
577+
String(localized: "Language model configuration was not set, tokenizer assets could not be loaded.", comment: "Error when configuration needed for tokenizer data is missing")
539578
}
540579
}
541580
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"version": "1.0",
3+
"truncation": null,
4+
"padding": null,
5+
"added_tokens": [
6+
{
7+
"id": 0,
8+
"content": "<bos>"
9+
},
10+
{
11+
"id": 1,
12+
"content": "<pad>"
13+
},
14+
{
15+
"id": 2,
16+
"content": "<eos>"
17+
},
18+
{
19+
"id": 3,
20+
"content": "<unk>"
21+
}
22+
],
23+
"model": {
24+
"type": "BPE",
25+
"vocab": {
26+
"<bos>": 0,
27+
"<pad>": 1,
28+
"<eos>": 2,
29+
"<unk>": 3,
30+
"offline": 4,
31+
"path": 5,
32+
"_": 6
33+
},
34+
"merges": [
35+
"off line",
36+
"li ne",
37+
"pa th",
38+
"_ of",
39+
"_ pa"
40+
],
41+
"continuing_subword_prefix": "",
42+
"end_of_word_suffix": "",
43+
"unk_token": "<unk>"
44+
},
45+
"normalizer": {
46+
"type": "Lowercase"
47+
},
48+
"pre_tokenizer": {
49+
"type": "Whitespace"
50+
}
51+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"tokenizer_class": "GPT2Tokenizer",
3+
"bos_token": "<bos>",
4+
"eos_token": "<eos>",
5+
"unk_token": "<unk>",
6+
"pad_token": "<pad>",
7+
"model_max_length": 128,
8+
"do_lower_case": false
9+
}

Tests/TokenizersTests/TokenizerTests.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,39 @@ struct TokenizerTests {
177177
#expect(tokenizer.encode(text: "<|im_start|>user<|im_sep|>Who are you?<|im_end|><|im_start|>assistant<|im_sep|>") == [100264, 882, 100266, 15546, 527, 499, 30, 100265, 100264, 78191, 100266])
178178
}
179179

180+
@Test
181+
func tokenizerFromLocalFolder() async throws {
182+
let bundle = Bundle.module
183+
guard
184+
let tokenizerConfigURL = bundle.url(
185+
forResource: "tokenizer_config",
186+
withExtension: "json"
187+
),
188+
bundle.url(
189+
forResource: "tokenizer",
190+
withExtension: "json"
191+
) != nil
192+
else {
193+
Issue.record("Missing offline tokenizer fixtures")
194+
return
195+
}
196+
197+
let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerConfigURL.deletingLastPathComponent())
198+
199+
let tokenizerConfigOpt = try await configuration.tokenizerConfig
200+
#expect(tokenizerConfigOpt != nil)
201+
let tokenizerConfig = tokenizerConfigOpt!
202+
let tokenizerData = try await configuration.tokenizerData
203+
204+
let tokenizer = try AutoTokenizer.from(
205+
tokenizerConfig: tokenizerConfig,
206+
tokenizerData: tokenizerData
207+
)
208+
209+
let encoded = tokenizer.encode(text: "offline path")
210+
#expect(!encoded.isEmpty)
211+
}
212+
180213
/// https://github.com/huggingface/swift-transformers/issues/96
181214
@Test
182215
func legacyLlamaBehaviour() async throws {

0 commit comments

Comments (0)