huggingface
diff --git a/Sources/Generation/CoreML+Extensions.swift b/Sources/Generation/CoreML+Extensions.swift
Lines changed: 65 additions & 7 deletions
Lines changed: 65 additions & 7 deletions
diff --git a/Sources/Generation/Generation.swift b/Sources/Generation/Generation.swift
Lines changed: 49 additions & 3 deletions
Lines changed: 49 additions & 3 deletions
diff --git a/Sources/Generation/GenerationConfig.swift b/Sources/Generation/GenerationConfig.swift
Lines changed: 49 additions & 1 deletion
Lines changed: 49 additions & 1 deletion
diff --git a/Sources/Generation/LogitsWarper/LogitsProcessor.swift b/Sources/Generation/LogitsWarper/LogitsProcessor.swift
Lines changed: 15 additions & 0 deletions
Lines changed: 15 additions & 0 deletions
diff --git a/Sources/Generation/LogitsWarper/LogitsWarper.swift b/Sources/Generation/LogitsWarper/LogitsWarper.swift
Lines changed: 23 additions & 1 deletion
Lines changed: 23 additions & 1 deletion
@@ -11,7 +11,15 @@ import CoreML
 import Foundation
 
 extension MLMultiArray {
-    /// All values will be stored in the last dimension of the MLMultiArray (default is dims=1)
+    /// Creates an MLMultiArray from an array of integers.
+    ///
+    /// All values are stored in the last dimension of the MLMultiArray, with leading
+    /// dimensions set to 1. For example, with dims=2, the shape becomes [1, arr.count].
+    ///
+    /// - Parameters:
+    ///   - arr: Array of integers to convert
+    ///   - dims: Number of dimensions for the resulting MLMultiArray
+    /// - Returns: MLMultiArray containing the integer values
     static func from(_ arr: [Int], dims: Int = 1) -> MLMultiArray {
         var shape = Array(repeating: 1, count: dims)
         shape[shape.count - 1] = arr.count
@@ -27,7 +35,15 @@ extension MLMultiArray {
         return o
     }
 
-    /// All values will be stored in the last dimension of the MLMultiArray (default is dims=1)
+    /// Creates an MLMultiArray from an array of doubles.
+    ///
+    /// All values are stored in the last dimension of the MLMultiArray, with leading
+    /// dimensions set to 1. For example, with dims=2, the shape becomes [1, arr.count].
+    ///
+    /// - Parameters:
+    ///   - arr: Array of doubles to convert
+    ///   - dims: Number of dimensions for the resulting MLMultiArray
+    /// - Returns: MLMultiArray containing the double values
     static func from(_ arr: [Double], dims: Int = 1) -> MLMultiArray {
         var shape = Array(repeating: 1, count: dims)
         shape[shape.count - 1] = arr.count
@@ -43,7 +59,13 @@ extension MLMultiArray {
         return o
     }
 
-    /// This will concatenate all dimensions into one one-dim array.
+    /// Converts an MLMultiArray to a flat array of integers.
+    ///
+    /// Concatenates all dimensions into a single one-dimensional array by reading
+    /// the MLMultiArray data in memory order as `Int32` values (assumes int32 storage).
+    ///
+    /// - Parameter o: MLMultiArray to convert
+    /// - Returns: Flat array of integer values
     static func toIntArray(_ o: MLMultiArray) -> [Int] {
         var arr = Array(repeating: 0, count: o.count)
         let ptr = UnsafeMutablePointer<Int32>(OpaquePointer(o.dataPointer))
@@ -53,9 +75,18 @@ extension MLMultiArray {
         return arr
     }
 
+    /// Instance convenience that forwards to the static `toIntArray(_:)` with `self`.
+    ///
+    /// - Returns: Flat array of integer values
     func toIntArray() -> [Int] { Self.toIntArray(self) }
 
-    /// This will concatenate all dimensions into one one-dim array.
+    /// Converts an MLMultiArray to a flat array of doubles.
+    ///
+    /// Concatenates all dimensions into a single one-dimensional array by reading
+    /// the MLMultiArray data in memory order as `Double` values (assumes double storage).
+    ///
+    /// - Parameter o: MLMultiArray to convert
+    /// - Returns: Flat array of double values
     static func toDoubleArray(_ o: MLMultiArray) -> [Double] {
         var arr: [Double] = Array(repeating: 0, count: o.count)
         let ptr = UnsafeMutablePointer<Double>(OpaquePointer(o.dataPointer))
@@ -65,11 +96,17 @@ extension MLMultiArray {
         return arr
     }
 
+    /// Instance convenience that forwards to the static `toDoubleArray(_:)` with `self`.
+    ///
+    /// - Returns: Flat array of double values
     func toDoubleArray() -> [Double] { Self.toDoubleArray(self) }
 
-    /// Helper to construct a sequentially-indexed multi array,
-    /// useful for debugging and unit tests
-    /// Example in 3 dimensions:
+    /// Creates a test MLMultiArray with sequentially indexed values.
+    ///
+    /// Useful for debugging and unit tests. Values are assigned sequentially
+    /// starting from 0, following the memory layout of the specified shape.
+    ///
+    /// Example output for shape [2, 3, 4]:
     /// ```
     /// [[[ 0, 1, 2, 3 ],
     ///   [ 4, 5, 6, 7 ],
@@ -78,6 +115,9 @@ extension MLMultiArray {
     ///   [ 16, 17, 18, 19 ],
     ///   [ 20, 21, 22, 23 ]]]
     /// ```
+    ///
+    /// - Parameter shape: Desired shape of the test tensor
+    /// - Returns: MLMultiArray with sequential values for testing
     static func testTensor(shape: [Int]) -> MLMultiArray {
         let arr = try! MLMultiArray(shape: shape as [NSNumber], dataType: .double)
         let ptr = UnsafeMutablePointer<Double>(OpaquePointer(arr.dataPointer))
@@ -199,6 +239,12 @@ extension MLMultiArray {
 }
 
 extension MLShapedArray<Float> {
+    /// Efficiently extracts float values from the shaped array.
+    ///
+    /// Uses optimized memory copying only for one-dimensional arrays with unit stride,
+    /// falling back to slower scalar access for every other layout.
+    ///
+    /// - Returns: Array of Float values from the shaped array
     var floats: [Float] {
         guard strides.first == 1, strides.count == 1 else {
             // For some reason this path is slow.
@@ -213,6 +259,12 @@ extension MLShapedArray<Float> {
 }
 
 extension MLShapedArraySlice<Float> {
+    /// Efficiently extracts float values from the shaped array slice.
+    ///
+    /// Uses optimized memory copying only for one-dimensional slices with unit stride,
+    /// falling back to slower scalar access for every other layout.
+    ///
+    /// - Returns: Array of Float values from the shaped array slice
     var floats: [Float] {
         guard strides.first == 1, strides.count == 1 else {
             // For some reason this path is slow.
@@ -227,6 +279,12 @@ extension MLShapedArraySlice<Float> {
 }
 
 extension MLMultiArray {
+    /// Efficiently extracts float values from the MLMultiArray if it contains float32 data.
+    ///
+    /// Uses fast memory copying to extract all float values as a contiguous array.
+    /// Returns nil if the array doesn't contain float32 data.
+    ///
+    /// - Returns: Array of Float values, or nil if not float32 type
     var floats: [Float]? {
         guard dataType == .float32 else { return nil }
 
 
@@ -10,28 +10,62 @@ import CoreML
 
 import Tokenizers
 
+/// Decoding strategies reported by `GenerationConfig.generationMode`.
 public enum GenerationMode {
+    /// Contrastive search decoding.
     case contrastiveSearch
+    /// Greedy decoding.
     case greedy
+    /// Sampling-based decoding.
     case sample
+    /// Beam search decoding.
     case beam
+    /// Group beam search decoding.
     case groupBeam
+    /// Mode for configurations that map to none of the above (presumably a fallback — confirm against `generationMode`).
     case unsupported
 }
 
+/// Array of token IDs representing input tokens.
 public typealias InputTokens = [Int]
+
+/// Array of token IDs representing generated output tokens.
 public typealias GenerationOutput = [Int]
 
-/// A callable (a model, usually), that predicts the next token after a given sequence
+/// A callable model that predicts the next token after a given sequence.
+///
+/// - Parameter tokens: Input token sequence
+/// - Parameter config: Generation configuration
+/// - Returns: Logits array for next token prediction
 public typealias NextTokenModel = (InputTokens, GenerationConfig) -> any MLShapedArrayProtocol
 
+/// Callback for receiving generated tokens during streaming.
 public typealias PredictionTokensCallback = (GenerationOutput) -> Void
+
+/// Callback for receiving generated text during streaming.
 public typealias PredictionStringCallback = (String) -> Void
 
-// TODO: callbacks (for streaming)
+/// Asynchronous text generation entry points: token-level greedy search and prompt-to-text generation.
 public protocol Generation {
+    /// Performs greedy search generation over token IDs.
+    ///
+    /// - Parameters:
+    ///   - config: Generation configuration
+    ///   - tokens: Input token sequence
+    ///   - model: Model for next token prediction
+    ///   - callback: Optional callback that receives generated tokens
+    /// - Returns: Generated token sequence
     func greedySearch(config: GenerationConfig, tokens: InputTokens, model: NextTokenModel, callback: PredictionTokensCallback?) async -> GenerationOutput
 
+    /// Generates text from a prompt string.
+    ///
+    /// - Parameters:
+    ///   - config: Generation configuration
+    ///   - prompt: Input prompt text
+    ///   - model: Model for next token prediction
+    ///   - tokenizer: Tokenizer for encoding/decoding
+    ///   - callback: Optional callback that receives generated text
+    /// - Returns: Generated text string
     func generate(config: GenerationConfig, prompt: String, model: NextTokenModel, tokenizer: Tokenizer, callback: PredictionStringCallback?) async -> String
 }
 
@@ -50,7 +84,19 @@ public extension Generation {
         return outputTokens
     }
 
-    /// https://github.com/huggingface/transformers/blob/42017d82baa083da2bee3055fdac80c81ee97b8a/src/transformers/generation/utils.py#L1552
+    /// Performs sampling-based text generation with configurable logits warping.
+    ///
+    /// Uses various logits warpers (temperature, top-k, top-p, repetition penalty) to modify
+    /// token probabilities before sampling, enabling diverse and controllable text generation.
+    ///
+    /// - Parameters:
+    ///   - config: Generation configuration with sampling parameters
+    ///   - tokens: Input token sequence
+    ///   - model: Model for next token prediction
+    ///   - callback: Optional callback for streaming tokens
+    /// - Returns: Generated token sequence
+    ///
+    /// - Note: Based on https://github.com/huggingface/transformers/blob/42017d82baa083da2bee3055fdac80c81ee97b8a/src/transformers/generation/utils.py#L1552
     func sample(config: GenerationConfig, tokens: InputTokens, model: NextTokenModel, callback: PredictionTokensCallback? = nil) async -> GenerationOutput {
         // Iterate until we find the eos token or reach the max length
         // TODO: additional stopping criteria
 
@@ -7,23 +7,65 @@
 
 import Foundation
 
-/// Essentials taken from https://github.com/huggingface/transformers/blob/main/src/transformers/generation/configuration_utils.py
+/// Configuration parameters for text generation algorithms.
+///
+/// Contains all the parameters needed to control various aspects of text generation,
+/// including sampling parameters, beam search settings, and special token IDs.
+///
+/// - Note: Based on https://github.com/huggingface/transformers/blob/main/src/transformers/generation/configuration_utils.py
 public struct GenerationConfig {
+    /// Maximum total length of the generated sequence (input + output tokens).
     public var maxLength = 20
+
+    /// Maximum number of new tokens to generate.
     public var maxNewTokens: Int
+
+    /// Whether to use sampling instead of deterministic decoding.
     public var doSample = false
+
+    /// Number of beams for beam search (1 for greedy decoding).
     public var numBeams = 1
+
+    /// Number of beam groups for group beam search.
     public var numBeamGroups = 1
+
+    /// Penalty parameter for contrastive search.
     public var penaltyAlpha: Double?
+
+    /// Temperature for sampling (higher values increase randomness).
     public var temperature = 1.0
+
+    /// Number of top tokens to consider for top-k sampling.
     public var topK = 50
+
+    /// Cumulative probability threshold for top-p sampling.
     public var topP = 1.0
+
+    /// Penalty for token repetition (1.0 means no penalty).
     public var repetitionPenalty = 1.0
 
+    /// Token ID used for padding sequences.
     public var padTokenId: Int?
+
+    /// Token ID for beginning of sequence.
     public var bosTokenId: Int?
+
+    /// Token ID for end of sequence.
     public var eosTokenId: Int?
 
+    /// Creates a new generation configuration.
+    ///
+    /// - Parameters:
+    ///   - maxLength: Maximum total sequence length
+    ///   - maxNewTokens: Maximum new tokens to generate
+    ///   - doSample: Enable sampling instead of greedy decoding
+    ///   - numBeams: Number of beams for beam search
+    ///   - numBeamGroups: Number of beam groups for group beam search
+    ///   - penaltyAlpha: Penalty parameter for contrastive search
+    ///   - temperature: Sampling temperature
+    ///   - topK: Top-k sampling parameter
+    ///   - topP: Top-p sampling parameter
+    ///   - repetitionPenalty: Repetition penalty factor
     public init(maxLength: Int = 20, maxNewTokens: Int, doSample: Bool = false, numBeams: Int = 1, numBeamGroups: Int = 1, penaltyAlpha: Double? = nil, temperature: Double = 1.0, topK: Int = 50, topP: Double = 1.0, repetitionPenalty: Double = 1.0) {
         self.maxLength = maxLength
         self.maxNewTokens = maxNewTokens
@@ -39,6 +81,12 @@ public struct GenerationConfig {
 }
 
 public extension GenerationConfig {
+    /// Determines the appropriate generation mode based on configuration parameters.
+    ///
+    /// Analyzes the combination of sampling settings, beam parameters, and penalty values
+    /// to automatically select the most appropriate generation algorithm.
+    ///
+    /// - Returns: The determined generation mode
     var generationMode: GenerationMode {
         // Exclude this case from the pattern matching below
         if topK > 1, !doSample, penaltyAlpha != nil, penaltyAlpha! > 0 {
 
@@ -1,12 +1,27 @@
 import Foundation
 
+/// Processes logits by applying a sequence of logits warpers.
+///
+/// Coordinates the application of multiple logits warpers in sequence,
+/// allowing for complex probability transformations during text generation.
 public struct LogitsProcessor {
+    /// Array of logits warpers to apply in sequence.
     public var logitsWarpers: [any LogitsWarper]
 
+    /// Creates a new logits processor.
+    ///
+    /// - Parameter logitsWarpers: Array of warpers to apply in sequence
     public init(logitsWarpers: [any LogitsWarper]) {
         self.logitsWarpers = logitsWarpers
     }
 
+    /// Processes logits by applying all warpers in sequence.
+    ///
+    /// Each warper is applied to the output of the previous warper, allowing
+    /// for complex chaining of probability transformations.
+    ///
+    /// - Parameter arr: Input logits array
+    /// - Returns: Tuple of processed (indices, logits)
     public func callAsFunction(_ arr: [Float]) -> (indices: [Int], logits: [Float]) {
         var indices = Array(arr.indices)
         var logits = arr
 
@@ -1,12 +1,34 @@
 import Foundation
 
-/// Protocol for all logit warpers that can be applied during generation
+/// Protocol for logits warpers that transform token probabilities during generation.
+///
+/// Logits warpers modify the probability distribution over tokens before sampling,
+/// enabling techniques like temperature scaling, top-k/top-p filtering, and repetition penalties.
 public protocol LogitsWarper {
+    /// Transforms the logits together with their corresponding token indices.
+    ///
+    /// - Parameters:
+    ///   - indices: Array of token indices corresponding to the logits
+    ///   - logits: Array of logit values to transform
+    /// - Returns: Tuple of transformed (indices, logits)
     func warp(indices: [Int], logits: [Float]) -> (indices: [Int], logits: [Float])
+
+    /// Call-as-function entry point; the default implementation forwards to `warp(indices:logits:)`.
+    ///
+    /// - Parameters:
+    ///   - indices: Array of token indices
+    ///   - logits: Array of logit values
+    /// - Returns: Tuple of transformed (indices, logits)
     func callAsFunction(_ indices: [Int], _ logits: [Float]) -> (indices: [Int], logits: [Float])
 }
 
 public extension LogitsWarper {
+    /// Default implementation that forwards to `warp(indices:logits:)`, so conformers only need to implement `warp`.
+    ///
+    /// - Parameters:
+    ///   - indices: Array of token indices
+    ///   - logits: Array of logit values
+    /// - Returns: Tuple of transformed (indices, logits)
     func callAsFunction(_ indices: [Int], _ logits: [Float]) -> (indices: [Int], logits: [Float]) {
         warp(indices: indices, logits: logits)
     }