managedcode
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 41 additions & 3 deletions b/‎README.md‎
Lines changed: 41 additions & 3 deletions
diff --git a/‎TESTING.md‎
Lines changed: 0 additions & 38 deletions b/‎TESTING.md‎
Lines changed: 0 additions & 38 deletions
diff --git a/‎native/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎native/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎native/include/mlxsharp/api.h‎
Lines changed: 27 additions & 0 deletions b/‎native/include/mlxsharp/api.h‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎native/include/mlxsharp/llm_model_runner.h‎
Lines changed: 40 additions & 0 deletions b/‎native/include/mlxsharp/llm_model_runner.h‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎native/src/llm_model_runner.cpp‎
Lines changed: 50 additions & 0 deletions b/‎native/src/llm_model_runner.cpp‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎native/src/mlxsharp.cpp‎
Lines changed: 100 additions & 0 deletions b/‎native/src/mlxsharp.cpp‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎src/MLXSharp.Tests/ArraySmokeTests.cs‎
Lines changed: 1 addition & 0 deletions b/‎src/MLXSharp.Tests/ArraySmokeTests.cs‎
Lines changed: 1 addition & 0 deletions
@@ -116,8 +116,8 @@ jobs:
         with:
           python-version: '3.11'
 
-      - name: Install HuggingFace CLI
-        run: pip install huggingface_hub
+      - name: Install Python dependencies
+        run: python -m pip install huggingface_hub mlx-lm
 
       - name: Download test model from HuggingFace
         run: |
 
@@ -93,6 +93,7 @@ src/MLXSharp.Tests/          # Integration tests
    ```bash
    dotnet pack src/MLXSharp/MLXSharp.csproj \
        -p:MLXSharpMacNativeBinary=$PWD/native/build/macos/libmlxsharp.dylib \
+       -p:MLXSharpMacMetallibBinary=$PWD/native/build/macos/extern/mlx/mlx/backend/metal/kernels/mlx.metallib \
        -p:MLXSharpLinuxNativeBinary=$PWD/native/build/linux/libmlxsharp.so
    ```
 
@@ -104,13 +105,50 @@ The CMake project vendored from MLX builds MLX and the shim in one go. macOS bui
 3. `package-test` (macOS): downloads both native artifacts, stages them into `src/MLXSharp/runtimes/{rid}/native`, rebuilds, runs the integration tests, and produces NuGet packages.
 
 ## Testing
-Tests require a local MLX model bundle. Point `MLXSHARP_MODEL_PATH` to the directory before running:
+The managed integration tests still piggy-back on `mlx_lm` until the native runner is feature-complete. Bring your own HuggingFace bundle (any MLX-compatible repo) and point `MLXSHARP_MODEL_PATH` to it before running:
 
 ```bash
-export MLXSHARP_MODEL_PATH=$PWD/models/Qwen1.5-0.5B-Chat-4bit
-huggingface-cli download mlx-community/Qwen1.5-0.5B-Chat-4bit --local-dir "$MLXSHARP_MODEL_PATH"
+export MLXSHARP_HF_MODEL_ID=<your-mlx-model>
+export MLXSHARP_MODEL_PATH=$PWD/models/<your-mlx-model>
+huggingface-cli download "$MLXSHARP_HF_MODEL_ID" --local-dir "$MLXSHARP_MODEL_PATH"
+python -m pip install mlx-lm
 dotnet test
 ```
 
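+`MLXSHARP_HF_MODEL_ID` is picked up by the Python smoke test; when it is unset, the test falls back to `mlx-community/Qwen1.5-0.5B-Chat-4bit`. The `huggingface-cli download` command above still needs it set to a real repository id.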
+
+When running locally you can place prebuilt binaries under `libs/native-osx-arm64` (and/or `libs/native-libs`) and a corresponding model bundle under `model/`. The test harness auto-discovers these folders and configures `MLXSHARP_LIBRARY`, `MLXSHARP_MODEL_PATH`, and `MLXSHARP_TOKENIZER_PATH` so you can iterate completely offline.
+
+The integration suite invokes `python -m mlx_lm.generate` with deterministic settings (temperature `0`, seed `42`) and asserts that the generated response for prompts like “Скільки буде 2+2?” contains the correct answer. Test output includes the raw generation transcript so you can verify the model behaviour directly from the CI logs.
+
+### Native pipeline (experimental)
+
+Work is in progress to move inference fully into the native MLX backend. The current build exposes new configuration knobs via `MlxClientOptions`:
+
+| Option | Description |
+| --- | --- |
+| `EnableNativeModelRunner` | Turns on the experimental native transformer pipeline. Still returns “not implemented” until the native side is completed. |
+| `NativeModelDirectory` | Directory containing `config.json`, `*.safetensors`, etc. |
+| `TokenizerPath` | Path to the HuggingFace `tokenizer.json` (loaded with `Microsoft.ML.Tokenizers`). |
+| `MaxGeneratedTokens`, `Temperature`, `TopP`, `TopK` | Generation parameters that will flow into the native pipeline. |
+
+When the C++ implementation catches up, you’ll be able to set the environment variables below and exercise the path end-to-end:
+
+```bash
+export MLXSHARP_TOKENIZER_PATH=$PWD/models/<model-name>/tokenizer.json
+export MLXSHARP_MODEL_PATH=$PWD/models/<model-name>
+```
+
+Until then, `EnableNativeModelRunner` should stay `false` to avoid runtime errors from the stub implementation.
+
+### MSBuild properties
+
+| Property | Purpose |
+| --- | --- |
+| `MLXSharpMacNativeBinary` | Path to `libmlxsharp.dylib` that gets packaged into the NuGet runtime folder. |
+| `MLXSharpMacMetallibBinary` | Path to the matching `mlx.metallib` that ships next to the dylib. |
+| `MLXSharpLinuxNativeBinary` | Path to the Linux shared object (`libmlxsharp.so`). |
+| `MLXSharpSkipMacNativeValidation` / `MLXSharpSkipLinuxNativeValidation` | Opt-out flags for validation logic when you intentionally omit platform binaries. |
+
 ## Versioning & platform support
 This initial release is focused on macOS developers who want MLX inside .NET applications. Linux binaries are produced to keep NuGet packages complete, and Windows support is not yet available.
@@ -17,6 +17,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/../extern/mlx ${CMAKE_BINARY_DIR}/extern/ml
 
 add_library(mlxsharp SHARED
     src/mlxsharp.cpp
+    src/llm_model_runner.cpp
 )
 
 target_link_libraries(mlxsharp PRIVATE mlx)
 
@@ -137,6 +137,18 @@ typedef struct mlx_usage {
     int output_tokens;
 } mlx_usage;
 
+typedef struct mlxsharp_generation_options { /* Generation settings passed by pointer to mlxsharp_session_generate_tokens. */
+    int max_tokens;    /* Copied unchanged into the native GenerationOptions; presumably a cap on generated tokens (TODO confirm). */
+    float temperature; /* Copied unchanged into the native GenerationOptions. */
+    float top_p;       /* Copied unchanged into the native GenerationOptions. */
+    int top_k;         /* Copied unchanged into the native GenerationOptions. */
+} mlxsharp_generation_options;
+
+typedef struct mlxsharp_token_buffer { /* Output buffer filled by mlxsharp_session_generate_tokens. */
+    int32_t* tokens; /* malloc-allocated token ids, or NULL when nothing was generated; free with mlxsharp_release_tokens. */
+    size_t length;   /* Number of tokens produced by the generation call. */
+} mlxsharp_token_buffer;
+
 int mlxsharp_create_session(
     const char* chat_model_id,
     const char* embedding_model_id,
@@ -171,6 +183,21 @@ void mlxsharp_free_buffer(unsigned char* buffer);
 
 void mlxsharp_release_session(void* session);
 
+int mlxsharp_session_load_model( /* Creates a native model runner and stores it on the session; returns an MLXSHARP_STATUS_* code. */
+    void* session,               /* Session handle; NULL is rejected as an invalid argument. */
+    const char* model_directory, /* Must be non-NULL, non-empty and name an existing directory. */
+    const char* tokenizer_path); /* Must be non-NULL, non-empty and name an existing regular file. */
+
+int mlxsharp_session_generate_tokens( /* Runs generation on the session's loaded model; returns an MLXSHARP_STATUS_* code. */
+    void* session,                              /* Must be non-NULL and have a model loaded via mlxsharp_session_load_model. */
+    const int32_t* prompt_tokens,               /* Prompt token ids; may be NULL only when prompt_token_count is 0. */
+    size_t prompt_token_count,                  /* Number of entries readable through prompt_tokens. */
+    const mlxsharp_generation_options* options, /* Required; NULL is rejected as an invalid argument. */
+    mlxsharp_token_buffer* output_tokens,       /* Required; reset to empty on entry and filled on success. Release with mlxsharp_release_tokens. */
+    mlx_usage* usage);                          /* Receives prompt and generated token counts on success (NULL handling is not visible here; confirm). */
+
+void mlxsharp_release_tokens(mlxsharp_token_buffer* buffer); /* Frees a non-NULL buffer->tokens, then clears tokens and length; a NULL buffer is a no-op. */
+
 #ifdef __cplusplus
 }
 #endif
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace mlxsharp::llm {
+
+struct GenerationOptions { // Generation settings handed to ModelRunner::Generate.
+    int max_tokens;    // Filled from mlxsharp_generation_options::max_tokens by the C API.
+    float temperature; // Filled from mlxsharp_generation_options::temperature.
+    float top_p;       // Filled from mlxsharp_generation_options::top_p.
+    int top_k;         // Filled from mlxsharp_generation_options::top_k.
+};
+
+class ModelRunner { // Holds the model and tokenizer locations for the native generation path (generation itself is still a stub).
+public:
+    static std::unique_ptr<ModelRunner> Create(const std::string& model_directory, const std::string& tokenizer_path); // Validating factory; the only way to obtain an instance because the constructor is private.
+
+    ~ModelRunner(); // Defined out of line in llm_model_runner.cpp.
+
+    ModelRunner(const ModelRunner&) = delete; // Not copyable.
+    ModelRunner& operator=(const ModelRunner&) = delete;
+    ModelRunner(ModelRunner&&) noexcept = default; // Movable.
+    ModelRunner& operator=(ModelRunner&&) noexcept = default;
+
+    const std::string& model_directory() const noexcept { return model_directory_; } // Directory path supplied at construction.
+    const std::string& tokenizer_path() const noexcept { return tokenizer_path_; } // Tokenizer path supplied at construction.
+
+    std::vector<int32_t> Generate(const std::vector<int32_t>& prompt_tokens, const GenerationOptions& options); // Currently always throws std::runtime_error.
+
+private:
+    ModelRunner(std::string model_directory, std::string tokenizer_path); // Stores both paths; performs no validation itself.
+
+    std::string model_directory_;
+    std::string tokenizer_path_;
+};
+
+} // namespace mlxsharp::llm
@@ -0,0 +1,50 @@
+#include "mlxsharp/llm_model_runner.h"
+
+#include <filesystem>
+#include <stdexcept>
+
+namespace fs = std::filesystem;
+
+namespace mlxsharp::llm {
+
+// Validates both paths and builds a ModelRunner for them.
+//
+// Throws std::invalid_argument when, checked in this order: the model directory
+// string is empty, the tokenizer path string is empty, the model directory is
+// missing or not a directory, or the tokenizer path is missing or not a regular
+// file. The first failing check decides the message.
+std::unique_ptr<ModelRunner> ModelRunner::Create(const std::string& model_directory, const std::string& tokenizer_path)
+{
+    if (model_directory.empty())
+        throw std::invalid_argument("Model directory cannot be empty.");
+
+    if (tokenizer_path.empty())
+        throw std::invalid_argument("Tokenizer path cannot be empty.");
+
+    const fs::path directory(model_directory);
+    const bool directory_usable = fs::exists(directory) && fs::is_directory(directory);
+    if (!directory_usable)
+        throw std::invalid_argument("Model directory does not exist: " + model_directory);
+
+    const fs::path tokenizer(tokenizer_path);
+    const bool tokenizer_usable = fs::exists(tokenizer) && fs::is_regular_file(tokenizer);
+    if (!tokenizer_usable)
+        throw std::invalid_argument("Tokenizer file does not exist: " + tokenizer_path);
+
+    // The constructor is private, so std::make_unique cannot be used here.
+    std::unique_ptr<ModelRunner> runner(new ModelRunner(model_directory, tokenizer_path));
+    return runner;
+}
+
+// Takes ownership of both path strings. No validation happens here; Create
+// checks the paths before calling this constructor.
+ModelRunner::ModelRunner(std::string model_directory, std::string tokenizer_path)
+    : model_directory_{std::move(model_directory)}
+    , tokenizer_path_{std::move(tokenizer_path)}
+{
+}
+
+ModelRunner::~ModelRunner() = default; // Out-of-line defaulted destructor matching the declaration in the header.
+
+// Placeholder for native generation: both arguments are ignored and the call
+// always throws std::runtime_error.
+std::vector<int32_t> ModelRunner::Generate(const std::vector<int32_t>&, const GenerationOptions&)
+{
+    throw std::runtime_error("Native transformer generation is not implemented yet.");
+}
+
+} // namespace mlxsharp::llm
@@ -1,4 +1,5 @@
 #include "mlxsharp/api.h"
+#include "mlxsharp/llm_model_runner.h"
 
 #include <algorithm>
 #include <atomic>
@@ -43,6 +44,7 @@ struct mlxsharp_session {
     std::string chat_model;
     std::string embedding_model;
     std::string image_model;
+    std::unique_ptr<mlxsharp::llm::ModelRunner> model_runner;
 
     mlxsharp_session(mlxsharp_context_t* ctx, std::string chat, std::string embed, std::string image)
         : context(ctx),
@@ -563,6 +565,104 @@ void mlxsharp_free_buffer(unsigned char* data) {
     std::free(data);
 }
 
+// Creates a ModelRunner from the given paths and installs it on the session,
+// replacing any runner that was there before.
+//
+// A null session or a null path is reported as MLXSHARP_STATUS_INVALID_ARGUMENT.
+// The creation itself runs inside invoke(), which presumably turns exceptions
+// from ModelRunner::Create into a status code (invoke is not shown here).
+int mlxsharp_session_load_model(
+    void* session_ptr,
+    const char* model_directory,
+    const char* tokenizer_path) {
+    if (!session_ptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Session pointer is null.");
+    }
+
+    const bool paths_present = model_directory && tokenizer_path;
+    if (!paths_present) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Model directory or tokenizer path is null.");
+    }
+
+    auto* const target = static_cast<mlxsharp_session_t*>(session_ptr);
+
+    return invoke([&]() -> int {
+        // If Create throws, the session keeps its previous runner.
+        target->model_runner = mlxsharp::llm::ModelRunner::Create(model_directory, tokenizer_path);
+        return MLXSHARP_STATUS_SUCCESS;
+    });
+}
+
+// Generates tokens for a prompt using the session's loaded model runner.
+//
+// Validation: session_ptr, output_tokens and options must be non-null, a model
+// must have been loaded with mlxsharp_session_load_model, and prompt_tokens may
+// be null only when prompt_token_count is zero. Each violation is reported as
+// MLXSHARP_STATUS_INVALID_ARGUMENT.
+//
+// output_tokens is reset to {nullptr, 0} as soon as it is known to be non-null.
+// On success it owns a malloc'ed copy of the generated ids (nullptr/0 when the
+// runner produced nothing) and must be released with mlxsharp_release_tokens.
+// On any failure it stays {nullptr, 0}; pointer and length are only published
+// together, after the copy has succeeded.
+//
+// usage is updated through assign_usage on success only.
+int mlxsharp_session_generate_tokens(
+    void* session_ptr,
+    const int32_t* prompt_tokens,
+    size_t prompt_token_count,
+    const mlxsharp_generation_options* options,
+    mlxsharp_token_buffer* output_tokens,
+    mlx_usage* usage) {
+    if (session_ptr == nullptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Session pointer is null.");
+    }
+
+    if (output_tokens == nullptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, kNullOutParameter);
+    }
+
+    output_tokens->tokens = nullptr;
+    output_tokens->length = 0;
+
+    auto* session = static_cast<mlxsharp_session_t*>(session_ptr);
+
+    if (session->model_runner == nullptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Model is not loaded. Call mlxsharp_session_load_model first.");
+    }
+
+    if (prompt_token_count > 0 && prompt_tokens == nullptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Prompt tokens pointer is null.");
+    }
+
+    if (options == nullptr) {
+        return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Generation options pointer is null.");
+    }
+
+    return invoke([&]() -> int {
+        // Copy the prompt in one step. The range is empty when the count is zero,
+        // which is the only case where prompt_tokens may be null.
+        const std::vector<int32_t> prompt(prompt_tokens, prompt_tokens + prompt_token_count);
+
+        const mlxsharp::llm::GenerationOptions native_options{
+            options->max_tokens,
+            options->temperature,
+            options->top_p,
+            options->top_k,
+        };
+
+        const auto generated = session->model_runner->Generate(prompt, native_options);
+        const auto prompt_count = static_cast<int>(prompt_token_count);
+
+        if (generated.empty()) {
+            assign_usage(usage, prompt_count, 0);
+            return MLXSHARP_STATUS_SUCCESS;
+        }
+
+        const size_t byte_count = generated.size() * sizeof(int32_t);
+        auto* buffer = static_cast<int32_t*>(std::malloc(byte_count));
+        if (buffer == nullptr) {
+            // output_tokens is still {nullptr, 0} here, so the caller never sees
+            // a non-zero length paired with a null pointer.
+            return set_error(MLXSHARP_STATUS_OUT_OF_MEMORY, "Failed to allocate output token buffer.");
+        }
+
+        std::memcpy(buffer, generated.data(), byte_count);
+
+        // Publish pointer and length together once the buffer is fully populated.
+        output_tokens->tokens = buffer;
+        output_tokens->length = generated.size();
+
+        assign_usage(usage, prompt_count, static_cast<int>(generated.size()));
+        return MLXSHARP_STATUS_SUCCESS;
+    });
+}
+
+// Releases a token buffer filled by mlxsharp_session_generate_tokens and resets
+// it to the empty state {nullptr, 0}.
+//
+// A null buffer is ignored. A buffer whose tokens pointer is already null is
+// still reset, so a stale length cannot survive the call. Calling this twice on
+// the same buffer is harmless because the pointer is cleared after the free.
+void mlxsharp_release_tokens(mlxsharp_token_buffer* buffer) {
+    if (buffer == nullptr) {
+        return;
+    }
+
+    // std::free(nullptr) is a no-op, so no separate null check is needed.
+    std::free(buffer->tokens);
+    buffer->tokens = nullptr;
+    buffer->length = 0;
+}
+
 void mlxsharp_release_session(void* session_ptr) {
     if (session_ptr == nullptr) {
         return;
 
@@ -44,6 +44,7 @@ internal sealed class RequiresNativeLibraryFactAttribute : FactAttribute
 {
     public RequiresNativeLibraryFactAttribute()
     {
+        TestEnvironment.EnsureInitialized();
         if (!NativeLibraryLocator.TryEnsure(out var skipReason))
         {
             Skip = skipReason ?? "Native MLX library is not available.";
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/../extern/mlx ${CMAKE_BINARY_DIR}/extern/ml`
`17`	`17`
`18`	`18`	`add_library(mlxsharp SHARED`
`19`	`19`	`src/mlxsharp.cpp`
	`20`	`+ src/llm_model_runner.cpp`
`20`	`21`	`)`
`21`	`22`
`22`	`23`	`target_link_libraries(mlxsharp PRIVATE mlx)`
Original file line number	Diff line number	Diff line change
`@@ -44,6 +44,7 @@ internal sealed class RequiresNativeLibraryFactAttribute : FactAttribute`
`44`	`44`	`{`
`45`	`45`	`public RequiresNativeLibraryFactAttribute()`
`46`	`46`	`{`
	`47`	`+ TestEnvironment.EnsureInitialized();`
`47`	`48`	`if (!NativeLibraryLocator.TryEnsure(out var skipReason))`
`48`	`49`	`{`
`49`	`50`	`Skip = skipReason ?? "Native MLX library is not available.";`