Skip to content

Commit ae516e0

Browse files
committed
updates
1 parent 854772b commit ae516e0

File tree

17 files changed

+796
-56
lines changed

17 files changed

+796
-56
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ jobs:
116116
with:
117117
python-version: '3.11'
118118

119-
- name: Install HuggingFace CLI
120-
run: pip install huggingface_hub
119+
- name: Install Python dependencies
120+
run: python -m pip install huggingface_hub mlx-lm
121121

122122
- name: Download test model from HuggingFace
123123
run: |

README.md

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ src/MLXSharp.Tests/ # Integration tests
9393
```bash
9494
dotnet pack src/MLXSharp/MLXSharp.csproj \
9595
-p:MLXSharpMacNativeBinary=$PWD/native/build/macos/libmlxsharp.dylib \
96+
-p:MLXSharpMacMetallibBinary=$PWD/native/build/macos/extern/mlx/mlx/backend/metal/kernels/mlx.metallib \
9697
-p:MLXSharpLinuxNativeBinary=$PWD/native/build/linux/libmlxsharp.so
9798
```
9899

@@ -104,13 +105,50 @@ The CMake project vendored from MLX builds MLX and the shim in one go. macOS bui
104105
3. `package-test` (macOS): downloads both native artifacts, stages them into `src/MLXSharp/runtimes/{rid}/native`, rebuilds, runs the integration tests, and produces NuGet packages.
105106

106107
## Testing
107-
Tests require a local MLX model bundle. Point `MLXSHARP_MODEL_PATH` to the directory before running:
108+
The managed integration tests still rely on `mlx_lm` until the native runner is feature-complete. Bring your own Hugging Face model bundle (any MLX-compatible repository) and point `MLXSHARP_MODEL_PATH` at it before running:
108109

109110
```bash
110-
export MLXSHARP_MODEL_PATH=$PWD/models/Qwen1.5-0.5B-Chat-4bit
111-
huggingface-cli download mlx-community/Qwen1.5-0.5B-Chat-4bit --local-dir "$MLXSHARP_MODEL_PATH"
111+
export MLXSHARP_HF_MODEL_ID=<your-mlx-model>
112+
export MLXSHARP_MODEL_PATH=$PWD/models/<your-mlx-model>
113+
huggingface-cli download "$MLXSHARP_HF_MODEL_ID" --local-dir "$MLXSHARP_MODEL_PATH"
114+
python -m pip install mlx-lm
112115
dotnet test
113116
```
114117

118+
`MLXSHARP_HF_MODEL_ID` is picked up by the Python smoke test; omit it to fall back to `mlx-community/Qwen1.5-0.5B-Chat-4bit`.
119+
120+
When running locally, you can place prebuilt binaries under `libs/native-osx-arm64` (and/or `libs/native-libs`) and a corresponding model bundle under `model/`. The test harness auto-discovers these folders and configures `MLXSHARP_LIBRARY`, `MLXSHARP_MODEL_PATH`, and `MLXSHARP_TOKENIZER_PATH`, so you can iterate completely offline.
121+
122+
The integration suite invokes `python -m mlx_lm.generate` with deterministic settings (temperature `0`, seed `42`) and asserts that the generated response for prompts like “Скільки буде 2+2?” contains the correct answer. Test output includes the raw generation transcript so you can verify the model behaviour directly from the CI logs.
123+
124+
### Native pipeline (experimental)
125+
126+
Work is in progress to move inference fully into the native MLX backend. The current build exposes new configuration knobs via `MlxClientOptions`:
127+
128+
| Option | Description |
129+
| --- | --- |
130+
| `EnableNativeModelRunner` | Turns on the experimental native transformer pipeline. Still returns “not implemented” until the native side is completed. |
131+
| `NativeModelDirectory` | Directory containing `config.json`, `*.safetensors`, etc. |
132+
| `TokenizerPath` | Path to the HuggingFace `tokenizer.json` (loaded with `Microsoft.ML.Tokenizers`). |
133+
| `MaxGeneratedTokens`, `Temperature`, `TopP`, `TopK` | Generation parameters that will flow into the native pipeline. |
134+
135+
Once the C++ implementation catches up, you’ll be able to set the environment variables below and exercise the native path end-to-end:
136+
137+
```bash
138+
export MLXSHARP_TOKENIZER_PATH=$PWD/models/<model-name>/tokenizer.json
139+
export MLXSHARP_MODEL_PATH=$PWD/models/<model-name>
140+
```
141+
142+
Until then, `EnableNativeModelRunner` should stay `false` to avoid runtime errors from the stub implementation.
143+
144+
### MSBuild properties
145+
146+
| Property | Purpose |
147+
| --- | --- |
148+
| `MLXSharpMacNativeBinary` | Path to `libmlxsharp.dylib` that gets packaged into the NuGet runtime folder. |
149+
| `MLXSharpMacMetallibBinary` | Path to the matching `mlx.metallib` that ships next to the dylib. |
150+
| `MLXSharpLinuxNativeBinary` | Path to the Linux shared object (`libmlxsharp.so`). |
151+
| `MLXSharpSkipMacNativeValidation` / `MLXSharpSkipLinuxNativeValidation` | Opt-out flags for validation logic when you intentionally omit platform binaries. |
152+
115153
## Versioning & platform support
116154
This initial release is focused on macOS developers who want MLX inside .NET applications. Linux binaries are produced to keep NuGet packages complete, and Windows support is not yet available.

TESTING.md

Lines changed: 0 additions & 38 deletions
This file was deleted.

native/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/../extern/mlx ${CMAKE_BINARY_DIR}/extern/ml
1717

1818
add_library(mlxsharp SHARED
1919
src/mlxsharp.cpp
20+
src/llm_model_runner.cpp
2021
)
2122

2223
target_link_libraries(mlxsharp PRIVATE mlx)

native/include/mlxsharp/api.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ typedef struct mlx_usage {
137137
int output_tokens;
138138
} mlx_usage;
139139

140+
/*
 * Generation parameters supplied by the caller of
 * mlxsharp_session_generate_tokens. The four fields are copied one-for-one,
 * in this order, into mlxsharp::llm::GenerationOptions before generation runs.
 * The layout is part of the C ABI; do not reorder or resize fields.
 */
typedef struct mlxsharp_generation_options {
    int max_tokens;     /* presumably the cap on generated tokens - TODO confirm once the native runner uses it */
    float temperature;  /* sampling temperature */
    float top_p;        /* nucleus (top-p) sampling parameter */
    int top_k;          /* top-k sampling parameter */
} mlxsharp_generation_options;
146+
147+
/*
 * Output buffer filled by mlxsharp_session_generate_tokens.
 *
 * `tokens` is allocated by the library with malloc and must be returned via
 * mlxsharp_release_tokens; it is NULL when nothing was generated.
 * `length` is the number of int32_t elements in `tokens`.
 */
typedef struct mlxsharp_token_buffer {
    int32_t* tokens;
    size_t length;
} mlxsharp_token_buffer;
151+
140152
int mlxsharp_create_session(
141153
const char* chat_model_id,
142154
const char* embedding_model_id,
@@ -171,6 +183,21 @@ void mlxsharp_free_buffer(unsigned char* buffer);
171183

172184
void mlxsharp_release_session(void* session);
173185

186+
/*
 * Attaches a native model runner to an existing session.
 *
 * session          Handle previously obtained from mlxsharp_create_session.
 * model_directory  Path of the model bundle directory; must not be NULL.
 * tokenizer_path   Path of the tokenizer file; must not be NULL.
 *
 * Returns MLXSHARP_STATUS_SUCCESS when the runner was created and stored on
 * the session, or MLXSHARP_STATUS_INVALID_ARGUMENT when any pointer is NULL.
 * Validation failures inside the runner (empty or missing paths) are raised as
 * C++ exceptions and translated to a status code by the library's internal
 * wrapper - the exact code is not visible from this header.
 */
int mlxsharp_session_load_model(
    void* session,
    const char* model_directory,
    const char* tokenizer_path);
190+
191+
int mlxsharp_session_generate_tokens(
192+
void* session,
193+
const int32_t* prompt_tokens,
194+
size_t prompt_token_count,
195+
const mlxsharp_generation_options* options,
196+
mlxsharp_token_buffer* output_tokens,
197+
mlx_usage* usage);
198+
199+
/*
 * Frees the token array owned by `buffer` (allocated by
 * mlxsharp_session_generate_tokens) and clears the buffer's fields.
 * Safe to call with a NULL buffer or with a buffer whose `tokens` is already NULL.
 */
void mlxsharp_release_tokens(mlxsharp_token_buffer* buffer);
200+
174201
#ifdef __cplusplus
175202
}
176203
#endif
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#pragma once
2+
3+
#include <cstdint>
4+
#include <memory>
5+
#include <string>
6+
#include <vector>
7+
8+
namespace mlxsharp::llm {
9+
10+
/// Generation parameters handed to ModelRunner::Generate.
///
/// Kept as a plain aggregate so the C API layer can brace-initialise it
/// field-by-field from `mlxsharp_generation_options` (same field order).
struct GenerationOptions {
    int max_tokens;
    float temperature;
    float top_p;
    int top_k;
};
16+
17+
/// Holds the on-disk locations of a model bundle and its tokenizer and is the
/// entry point for native token generation.
///
/// Instances are created only through Create(), which validates the paths;
/// the constructor is private. The type is move-only.
class ModelRunner {
public:
    /// Validates both paths and returns a new runner.
    /// @throws std::invalid_argument when a path is empty, the model directory
    ///         is not an existing directory, or the tokenizer is not a regular file.
    [[nodiscard]] static std::unique_ptr<ModelRunner> Create(const std::string& model_directory, const std::string& tokenizer_path);

    ~ModelRunner();

    ModelRunner(const ModelRunner&) = delete;
    ModelRunner& operator=(const ModelRunner&) = delete;
    ModelRunner(ModelRunner&&) noexcept = default;
    ModelRunner& operator=(ModelRunner&&) noexcept = default;

    /// Model directory exactly as passed to Create().
    const std::string& model_directory() const noexcept { return model_directory_; }
    /// Tokenizer path exactly as passed to Create().
    const std::string& tokenizer_path() const noexcept { return tokenizer_path_; }

    /// Generates continuation tokens for `prompt_tokens`.
    /// The current implementation is a stub that always throws std::runtime_error.
    [[nodiscard]] std::vector<int32_t> Generate(const std::vector<int32_t>& prompt_tokens, const GenerationOptions& options);

private:
    ModelRunner(std::string model_directory, std::string tokenizer_path);

    std::string model_directory_;
    std::string tokenizer_path_;
};
39+
40+
} // namespace mlxsharp::llm

native/src/llm_model_runner.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include "mlxsharp/llm_model_runner.h"
2+
3+
#include <filesystem>
4+
#include <stdexcept>
5+
6+
namespace fs = std::filesystem;
7+
8+
namespace mlxsharp::llm {
9+
10+
std::unique_ptr<ModelRunner> ModelRunner::Create(const std::string& model_directory, const std::string& tokenizer_path)
11+
{
12+
if (model_directory.empty())
13+
{
14+
throw std::invalid_argument("Model directory cannot be empty.");
15+
}
16+
17+
if (tokenizer_path.empty())
18+
{
19+
throw std::invalid_argument("Tokenizer path cannot be empty.");
20+
}
21+
22+
const fs::path model_dir(model_directory);
23+
if (!fs::exists(model_dir) || !fs::is_directory(model_dir))
24+
{
25+
throw std::invalid_argument("Model directory does not exist: " + model_directory);
26+
}
27+
28+
const fs::path tokenizer_file(tokenizer_path);
29+
if (!fs::exists(tokenizer_file) || !fs::is_regular_file(tokenizer_file))
30+
{
31+
throw std::invalid_argument("Tokenizer file does not exist: " + tokenizer_path);
32+
}
33+
34+
return std::unique_ptr<ModelRunner>(new ModelRunner(model_directory, tokenizer_path));
35+
}
36+
37+
/// Stores the two paths by moving them into the members.
/// Performs no validation itself; Create() checks the paths before calling this.
ModelRunner::ModelRunner(std::string model_directory, std::string tokenizer_path)
    : model_directory_(std::move(model_directory)),
      tokenizer_path_(std::move(tokenizer_path))
{
}
42+
43+
// Defaulted out of line: the only members are std::string, so no manual cleanup is needed.
ModelRunner::~ModelRunner() = default;
44+
45+
/// Placeholder for native generation.
///
/// Both parameters are intentionally unnamed because nothing reads them yet.
/// @throws std::runtime_error always, until the native transformer pipeline exists.
std::vector<int32_t> ModelRunner::Generate(const std::vector<int32_t>& /*prompt_tokens*/, const GenerationOptions& /*options*/)
{
    throw std::runtime_error("Native transformer generation is not implemented yet.");
}
49+
50+
} // namespace mlxsharp::llm

native/src/mlxsharp.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "mlxsharp/api.h"
2+
#include "mlxsharp/llm_model_runner.h"
23

34
#include <algorithm>
45
#include <atomic>
@@ -43,6 +44,7 @@ struct mlxsharp_session {
4344
std::string chat_model;
4445
std::string embedding_model;
4546
std::string image_model;
47+
std::unique_ptr<mlxsharp::llm::ModelRunner> model_runner;
4648

4749
mlxsharp_session(mlxsharp_context_t* ctx, std::string chat, std::string embed, std::string image)
4850
: context(ctx),
@@ -563,6 +565,104 @@ void mlxsharp_free_buffer(unsigned char* data) {
563565
std::free(data);
564566
}
565567

568+
int mlxsharp_session_load_model(
569+
void* session_ptr,
570+
const char* model_directory,
571+
const char* tokenizer_path) {
572+
if (session_ptr == nullptr) {
573+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Session pointer is null.");
574+
}
575+
576+
if (model_directory == nullptr || tokenizer_path == nullptr) {
577+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Model directory or tokenizer path is null.");
578+
}
579+
580+
auto* session = static_cast<mlxsharp_session_t*>(session_ptr);
581+
582+
return invoke([&]() -> int {
583+
auto model = mlxsharp::llm::ModelRunner::Create(model_directory, tokenizer_path);
584+
session->model_runner = std::move(model);
585+
return MLXSHARP_STATUS_SUCCESS;
586+
});
587+
}
588+
589+
int mlxsharp_session_generate_tokens(
590+
void* session_ptr,
591+
const int32_t* prompt_tokens,
592+
size_t prompt_token_count,
593+
const mlxsharp_generation_options* options,
594+
mlxsharp_token_buffer* output_tokens,
595+
mlx_usage* usage) {
596+
if (session_ptr == nullptr) {
597+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Session pointer is null.");
598+
}
599+
600+
if (output_tokens == nullptr) {
601+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, kNullOutParameter);
602+
}
603+
604+
output_tokens->tokens = nullptr;
605+
output_tokens->length = 0;
606+
607+
auto* session = static_cast<mlxsharp_session_t*>(session_ptr);
608+
609+
if (session->model_runner == nullptr) {
610+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Model is not loaded. Call mlxsharp_session_load_model first.");
611+
}
612+
613+
if (prompt_token_count > 0 && prompt_tokens == nullptr) {
614+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Prompt tokens pointer is null.");
615+
}
616+
617+
if (options == nullptr) {
618+
return set_error(MLXSHARP_STATUS_INVALID_ARGUMENT, "Generation options pointer is null.");
619+
}
620+
621+
return invoke([&]() -> int {
622+
std::vector<int32_t> prompt;
623+
prompt.reserve(prompt_token_count);
624+
for (size_t i = 0; i < prompt_token_count; ++i) {
625+
prompt.push_back(prompt_tokens[i]);
626+
}
627+
628+
mlxsharp::llm::GenerationOptions native_options{
629+
options->max_tokens,
630+
options->temperature,
631+
options->top_p,
632+
options->top_k,
633+
};
634+
635+
auto generated = session->model_runner->Generate(prompt, native_options);
636+
output_tokens->length = generated.size();
637+
638+
if (generated.empty()) {
639+
assign_usage(usage, static_cast<int>(prompt_token_count), 0);
640+
return MLXSHARP_STATUS_SUCCESS;
641+
}
642+
643+
auto* buffer = static_cast<int32_t*>(std::malloc(generated.size() * sizeof(int32_t)));
644+
if (buffer == nullptr) {
645+
return set_error(MLXSHARP_STATUS_OUT_OF_MEMORY, "Failed to allocate output token buffer.");
646+
}
647+
648+
std::memcpy(buffer, generated.data(), generated.size() * sizeof(int32_t));
649+
output_tokens->tokens = buffer;
650+
651+
assign_usage(usage, static_cast<int>(prompt_token_count), static_cast<int>(generated.size()));
652+
return MLXSHARP_STATUS_SUCCESS;
653+
});
654+
}
655+
656+
// Releases a token buffer produced by mlxsharp_session_generate_tokens and
// leaves it in the empty state {nullptr, 0}.
//
// A null `buffer` is ignored. A buffer whose `tokens` is already null is still
// normalised (length reset to 0) so callers never observe a stale length.
void mlxsharp_release_tokens(mlxsharp_token_buffer* buffer) {
    if (buffer == nullptr) {
        return;
    }

    std::free(buffer->tokens);  // free(nullptr) is a no-op
    buffer->tokens = nullptr;
    buffer->length = 0;
}
665+
566666
void mlxsharp_release_session(void* session_ptr) {
567667
if (session_ptr == nullptr) {
568668
return;

src/MLXSharp.Tests/ArraySmokeTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ internal sealed class RequiresNativeLibraryFactAttribute : FactAttribute
4444
{
4545
public RequiresNativeLibraryFactAttribute()
4646
{
47+
TestEnvironment.EnsureInitialized();
4748
if (!NativeLibraryLocator.TryEnsure(out var skipReason))
4849
{
4950
Skip = skipReason ?? "Native MLX library is not available.";

0 commit comments

Comments
 (0)