Skip to content

Commit e61994e

Browse files
committed
Merge tag 'v0.3.7' into ampere-build-3.0.0
2 parents 1f62d57 + 710e19a commit e61994e

18 files changed

+1181
-979
lines changed

.github/workflows/build-and-release.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [ubuntu-20.04, windows-2019, macos-12]
14+
os: [ubuntu-20.04, windows-2019, macos-13]
1515

1616
steps:
1717
- uses: actions/checkout@v4
@@ -42,7 +42,7 @@ jobs:
4242
shell: cmd
4343

4444
- name: Build wheels
45-
uses: pypa/cibuildwheel@v2.21.1
45+
uses: pypa/cibuildwheel@v2.22.0
4646
env:
4747
# disable repair
4848
CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -69,7 +69,7 @@ jobs:
6969
platforms: linux/arm64
7070

7171
- name: Build wheels
72-
uses: pypa/cibuildwheel@v2.21.1
72+
uses: pypa/cibuildwheel@v2.22.0
7373
env:
7474
CIBW_SKIP: "*musllinux* pp*"
7575
CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
$matrix = @{
2323
'os' = @('ubuntu-latest', 'windows-2019')
2424
'pyver' = @("3.9", "3.10", "3.11", "3.12")
25-
'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.0")
25+
'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
2626
'releasetag' = @("basic")
2727
}
2828
@@ -59,13 +59,11 @@ jobs:
5959
cache: 'pip'
6060

6161
- name: Setup Mamba
62-
uses: conda-incubator/setup-miniconda@v3.0.4
62+
uses: conda-incubator/setup-miniconda@v3.1.0
6363
with:
64-
activate-environment: "build"
64+
activate-environment: "llamacpp"
6565
python-version: ${{ matrix.pyver }}
66-
miniforge-variant: Mambaforge
6766
miniforge-version: latest
68-
use-mamba: true
6967
add-pip-as-python-dependency: true
7068
auto-activate-base: false
7169

.github/workflows/build-wheels-metal.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [macos-12, macos-13, macos-14]
14+
os: [macos-13, macos-14, macos-15]
1515

1616
steps:
1717
- uses: actions/checkout@v4
@@ -43,7 +43,7 @@ jobs:
4343
shell: cmd
4444

4545
- name: Build wheels
46-
uses: pypa/cibuildwheel@v2.21.1
46+
uses: pypa/cibuildwheel@v2.22.0
4747
env:
4848
# disable repair
4949
CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/generate-index-from-release.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ jobs:
4444
./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
4545
./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
4646
./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
47-
./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu125$'
47+
# ./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu125$'
48+
# ./scripts/releases-to-pep-503.sh index/whl/cu126 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu126$'
4849
./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
4950
- name: Upload artifact
5051
uses: actions/upload-pages-artifact@v3

.github/workflows/test.yaml

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,11 @@ jobs:
5151
path: ~/.cache/huggingface/hub
5252
key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
5353
- name: Install dependencies (Linux/MacOS)
54-
if: runner.os != 'Windows'
5554
run: |
5655
python -m pip install --upgrade pip
5756
python -m pip install uv
5857
python -m uv pip install -e .[all] --verbose
5958
shell: bash
60-
- name: Install dependencies (Windows)
61-
if: runner.os == 'Windows'
62-
run: |
63-
python -m pip install --upgrade pip
64-
python -m pip install uv
65-
python -m uv pip install -e .[all] --verbose
66-
shell: cmd
6759
- name: Test with pytest
6860
run: |
6961
python -m pytest
@@ -90,30 +82,21 @@ jobs:
9082
with:
9183
path: ~/.cache/huggingface/hub
9284
key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
93-
94-
- name: Install dependencies (Linux/MacOS)
95-
if: runner.os != 'Windows'
96-
run: |
97-
python -m pip install --upgrade pip
98-
python -m pip install uv
99-
python -m uv pip install -e .[all] --verbose
100-
shell: bash
10185

10286
- name: Install dependencies (Windows)
103-
if: runner.os == 'Windows'
10487
run: |
10588
python -m pip install --upgrade pip
10689
python -m pip install uv
10790
python -m uv pip install -e .[all] --verbose
108-
shell: cmd
91+
shell: cmd
10992

11093
- name: Test with pytest
11194
run: |
11295
python -m pytest
11396
11497
build-macos:
11598
needs: download-model
116-
runs-on: macos-latest
99+
runs-on: macos-13
117100
strategy:
118101
matrix:
119102
python-version: ["3.9", "3.10", "3.11", "3.12"]
@@ -128,35 +111,33 @@ jobs:
128111
python-version: ${{ matrix.python-version }}
129112
cache: 'pip'
130113

114+
- name: System Info
115+
run: |
116+
uname -a
117+
sysctl -n machdep.cpu.brand_string
118+
python3 -c "import platform; print(platform.machine(), platform.architecture())"
119+
131120
- name: Restore model cache
132121
uses: actions/cache@v4
133122
with:
134123
path: ~/.cache/huggingface/hub
135124
key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
136125

137126
- name: Install dependencies (Linux/MacOS)
138-
if: runner.os != 'Windows'
139127
run: |
140-
python -m pip install --upgrade pip
141-
python -m pip install uv
142-
python -m uv pip install -e .[all] --verbose
128+
python3 -m pip install --upgrade pip
129+
python3 -m pip install uv
130+
python3 -m uv pip install -e .[all] --verbose
131+
CMAKE_ARGS="-DLLAMA_METAL=off" python3 -m uv pip install .[all] --verbose
143132
shell: bash
144133

145-
- name: Install dependencies (Windows)
146-
if: runner.os == 'Windows'
147-
run: |
148-
python -m pip install --upgrade pip
149-
python -m pip install uv
150-
python -m uv pip install -e .[all] --verbose
151-
shell: cmd
152-
153134
- name: Test with pytest
154135
run: |
155-
python -m pytest
136+
python3 -m pytest
156137
157138
build-macos-metal:
158139
needs: download-model
159-
runs-on: macos-latest
140+
runs-on: macos-13
160141
steps:
161142
- uses: actions/checkout@v4
162143
with:
@@ -167,25 +148,24 @@ jobs:
167148
with:
168149
python-version: "3.9"
169150

151+
- name: System Info
152+
run: |
153+
uname -a
154+
sysctl -n machdep.cpu.brand_string
155+
python3 -c "import platform; print(platform.machine(), platform.architecture())"
156+
170157
- name: Restore model cache
171158
uses: actions/cache@v4
172159
with:
173160
path: ~/.cache/huggingface/hub
174161
key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }}
175162

176-
- name: Install dependencies (Linux/MacOS)
177-
if: runner.os != 'Windows'
163+
- name: Install dependencies
178164
run: |
179-
python -m pip install --upgrade pip
180-
python -m pip install uv
181-
CMAKE_ARGS="-DLLAMA_METAL=on" python -m uv pip install .[all] --verbose
165+
python3 -m pip install --upgrade pip
166+
CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install .[all] --verbose
182167
shell: bash
183168

184-
- name: Install dependencies (Windows)
185-
if: runner.os == 'Windows'
186-
run: |
187-
python -m pip install --upgrade pip
188-
CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
189169
- name: Test with pytest
190170
run: |
191-
python -m pytest
171+
python3 -m pytest

CHANGELOG.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.3.7]
11+
12+
- feat: Update llama.cpp to ggerganov/llama.cpp@794fe23f29fb40104975c91fe19f23798f7c726e
13+
- fix(ci): Fix the CUDA workflow by @oobabooga in #1894
14+
- fix: error showing time spent in llama perf context print, adds `no_perf` flag to `Llama` class by @shakalaca in #1898
15+
16+
## [0.3.6]
17+
18+
- feat: Update llama.cpp to ggerganov/llama.cpp@f7cd13301c2a88f97073fd119072b4cc92c08df1
19+
- fix(server): streaming resource lock by @gjpower in #1879
20+
21+
## [0.3.5]
22+
23+
- feat: Update llama.cpp to ggerganov/llama.cpp@26a8406ba9198eb6fdd8329fa717555b4f77f05f
24+
- fix(ci): Fix release by updating macos runner image to non-deprecated version by @abetlen in afedfc888462f9a6e809dc9455eb3b663764cc3f
25+
- fix(server): add missing await statements for async exit_stack handling by @gjpower in #1858
26+
27+
## [0.3.4]
28+
29+
- fix(ci): Build wheels for macos 13-15, cuda 12.1-12.4 by @abetlen in ca808028bd16b8327bd84128d48015a4b1304690
30+
31+
## [0.3.3]
32+
33+
- feat: Update llama.cpp to ggerganov/llama.cpp@ce8784bdb153ff7794dde5a50b0ebfa51baa6171
34+
- fix: chat API logprobs format by @domdomegg in #1788
35+
- feat: Add support for CUDA 12.6, fix CUDA 12.5 by @Smartappli in #1775
36+
- fix: Make content not required in ChatCompletionRequestAssistantMessage by @feloy in #1807
37+
- fix: Fix pickling of Llama class by setting seed from _seed member by @abetlen in 2523472c3eccb9ab9277117cc4ff705212b6888a
38+
- fix: Fix logit-bias type hint by @ddh0 in #1802
39+
- fix(server): Avoid thread starvation on many concurrent requests by making use of asyncio to lock llama_proxy context by @gjpower in #1798
40+
- fix(server): Added missing exit_stack.close() to /v1/chat/completions by @Ian321 in #1796
41+
- fix(examples): Refactor Batching notebook to use new sampler chain API by @lukestanley in #1793
42+
- fix(docs): Update development instructions by @Florents-Tselai in #1833
43+
- fix(docs): Remove ref to llama_eval in llama_cpp.py docs by @richdougherty in #1819
44+
1045
## [0.3.2]
1146

1247
- feat: Update llama.cpp to ggerganov/llama.cpp@74d73dc85cc2057446bf63cc37ff649ae7cebd80

CMakeLists.txt

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,35 @@ if (LLAMA_BUILD)
6262
# Enable building of the common library
6363
set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)
6464

65-
# Building llama
66-
if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
67-
# Need to disable these llama.cpp flags on Apple x86_64,
68-
# otherwise users may encounter invalid instruction errors
69-
set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
70-
set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
71-
set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
72-
set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
73-
endif()
74-
65+
# Architecture detection and settings for Apple platforms
7566
if (APPLE)
76-
set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
67+
# Detect the host architecture (uname -m); used below as the default target
68+
execute_process(
69+
COMMAND uname -m
70+
OUTPUT_VARIABLE HOST_ARCH
71+
OUTPUT_STRIP_TRAILING_WHITESPACE
72+
)
73+
74+
# If CMAKE_OSX_ARCHITECTURES is not set, use the host architecture
75+
if(NOT CMAKE_OSX_ARCHITECTURES)
76+
set(CMAKE_OSX_ARCHITECTURES ${HOST_ARCH} CACHE STRING "Build architecture for macOS" FORCE)
77+
endif()
78+
79+
message(STATUS "Host architecture: ${HOST_ARCH}")
80+
message(STATUS "Target architecture: ${CMAKE_OSX_ARCHITECTURES}")
81+
82+
# Configure based on target architecture
83+
if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
84+
# Intel Mac settings
85+
set(GGML_AVX "OFF" CACHE BOOL "ggml: enable AVX" FORCE)
86+
set(GGML_AVX2 "OFF" CACHE BOOL "ggml: enable AVX2" FORCE)
87+
set(GGML_FMA "OFF" CACHE BOOL "ggml: enable FMA" FORCE)
88+
set(GGML_F16C "OFF" CACHE BOOL "ggml: enable F16C" FORCE)
89+
endif()
90+
91+
# Metal settings (enable for both architectures)
92+
set(GGML_METAL "ON" CACHE BOOL "ggml: enable Metal" FORCE)
93+
set(GGML_METAL_EMBED_LIBRARY "ON" CACHE BOOL "ggml: embed metal library" FORCE)
7794
endif()
7895

7996
add_subdirectory(vendor/llama.cpp)
@@ -128,7 +145,7 @@ if (LLAMA_BUILD)
128145
# Building llava
129146
add_subdirectory(vendor/llama.cpp/examples/llava)
130147
set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
131-
# Set CUDA_ARCHITECTURES to OFF on Windows
148+
132149
if (WIN32)
133150
set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
134151
endif()

Makefile

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@ build.debug:
2121
--config-settings=cmake.args="-DCMAKE_BUILD_TYPE=Debug;-DCMAKE_C_FLAGS='-ggdb -O0';-DCMAKE_CXX_FLAGS='-ggdb -O0'" \
2222
--editable .
2323

24+
build.debug.extra:
25+
python3 -m pip install \
26+
--verbose \
27+
--config-settings=cmake.verbose=true \
28+
--config-settings=logging.level=INFO \
29+
--config-settings=install.strip=false \
30+
--config-settings=cmake.args="-DCMAKE_BUILD_TYPE=Debug;-DCMAKE_C_FLAGS='-fsanitize=address -ggdb -O0';-DCMAKE_CXX_FLAGS='-fsanitize=address -ggdb -O0'" \
31+
--editable .
32+
2433
build.cuda:
2534
CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
2635

@@ -46,7 +55,7 @@ build.rpc:
4655
CMAKE_ARGS="-DGGML_RPC=on" python3 -m pip install --verbose -e .
4756

4857
build.sdist:
49-
python3 -m build --sdist
58+
python3 -m build --sdist --verbose
5059

5160
deploy.pypi:
5261
python3 -m twine upload dist/*
@@ -56,23 +65,23 @@ deploy.gh-docs:
5665
mkdocs gh-deploy
5766

5867
test:
59-
python3 -m pytest
68+
python3 -m pytest --full-trace -v
6069

6170
docker:
6271
docker build -t llama-cpp-python:latest -f docker/simple/Dockerfile .
6372

6473
run-server:
65-
uvicorn --factory llama.server:app --host ${HOST} --port ${PORT}
74+
python3 -m llama_cpp.server --model ${MODEL}
6675

6776
clean:
6877
- cd vendor/llama.cpp && make clean
6978
- cd vendor/llama.cpp && rm libllama.so
7079
- rm -rf _skbuild
71-
- rm llama_cpp/*.so
72-
- rm llama_cpp/*.dylib
73-
- rm llama_cpp/*.metal
74-
- rm llama_cpp/*.dll
75-
- rm llama_cpp/*.lib
80+
- rm llama_cpp/lib/*.so
81+
- rm llama_cpp/lib/*.dylib
82+
- rm llama_cpp/lib/*.metal
83+
- rm llama_cpp/lib/*.dll
84+
- rm llama_cpp/lib/*.lib
7685

7786
.PHONY: \
7887
update \

0 commit comments

Comments
 (0)