Skip to content

Commit 3d776cd

Browse files
committed
switch to llama.cpp fork and llama : expose C API to get layer device type
1 parent c3debdf commit 3d776cd

File tree

3 files changed

+23
-3
lines changed

3 files changed

+23
-3
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule "vendor/llama.cpp"]
22
path = vendor/llama.cpp
3-
url = https://github.com/ggerganov/llama.cpp.git
3+
url = https://github.com/inference-sh/llama.cpp

llama_cpp/_internals.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import os
44
import ctypes
5+
from enum import Enum
56

67
from typing import (
78
Dict,
@@ -26,7 +27,13 @@
2627

2728

2829
# Python wrappers over llama.h structs
29-
30+
class LlamaBackendDev(Enum):
31+
# CPU device using system memory
32+
CPU = 0
33+
# GPU device using dedicated memory
34+
GPU = 1
35+
# accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
36+
ACCEL = 2
3037

3138
class LlamaModel:
3239
"""Intermediate Python wrapper for a llama.cpp llama_model.
@@ -95,7 +102,13 @@ def n_ctx_train(self) -> int:
95102
return llama_cpp.llama_model_n_ctx_train(self.model)
96103

97104
def n_embd(self) -> int:
98-
return llama_cpp.llama_model_n_embd(self.model)
105+
return llama_cpp.llama_n_embd(self.model)
106+
107+
def n_layer(self) -> int:
108+
return llama_cpp.llama_n_layer(self.model)
109+
110+
def dev_layer(self, il: int) -> LlamaBackendDev:
111+
return LlamaBackendDev(llama_cpp.llama_model_dev_layer(self.model, il))
99112

100113
def rope_freq_scale_train(self) -> float:
101114
return llama_cpp.llama_model_rope_freq_scale_train(self.model)

llama_cpp/llama.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,13 @@ def ctx(self) -> llama_cpp.llama_context_p:
487487
@property
488488
def model(self) -> llama_cpp.llama_model_p:
489489
return self._model.model
490+
491+
@property
492+
def n_layer(self) -> int:
493+
return self._model.n_layer()
494+
495+
def dev_layer(self, il: int) -> internals.LlamaBackendDev:
496+
return self._model.dev_layer(il)
490497

491498
@property
492499
def _input_ids(self) -> npt.NDArray[np.intc]:

0 commit comments

Comments (0)