Skip to content

Commit aa3b7a9

Browse files
ngxson and ggerganov authored
arg: add --cache-list argument to list cached models (#17073)
* arg: add --cache-list argument to list cached models * new manifest naming format * improve naming * Update common/arg.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 333f259 commit aa3b7a9

File tree

5 files changed

+117
-9
lines changed

5 files changed

+117
-9
lines changed

common/arg.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
740740
exit(0);
741741
}
742742
));
743+
add_opt(common_arg(
744+
{"-cl", "--cache-list"},
745+
"show list of models in cache",
746+
[](common_params &) {
747+
printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
748+
auto models = common_list_cached_models();
749+
printf("number of models in cache: %zu\n", models.size());
750+
for (size_t i = 0; i < models.size(); i++) {
751+
auto & model = models[i];
752+
printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
753+
}
754+
exit(0);
755+
}
756+
));
743757
add_opt(common_arg(
744758
{"--completion-bash"},
745759
"print source-able bash completion script for llama.cpp",

common/common.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
908908
return cache_directory + filename;
909909
}
910910

911+
std::vector<common_file_info> fs_list_files(const std::string & path) {
912+
std::vector<common_file_info> files;
913+
if (path.empty()) return files;
914+
915+
std::filesystem::path dir(path);
916+
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
917+
return files;
918+
}
919+
920+
for (const auto & entry : std::filesystem::directory_iterator(dir)) {
921+
try {
922+
// Only include regular files (skip directories)
923+
const auto & p = entry.path();
924+
if (std::filesystem::is_regular_file(p)) {
925+
common_file_info info;
926+
info.path = p.string();
927+
info.name = p.filename().string();
928+
try {
929+
info.size = static_cast<size_t>(std::filesystem::file_size(p));
930+
} catch (const std::filesystem::filesystem_error &) {
931+
info.size = 0;
932+
}
933+
files.push_back(std::move(info));
934+
}
935+
} catch (const std::filesystem::filesystem_error &) {
936+
// skip entries we cannot inspect
937+
continue;
938+
}
939+
}
940+
941+
return files;
942+
}
943+
911944

912945
//
913946
// Model utils

common/common.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,13 @@ bool fs_create_directory_with_parents(const std::string & path);
611611
std::string fs_get_cache_directory();
612612
std::string fs_get_cache_file(const std::string & filename);
613613

614+
// description of a single file on disk, as produced by fs_list_files()
struct common_file_info {
    std::string path;     // path of the file
    std::string name;     // filename component of `path`
    size_t      size{0};  // in bytes
};

// list the regular files found directly inside the directory `path`
std::vector<common_file_info> fs_list_files(const std::string & path);
620+
614621
//
615622
// Model utils
616623
//

common/download.cpp

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,22 @@ using json = nlohmann::ordered_json;
5050
// downloader
5151
//
5252

53+
// check that a repo name has the form "<owner>/<repo>", where each side is a non-empty
// run of ASCII letters, digits, '_', '.' or '-'
static bool validate_repo_name(const std::string & repo) {
    // function-local static: the pattern is compiled once, on first use
    static const std::regex owner_slash_repo(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)");

    // regex_match requires the whole string to match the pattern
    const bool well_formed = std::regex_match(repo.cbegin(), repo.cend(), owner_slash_repo);
    return well_formed;
}
58+
59+
static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
60+
// we use "=" to avoid clashing with other component, while still being allowed on windows
61+
std::string fname = "manifest=" + repo + "=" + tag + ".json";
62+
if (!validate_repo_name(repo)) {
63+
throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
64+
}
65+
string_replace_all(fname, "/", "=");
66+
return fs_get_cache_file(fname);
67+
}
68+
5369
static std::string read_file(const std::string & fname) {
5470
std::ifstream file(fname);
5571
if (!file) {
@@ -829,17 +845,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
829845
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
830846
// User-Agent header is already set in common_remote_get_content, no need to set it here
831847

832-
// we use "=" to avoid clashing with other component, while still being allowed on windows
833-
std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
834-
string_replace_all(cached_response_fname, "/", "_");
835-
std::string cached_response_path = fs_get_cache_file(cached_response_fname);
836-
837848
// make the request
838849
common_remote_params params;
839850
params.headers = headers;
840851
long res_code = 0;
841852
std::string res_str;
842853
bool use_cache = false;
854+
std::string cached_response_path = get_manifest_path(hf_repo, tag);
843855
if (!offline) {
844856
try {
845857
auto res = common_remote_get_content(url, params);
@@ -895,6 +907,33 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
895907
return { hf_repo, ggufFile, mmprojFile };
896908
}
897909

910+
std::vector<common_cached_model_info> common_list_cached_models() {
911+
std::vector<common_cached_model_info> models;
912+
const std::string cache_dir = fs_get_cache_directory();
913+
const std::vector<common_file_info> files = fs_list_files(cache_dir);
914+
for (const auto & file : files) {
915+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
916+
common_cached_model_info model_info;
917+
model_info.manifest_path = file.path;
918+
std::string fname = file.name;
919+
string_replace_all(fname, ".json", ""); // remove extension
920+
auto parts = string_split<std::string>(fname, '=');
921+
if (parts.size() == 4) {
922+
// expect format: manifest=<user>=<model>=<tag>=<other>
923+
model_info.user = parts[1];
924+
model_info.model = parts[2];
925+
model_info.tag = parts[3];
926+
} else {
927+
// invalid format
928+
continue;
929+
}
930+
model_info.size = 0; // TODO: get GGUF size, not manifest size
931+
models.push_back(model_info);
932+
}
933+
}
934+
return models;
935+
}
936+
898937
//
899938
// Docker registry functions
900939
//
@@ -959,6 +998,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
959998
std::string token = common_docker_get_token(repo); // Get authentication token
960999

9611000
// Get manifest
1001+
// TODO: cache the manifest response so that it appears in the model list
9621002
const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
9631003
std::string manifest_url = url_prefix + "/manifests/" + tag;
9641004
common_remote_params manifest_params;

common/download.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,23 @@ struct common_params_model;
88
// download functionalities
99
//
1010

11+
// a model found in the local cache, identified by its manifest file
struct common_cached_model_info {
    std::string manifest_path; // path of the manifest file
    std::string user;
    std::string model;
    std::string tag;
    size_t      size{0};       // GGUF size in bytes

    // format the entry as "<user>/<model>:<tag>"
    std::string to_string() const {
        std::string repr = user;
        repr += "/";
        repr += model;
        repr += ":";
        repr += tag;
        return repr;
    }
};
21+
1122
// result returned by common_get_hf_file(): the repo plus the file names resolved for it
struct common_hf_file_res {
1223
std::string repo; // repo name with ":tag" removed
1324
std::string ggufFile; // NOTE(review): presumably the GGUF file name taken from the manifest response - confirm in common_get_hf_file
1425
std::string mmprojFile; // NOTE(review): presumably the multimodal projector file name, possibly empty - confirm in common_get_hf_file
1526
};
1627

17-
// resolve and download model from Docker registry
18-
// return local path to downloaded model file
19-
std::string common_docker_resolve_model(const std::string & docker);
20-
2128
/**
2229
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
2330
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -39,3 +46,10 @@ bool common_download_model(
3946
const common_params_model & model,
4047
const std::string & bearer_token,
4148
bool offline);
49+
50+
// returns list of cached models
51+
std::vector<common_cached_model_info> common_list_cached_models();
52+
53+
// resolve and download model from Docker registry
54+
// return local path to downloaded model file
55+
std::string common_docker_resolve_model(const std::string & docker);

0 commit comments

Comments
 (0)