@@ -562,6 +562,35 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.lora_base = argv[i];
+        } else if (arg == "--control-vector") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back(std::make_tuple(argv[i], 1.0f));
+        } else if (arg == "--control-vector-scaled") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            const char * control_vector = argv[i];
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back(std::make_tuple(control_vector, std::stof(argv[i])));
+        } else if (arg == "--control-vector-layer-range") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            int32_t start = std::stoi(argv[i]);
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            int32_t end = std::stoi(argv[i]);
+            params.control_vector_layer_range = std::make_tuple(start, end);
         } else if (arg == "--mmproj") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -1087,6 +1116,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     printf("  --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
     printf("  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
+    printf("  --control-vector FNAME\n");
+    printf("                        add a control vector\n");
+    printf("  --control-vector-scaled FNAME S\n");
+    printf("                        add a control vector with user defined scaling S\n");
+    printf("  --control-vector-layer-range START END\n");
+    printf("                        layer range to apply the control vector(s) to, start and end inclusive\n");
     printf("  -m FNAME, --model FNAME\n");
     printf("                        model path (default: %s)\n", params.model.c_str());
     printf("  -md FNAME, --model-draft FNAME\n");
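Taken together, the new flags compose like this on the command line (the model and vector file names below are placeholders, not part of the patch):

```
./main -m model.gguf -p "Tell me about your day." \
    --control-vector happy.gguf \
    --control-vector-scaled angry.gguf 0.75 \
    --control-vector-layer-range 12 22
```

`--control-vector FNAME` is shorthand for a scale of 1.0; multiple vectors may be given, and the loader added below sums them elementwise.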
@@ -1351,6 +1386,35 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(
         return std::make_tuple(nullptr, nullptr);
     }
 
+    if (!params.control_vectors.empty()) {
+        int32_t layer_start, layer_end;
+        std::tie(layer_start, layer_end) = params.control_vector_layer_range;
+
+        // default to every layer except layer 0 (the token embeddings)
+        if (layer_start == 0) layer_start = 1;
+        if (layer_end   == 0) layer_end   = llama_n_layer(model);
+
+        std::vector<float> control_vector;
+        int n_embd;
+        std::tie(control_vector, n_embd) = llama_control_vector_load(params.control_vectors);
+        if (n_embd == -1) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+
+        int err = llama_control_vector_apply(lctx,
+                                             control_vector.data(),
+                                             control_vector.size(),
+                                             n_embd,
+                                             layer_start,
+                                             layer_end);
+        if (err) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+    }
+
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
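The flat buffer handed to `llama_control_vector_apply` holds one `n_embd`-wide block per layer starting at layer 1 (layer 0, the token embeddings, is never targeted), with zero-filled blocks for layers a file does not cover. A minimal sketch of indexing into that layout, with an illustrative helper name that is not part of the patch:

```cpp
// Illustrative only: pointer to the scaled direction for 1-based layer il
// inside the flat buffer produced by llama_control_vector_load below.
static const float * control_vector_layer(const std::vector<float> & buf, int n_embd, int il) {
    return buf.data() + (size_t) (il - 1) * n_embd;
}
```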
@@ -1867,3 +1931,156 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
     }
 }
 
+//
+// Control vector utils
+//
+
+static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const std::string & path, float strength) {
+    int n_tensors;
+    size_t n_bytes = 0;
+    uint32_t max_direction_layer = 0;
+    int n_embd = -1;
+
+    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
+    {
+        struct ggml_init_params meta_params = {
+            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
+            /* .mem_buffer = */ nullptr,
+            /* .no_alloc   = */ true,
+        };
+        ggml_context * meta_ctx = ggml_init(meta_params);
+        struct gguf_init_params meta_gguf_params = {
+            /* .no_alloc = */ true,
+            /* .ctx      = */ &meta_ctx,
+        };
+        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(path.c_str(), meta_gguf_params);
+        if (!meta_ctx_gguf) {
+            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+            ggml_free(meta_ctx);
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
+        for (int i = 0; i < n_tensors; i++) {
+            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
+
+            // split on '.'
+            size_t dotpos = name.find('.');
+            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+                try {
+                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
+                    if (layer == 0) {
+                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                        ggml_free(meta_ctx);
+                        gguf_free(meta_ctx_gguf);
+                        return std::make_tuple(std::vector<float>(), -1);
+                    }
+                    if (layer > max_direction_layer) {
+                        max_direction_layer = layer;
+                    }
+                } catch (...) {
+                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                    ggml_free(meta_ctx);
+                    gguf_free(meta_ctx_gguf);
+                    return std::make_tuple(std::vector<float>(), -1);
+                }
+            }
+
+            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
+            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
+                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            if (n_embd == -1) {
+                n_embd = ggml_nelements(tensor_meta);
+            } else if (ggml_nelements(tensor_meta) != n_embd) {
+                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            n_bytes += ggml_nbytes(tensor_meta);
+        }
+        ggml_free(meta_ctx);
+        gguf_free(meta_ctx_gguf);
+    }
+
+    if (n_tensors == 0) {
+        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, path.c_str());
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    // load and scale tensors into final control vector context
+    struct ggml_init_params ggml_params = {
+        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
+        /* .mem_buffer = */ nullptr,
+        /* .no_alloc   = */ false,
+    };
+    struct ggml_context * ctx = ggml_init(ggml_params);
+
+    struct gguf_init_params params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(path.c_str(), params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+        ggml_free(ctx);
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    std::vector<float> vector;
+    for (uint32_t il = 1; il <= max_direction_layer; il++) {
+        std::string name = "direction." + std::to_string(il);
+        ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor) {
+            const float * data = (const float *) tensor->data;
+            for (int i = 0; i < n_embd; i++) {
+                vector.push_back(data[i] * strength);
+            }
+        } else {
+            vector.insert(vector.end(), n_embd, 0.0f); // zero-fill layers that have no direction tensor
+        }
+    }
+
+    gguf_free(ctx_gguf); // data has been copied out above, so both contexts can be freed
+    ggml_free(ctx);
+
+    return std::make_tuple(vector, n_embd);
+}
+
+std::tuple<std::vector<float>, int> llama_control_vector_load(const std::vector<std::tuple<std::string, float>> & vectors) {
+    std::vector<float> vector;
+    int n_embd = -1;
+
+    for (const auto & pair : vectors) {
+        std::string path;
+        float strength;
+        std::tie(path, strength) = pair;
+
+        std::vector<float> v;
+        int v_n_embd;
+        std::tie(v, v_n_embd) = llama_control_vector_load_one(path, strength);
+
+        if (v_n_embd == -1) {
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+        if (n_embd != -1 && (n_embd != v_n_embd || v.size() != vector.size())) {
+            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, path.c_str());
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        if (n_embd == -1) {
+            vector = std::move(v);
+            n_embd = v_n_embd;
+        } else {
+            for (size_t i = 0; i < vector.size(); i++) {
+                vector[i] += v[i];
+            }
+        }
+    }
+
+    if (n_embd == -1) {
+        fprintf(stderr, "%s: no vectors passed\n", __func__);
+    }
+    return std::make_tuple(vector, n_embd);
+}
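For reference, the loader above expects a GGUF file containing 1-D f32 tensors named `direction.<il>` with `il >= 1`, all of length `n_embd`. A hedged sketch of writing a compatible file with the ggml/gguf API (the function name and zeroed data are placeholders; the gguf functions were declared in `ggml.h` at the time of this change):

```cpp
#include <string>
#include "ggml.h"

// Sketch: write an all-zeros control vector covering layers 1..n_layers.
static void write_control_vector(const char * path, int n_embd, int n_layers) {
    struct ggml_init_params params = {
        /* .mem_size   = */ ggml_tensor_overhead() * n_layers + (size_t) n_layers * n_embd * sizeof(float),
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx      = ggml_init(params);
    struct gguf_context * ctx_gguf = gguf_init_empty();

    for (int il = 1; il <= n_layers; il++) {
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        ggml_set_name(t, ("direction." + std::to_string(il)).c_str());
        ggml_set_zero(t); // a real tool would store learned directions here
        gguf_add_tensor(ctx_gguf, t);
    }

    gguf_write_to_file(ctx_gguf, path, /* only_meta = */ false);
    gguf_free(ctx_gguf);
    ggml_free(ctx);
}
```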