@@ -1393,32 +1393,26 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
13931393 if (layer_start == 0 ) layer_start = 1 ;
13941394 if (layer_end == 0 ) layer_end = 31 ;
13951395
1396- struct llama_control_vector * vector = nullptr ;
1397-
1398- for (const auto & t : params.control_vectors ) {
1399- std::string path;
1400- float strength;
1401- std::tie (path, strength) = t;
1402-
1403- fprintf (stderr, " %s: loading control vector from %s\n " , __func__, path.c_str ());
1404- struct llama_control_vector * temp = llama_control_vector_load (path.c_str ());
1405- if (temp == nullptr ) {
1406- fprintf (stderr, " %s: error: failed to load control vector from %s\n " , __func__, path.c_str ());
1407- llama_free (lctx);
1408- llama_free_model (model);
1409- return std::make_tuple (nullptr , nullptr );
1410- }
1411- llama_control_vector_scale (temp, strength);
1412-
1413- if (vector == nullptr ) {
1414- vector = temp;
1415- } else {
1416- llama_control_vector_add (vector, temp);
1417- llama_control_vector_free (temp);
1418- }
1396+ std::vector<float > control_vector;
1397+ int n_embd;
1398+ std::tie (control_vector, n_embd) = llama_control_vector_load (params.control_vectors );
1399+ if (n_embd == -1 ) {
1400+ llama_free (lctx);
1401+ llama_free_model (model);
1402+ return std::make_tuple (nullptr , nullptr );
14191403 }
14201404
1421- llama_apply_control_vector (lctx, vector, layer_start, layer_end);
1405+ int err = llama_control_vector_apply (lctx,
1406+ control_vector.data (),
1407+ control_vector.size (),
1408+ n_embd,
1409+ layer_start,
1410+ layer_end);
1411+ if (err) {
1412+ llama_free (lctx);
1413+ llama_free_model (model);
1414+ return std::make_tuple (nullptr , nullptr );
1415+ }
14221416 }
14231417
14241418 for (unsigned int i = 0 ; i < params.lora_adapter .size (); ++i) {
@@ -1937,3 +1931,156 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
19371931 }
19381932}
19391933
1934+ //
1935+ // Control vector utils
1936+ //
1937+
1938+ static std::tuple<std::vector<float >, int > llama_control_vector_load_one (const std::string & path, float strength) {
1939+ int n_tensors;
1940+ size_t n_bytes = 0 ;
1941+ uint32_t max_direction_layer = 0 ;
1942+ int n_embd = -1 ;
1943+
1944+ // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
1945+ {
1946+ struct ggml_init_params meta_params = {
1947+ /* .mem_size = */ ggml_tensor_overhead () * 128 + ggml_graph_overhead (),
1948+ /* .mem_buffer = */ nullptr ,
1949+ /* .no_alloc = */ true ,
1950+ };
1951+ ggml_context * meta_ctx = ggml_init (meta_params);
1952+ struct gguf_init_params meta_gguf_params = {
1953+ /* .no_alloc = */ true ,
1954+ /* .ctx = */ &meta_ctx,
1955+ };
1956+ struct gguf_context * meta_ctx_gguf = gguf_init_from_file (path.c_str (), meta_gguf_params);
1957+ if (!meta_ctx_gguf) {
1958+ fprintf (stderr, " %s: failed to load control vector from %s\n " , __func__, path.c_str ());
1959+ ggml_free (meta_ctx);
1960+ return std::make_tuple (std::vector<float >(), -1 );
1961+ }
1962+
1963+ n_tensors = gguf_get_n_tensors (meta_ctx_gguf);
1964+ for (int i = 0 ; i < n_tensors; i++) {
1965+ std::string name = gguf_get_tensor_name (meta_ctx_gguf, i);
1966+
1967+ // split on '.'
1968+ size_t dotpos = name.find (' .' );
1969+ if (dotpos != std::string::npos && name.substr (0 , dotpos) == " direction" ) {
1970+ try {
1971+ uint32_t layer = std::stoi (name.substr (dotpos + 1 ));
1972+ if (layer == 0 ) {
1973+ fprintf (stderr, " %s: direction tensor invalid in %s\n " , __func__, path.c_str ());
1974+ ggml_free (meta_ctx);
1975+ gguf_free (meta_ctx_gguf);
1976+ return std::make_tuple (std::vector<float >(), -1 );
1977+ }
1978+ if (layer > max_direction_layer) {
1979+ max_direction_layer = layer;
1980+ }
1981+ } catch (...) {
1982+ fprintf (stderr, " %s: direction tensor invalid in %s\n " , __func__, path.c_str ());
1983+ ggml_free (meta_ctx);
1984+ gguf_free (meta_ctx_gguf);
1985+ return std::make_tuple (std::vector<float >(), -1 );
1986+ }
1987+ }
1988+
1989+ struct ggml_tensor * tensor_meta = ggml_get_tensor (meta_ctx, name.c_str ());
1990+ if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims (tensor_meta) != 1 ) {
1991+ fprintf (stderr, " %s: direction tensor invalid in %s\n " , __func__, path.c_str ());
1992+ ggml_free (meta_ctx);
1993+ gguf_free (meta_ctx_gguf);
1994+ return std::make_tuple (std::vector<float >(), -1 );
1995+ }
1996+ if (n_embd == -1 ) {
1997+ n_embd = ggml_nelements (tensor_meta);
1998+ } else if (ggml_nelements (tensor_meta) != n_embd) {
1999+ fprintf (stderr, " %s: direction tensor sizes mismatched in %s\n " , __func__, path.c_str ());
2000+ ggml_free (meta_ctx);
2001+ gguf_free (meta_ctx_gguf);
2002+ return std::make_tuple (std::vector<float >(), -1 );
2003+ }
2004+ n_bytes += ggml_nbytes (tensor_meta);
2005+ }
2006+ ggml_free (meta_ctx);
2007+ gguf_free (meta_ctx_gguf);
2008+ }
2009+
2010+ if (n_tensors == 0 ) {
2011+ fprintf (stderr, " %s: no direction tensors found in %s\n " , __func__, path.c_str ());
2012+ return std::make_tuple (std::vector<float >(), -1 );
2013+ }
2014+
2015+ // load and scale tensors into final control vector context
2016+ struct ggml_init_params ggml_params = {
2017+ /* .mem_size = */ ggml_tensor_overhead () * n_tensors + n_bytes,
2018+ /* .mem_buffer = */ nullptr ,
2019+ /* .no_alloc = */ false ,
2020+ };
2021+ struct ggml_context * ctx = ggml_init (ggml_params);
2022+
2023+ struct gguf_init_params params = {
2024+ /* .no_alloc = */ false ,
2025+ /* .ctx = */ &ctx,
2026+ };
2027+ struct gguf_context * ctx_gguf = gguf_init_from_file (path.c_str (), params);
2028+ if (!ctx_gguf) {
2029+ fprintf (stderr, " %s: failed to load control vector from %s\n " , __func__, path.c_str ());
2030+ ggml_free (ctx);
2031+ return std::make_tuple (std::vector<float >(), -1 );
2032+ }
2033+
2034+ std::vector<float > vector;
2035+ for (uint32_t i = 1 ; i < max_direction_layer; i++) {
2036+ std::string name = " direction." + std::to_string (i);
2037+ ggml_tensor * tensor = ggml_get_tensor (ctx, name.c_str ());
2038+ if (tensor) {
2039+ const float * data = (const float *) tensor->data ;
2040+ for (int i = 0 ; i < n_embd; i++) {
2041+ vector.push_back (data[i] * strength);
2042+ }
2043+ } else {
2044+ vector.insert (vector.end (), n_embd, 0 .); // as a filler
2045+ }
2046+ }
2047+
2048+ return std::make_tuple (vector, n_embd);
2049+ }
2050+
2051+ std::tuple<std::vector<float >, int > llama_control_vector_load (const std::vector<std::tuple<std::string, float >> & vectors) {
2052+ std::vector<float > vector;
2053+ int n_embd = -1 ;
2054+
2055+ for (const auto & pair : vectors) {
2056+ std::string path;
2057+ float strength;
2058+ std::tie (path, strength) = pair;
2059+
2060+ std::vector<float > v;
2061+ int v_n_embd;
2062+ std::tie (v, v_n_embd) = llama_control_vector_load_one (path, strength);
2063+
2064+ if (v_n_embd == -1 ) {
2065+ return std::make_tuple (std::vector<float >(), -1 );
2066+ }
2067+ if (n_embd != -1 && (n_embd != v_n_embd || v.size () != vector.size ())) {
2068+ fprintf (stderr, " %s: control vector in %s does not match previous vector dimensions\n " , __func__, path.c_str ());
2069+ return std::make_tuple (std::vector<float >(), -1 );
2070+ }
2071+
2072+ if (n_embd == -1 ) {
2073+ vector = std::move (v);
2074+ n_embd = v_n_embd;
2075+ } else {
2076+ for (size_t i = 0 ; i < vector.size (); i++) {
2077+ vector[i] += v[i];
2078+ }
2079+ }
2080+ }
2081+
2082+ if (n_embd == -1 ) {
2083+ fprintf (stderr, " %s: no vectors passed\n " , __func__);
2084+ }
2085+ return std::make_tuple (vector, n_embd);
2086+ }