@@ -21,6 +21,7 @@ use anyhow::Context;
2121use futures:: future;
2222use futures:: future:: try_join_all;
2323use itertools:: Itertools ;
24+ use once_cell:: sync:: OnceCell ;
2425use quickwit_common:: shared_consts:: SPLIT_FIELDS_FILE_NAME ;
2526use quickwit_common:: uri:: Uri ;
2627use quickwit_metastore:: SplitMetadata ;
@@ -37,6 +38,19 @@ use crate::search_job_placer::group_jobs_by_index_id;
3738use crate :: service:: SearcherContext ;
3839use crate :: { ClusterClient , SearchError , SearchJob , list_relevant_splits, resolve_index_patterns} ;
3940
41+ /// QW_FIELD_LIST_SIZE_LIMIT defines a hard limit on the number of fields that
42+ /// can be returned (error otherwise).
43+ ///
44+ /// Having many fields can happen when a user is creating fields dynamically in
45+ /// a JSON type with random field names. This leads to huge memory consumption
46+ /// when building the response. This is a workaround until a way is found to
47+ /// prune the long tail of rare fields.
48+ fn get_field_list_size_limit ( ) -> usize {
49+ static FIELD_LIST_SIZE_LIMIT : OnceCell < usize > = OnceCell :: new ( ) ;
50+ * FIELD_LIST_SIZE_LIMIT
51+ . get_or_init ( || quickwit_common:: get_from_env ( "QW_FIELD_LIST_SIZE_LIMIT" , 100_000 ) )
52+ }
53+
4054/// Get the list of splits for the request which we need to scan.
4155pub async fn get_fields_from_split (
4256 searcher_context : & SearcherContext ,
@@ -184,6 +198,12 @@ fn merge_leaf_list_fields(
184198 flush_group ( & mut responses, & mut current_group) ;
185199 }
186200 }
201+ if responses. len ( ) >= get_field_list_size_limit ( ) {
202+ return Err ( SearchError :: Internal ( format ! (
203+ "list fields response exceeded {} fields" ,
204+ get_field_list_size_limit( )
205+ ) ) ) ;
206+ }
187207 current_group. push ( entry) ;
188208 }
189209 if !current_group. is_empty ( ) {
0 commit comments