2929from typing import TYPE_CHECKING , Optional , Dict , Any , cast
3030from collections import defaultdict
3131
32- from chromadb .api .types import Where
32+ from chromadb .api .types import OneOrMany , Where , maybe_cast_one_to_many
3333
3434if TYPE_CHECKING :
3535 from chromadb .api .models .Collection import Collection
@@ -121,7 +121,9 @@ def detach_statistics_function(
121121
122122
123123def get_statistics (
124- collection : "Collection" , stats_collection_name : str , key : Optional [str ] = None
124+ collection : "Collection" ,
125+ stats_collection_name : str ,
126+ keys : Optional [OneOrMany [str ]] = None ,
125127) -> Dict [str , Any ]:
126128 """Get the current statistics for a collection.
127129
@@ -131,8 +133,9 @@ def get_statistics(
131133 Args:
132134 collection: The collection to get statistics for
133135 stats_collection_name: Name of the statistics collection to read from.
134- key: Optional metadata key to filter statistics for. If provided,
135- only returns statistics for that specific key.
136+ keys: Optional metadata key(s) to filter statistics for. Can be a single key
137+ string or a list of keys. If provided, only returns statistics for
138+ those specific keys.
136139
137140 Returns:
138141 Dict[str, Any]: A dictionary with the structure:
@@ -174,7 +177,22 @@ def get_statistics(
174177 "total_count": 2
175178 }
176179 }
180+
181+ Raises:
182+ ValueError: If more than 30 keys are provided in the keys filter.
177183 """
184+ # Normalize keys to list
185+ keys_list = maybe_cast_one_to_many (keys )
186+
187+ # Cap the number of keys so the $in metadata filter built below stays small
188+ MAX_KEYS = 30
189+ if keys_list is not None and len (keys_list ) > MAX_KEYS :
190+ raise ValueError (
191+ f"Too many keys provided: { len (keys_list )} . "
192+ f"Maximum allowed is { MAX_KEYS } keys per request. "
193+ "Consider calling get_statistics multiple times with smaller key batches."
194+ )
195+
178196 # Import here to avoid circular dependency
179197 from chromadb .api .models .Collection import Collection
180198
@@ -198,10 +216,10 @@ def get_statistics(
198216 summary : Dict [str , Any ] = {}
199217
200218 offset = 0
201- # When filtering by key , also include "summary" entries to get total_count
219+ # When filtering by keys, also include "summary" entries to get total_count
202220 where_filter : Optional [Where ] = (
203- cast (Where , {"$or " : [{ "key " : key }, { "key" : "summary" }] })
204- if key is not None
221+ cast (Where , {"key " : { "$in " : keys_list + [ "summary" ]} })
222+ if keys_list is not None
205223 else None
206224 )
207225
0 commit comments