From c488a390e8ab19a3c8267b0f70f7b1802a1cea95 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 14:26:06 +0100 Subject: [PATCH 1/6] feat: add queryables cache and optional validation for search parameters --- stac_fastapi/core/stac_fastapi/core/core.py | 15 ++ .../stac_fastapi/sfeos_helpers/queryables.py | 131 +++++++++++++ .../tests/api/test_api_query_validation.py | 67 +++++++ stac_fastapi/tests/data/test_collection.json | 5 + .../tests/sfeos_helpers/test_queryables.py | 178 ++++++++++++++++++ 5 files changed, 396 insertions(+) create mode 100644 stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py create mode 100644 stac_fastapi/tests/api/test_api_query_validation.py create mode 100644 stac_fastapi/tests/sfeos_helpers/test_queryables.py diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56d2333fc..f4d1075f2 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -39,6 +39,11 @@ BulkTransactionMethod, Items, ) +from stac_fastapi.sfeos_helpers.queryables import ( + get_properties_from_cql2_filter, + initialize_queryables_cache, + validate_queryables, +) from stac_fastapi.types import stac as stac_types from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES from stac_fastapi.types.core import AsyncBaseCoreClient @@ -88,6 +93,10 @@ class CoreClient(AsyncBaseCoreClient): title: str = attr.ib(default="stac-fastapi") description: str = attr.ib(default="stac-fastapi") + def __attrs_post_init__(self): + """Initialize the queryables cache.""" + initialize_queryables_cache(self.database) + def _landing_page( self, base_url: str, @@ -815,6 +824,8 @@ async def post_search( ) if hasattr(search_request, "query") and getattr(search_request, "query"): + query_fields = set(getattr(search_request, "query").keys()) + await validate_queryables(query_fields) for field_name, expr in getattr(search_request, "query").items(): 
field = "properties__" + field_name for op, value in expr.items(): @@ -833,7 +844,11 @@ async def post_search( if cql2_filter is not None: try: + query_fields = get_properties_from_cql2_filter(cql2_filter) + await validate_queryables(query_fields) search = await self.database.apply_cql2_filter(search, cql2_filter) + except HTTPException: + raise except Exception as e: raise HTTPException( status_code=400, detail=f"Error with cql2 filter: {e}" diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py new file mode 100644 index 000000000..fb1065b3e --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -0,0 +1,131 @@ +"""A module for managing queryable attributes.""" + +import asyncio +import os +import time +from typing import Any, Dict, List, Optional, Set + +from fastapi import HTTPException + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic + + +class QueryablesCache: + """A thread-safe, time-based cache for queryable properties.""" + + def __init__(self, database_logic: Any): + """ + Initialize the QueryablesCache. + + Args: + database_logic: An instance of a class with a `get_queryables_mapping` method. 
+ """ + self._db_logic = database_logic + self._cache: Dict[str, List[str]] = {} + self._all_queryables: Set[str] = set() + self._last_updated: float = 0 + self._lock = asyncio.Lock() + self.validation_enabled: bool = False + self.cache_ttl: int = 3600 # How often to refresh cache (in seconds) + self.reload_settings() + + def reload_settings(self): + """Reload settings from environment variables.""" + self.validation_enabled = ( + os.getenv("VALIDATE_QUERYABLES", "false").lower() == "true" + ) + self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "3600")) + + async def _update_cache(self): + """Update the cache with the latest queryables from the database.""" + if not self.validation_enabled: + return + + async with self._lock: + if (time.time() - self._last_updated < self.cache_ttl) and self._cache: + return + + queryables_mapping = await self._db_logic.get_queryables_mapping() + all_queryables_set = set(queryables_mapping.keys()) + + self._all_queryables = all_queryables_set + + self._cache = {"*": list(all_queryables_set)} + self._last_updated = time.time() + + async def get_all_queryables(self) -> Set[str]: + """ + Return a set of all queryable attributes across all collections. + + This method will update the cache if it's stale or has been cleared. + """ + if not self.validation_enabled: + return set() + + if (time.time() - self._last_updated >= self.cache_ttl) or not self._cache: + await self._update_cache() + return self._all_queryables + + async def validate(self, fields: Set[str]) -> None: + """ + Validate if the provided fields are queryable. + + Raises HTTPException if invalid fields are found. + """ + if not self.validation_enabled: + return + + allowed_fields = await self.get_all_queryables() + invalid_fields = fields - allowed_fields + if invalid_fields: + raise HTTPException( + status_code=400, + detail=f"Invalid query fields: {', '.join(invalid_fields)}. 
Allowed fields are: {', '.join(allowed_fields)}", + ) + + +_queryables_cache_instance: Optional[QueryablesCache] = None + + +def initialize_queryables_cache(database_logic: BaseDatabaseLogic): + """ + Initialize the global queryables cache. + + :param database_logic: An instance of DatabaseLogic. + """ + global _queryables_cache_instance + if _queryables_cache_instance is None: + _queryables_cache_instance = QueryablesCache(database_logic) + + +async def all_queryables() -> Set[str]: + """Get all queryable properties from the cache.""" + if _queryables_cache_instance is None: + raise Exception("Queryables cache not initialized.") + return await _queryables_cache_instance.get_all_queryables() + + +async def validate_queryables(fields: Set[str]) -> None: + """Validate if the provided fields are queryable.""" + if _queryables_cache_instance is None: + return + await _queryables_cache_instance.validate(fields) + + +def reload_queryables_settings(): + """Reload queryables settings from environment variables.""" + if _queryables_cache_instance: + _queryables_cache_instance.reload_settings() + + +def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: + """Recursively extract property names from a CQL2 filter.""" + props: Set[str] = set() + if "op" in cql2_filter and "args" in cql2_filter: + for arg in cql2_filter["args"]: + if isinstance(arg, dict): + if "op" in arg: + props.update(get_properties_from_cql2_filter(arg)) + elif "property" in arg: + props.add(arg["property"]) + return props diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py new file mode 100644 index 000000000..e926699b5 --- /dev/null +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -0,0 +1,67 @@ +import json +import os +from unittest import mock + +import pytest + +from stac_fastapi.sfeos_helpers.queryables import reload_queryables_settings + + +@pytest.fixture(autouse=True) +def enable_validation(): + 
with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): + reload_queryables_settings() + yield + reload_queryables_settings() + + +@pytest.mark.asyncio +async def test_search_post_query_valid_param(app_client, ctx): + """Test POST /search with a valid query parameter""" + query = {"query": {"eo:cloud_cover": {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_search_post_query_invalid_param(app_client, ctx): + """Test POST /search with an invalid query parameter""" + query = {"query": {"invalid_param": {"eq": "test"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_valid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with a valid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "<", + "args": [{"property": "eo:cloud_cover"}, 10], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_invalid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with an invalid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "=", + "args": [{"property": "invalid_param"}, "test"], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] diff --git a/stac_fastapi/tests/data/test_collection.json 
b/stac_fastapi/tests/data/test_collection.json index 32a7d254b..dda5b8a0e 100644 --- a/stac_fastapi/tests/data/test_collection.json +++ b/stac_fastapi/tests/data/test_collection.json @@ -6,6 +6,11 @@ "type": "Collection", "description": "Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.", "stac_version": "1.0.0", + "queryables": { + "eo:cloud_cover": { + "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover" + } + }, "license": "PDDL-1.0", "summaries": { "platform": [ diff --git a/stac_fastapi/tests/sfeos_helpers/test_queryables.py b/stac_fastapi/tests/sfeos_helpers/test_queryables.py new file mode 100644 index 000000000..3ff882254 --- /dev/null +++ b/stac_fastapi/tests/sfeos_helpers/test_queryables.py @@ -0,0 +1,178 @@ +import os +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi import HTTPException + +import stac_fastapi.sfeos_helpers.queryables as queryables_module +from stac_fastapi.sfeos_helpers.queryables import ( + QueryablesCache, + all_queryables, + get_properties_from_cql2_filter, + initialize_queryables_cache, + reload_queryables_settings, + validate_queryables, +) + + +class TestQueryablesCache: + @pytest.fixture + def mock_db_logic(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock( + return_value={"prop1": "type1", "prop2": "type2"} + ) + return db_logic + + @pytest.fixture + def queryables_cache(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "60"} + ): + cache = QueryablesCache(mock_db_logic) + return cache + + def test_init(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "120"} + ): + cache = QueryablesCache(mock_db_logic) + assert cache.validation_enabled is True + assert cache.cache_ttl == 
120 + + def test_reload_settings(self, queryables_cache): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "false", "QUERYABLES_CACHE_TTL": "300"} + ): + queryables_cache.reload_settings() + assert queryables_cache.validation_enabled is False + assert queryables_cache.cache_ttl == 300 + + @pytest.mark.asyncio + async def test_get_all_queryables_updates_cache( + self, queryables_cache, mock_db_logic + ): + queryables = await queryables_cache.get_all_queryables() + assert queryables == {"prop1", "prop2"} + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def test_get_all_queryables_uses_cache(self, queryables_cache, mock_db_logic): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Should use cache now + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def test_get_all_queryables_refresh_after_ttl( + self, queryables_cache, mock_db_logic + ): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Simulate time passing + queryables_cache._last_updated = time.time() - queryables_cache.cache_ttl - 1 + + await queryables_cache.get_all_queryables() + assert mock_db_logic.get_queryables_mapping.call_count == 2 + + @pytest.mark.asyncio + async def test_get_all_queryables_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + queryables = await queryables_cache.get_all_queryables() + assert queryables == set() + + @pytest.mark.asyncio + async def test_validate_valid_fields(self, queryables_cache): + await queryables_cache.validate({"prop1"}) + + @pytest.mark.asyncio + async def test_validate_invalid_fields(self, queryables_cache): + with pytest.raises(HTTPException) as excinfo: + await queryables_cache.validate({"invalid_prop"}) + assert excinfo.value.status_code == 400 + assert "Invalid query 
fields: invalid_prop" in str(excinfo.value.detail) + + @pytest.mark.asyncio + async def test_validate_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + await queryables_cache.validate({"invalid_prop"}) + + +class TestGlobalFunctions: + @pytest.fixture(autouse=True) + def reset_global_cache(self): + original = queryables_module._queryables_cache_instance + queryables_module._queryables_cache_instance = None + yield + + queryables_module._queryables_cache_instance = original + + def test_initialize_queryables_cache(self): + db_logic = MagicMock() + initialize_queryables_cache(db_logic) + assert queryables_module._queryables_cache_instance is not None + assert queryables_module._queryables_cache_instance._db_logic == db_logic + + @pytest.mark.asyncio + async def test_all_queryables_not_initialized(self): + with pytest.raises(Exception) as excinfo: + await all_queryables() + assert "Queryables cache not initialized" in str(excinfo.value) + + @pytest.mark.asyncio + async def test_all_queryables(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) + initialize_queryables_cache(db_logic) + + queryables_module._queryables_cache_instance.validation_enabled = True + + res = await all_queryables() + assert res == {"p1"} + + @pytest.mark.asyncio + async def test_validate_queryables(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) + initialize_queryables_cache(db_logic) + queryables_module._queryables_cache_instance.validation_enabled = True + + await validate_queryables({"p1"}) + + with pytest.raises(HTTPException): + await validate_queryables({"invalid"}) + + def test_reload_queryables_settings(self): + db_logic = MagicMock() + initialize_queryables_cache(db_logic) + + with patch.dict(os.environ, {"VALIDATE_QUERYABLES": "false"}): + reload_queryables_settings() + assert ( + queryables_module._queryables_cache_instance.validation_enabled 
is False + ) + + +def test_get_properties_from_cql2_filter(): + # Simple prop + cql2 = {"op": "=", "args": [{"property": "prop1"}, "value"]} + props = get_properties_from_cql2_filter(cql2) + assert props == {"prop1"} + + # Nested props + cql2_nested = { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "prop1"}, "v1"]}, + {"op": "<", "args": [{"property": "prop2"}, 10]}, + ], + } + props = get_properties_from_cql2_filter(cql2_nested) + assert props == {"prop1", "prop2"} + + # Empty/invalid + assert get_properties_from_cql2_filter({}) == set() From 7434493db28b7a4d0a9c258d2e31c3f46be150f4 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 14:56:36 +0100 Subject: [PATCH 2/6] fix: remove unnecessary info from error message for invalid query fields in QueryablesCache --- .../sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py index fb1065b3e..2eb4d99c7 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -80,7 +80,7 @@ async def validate(self, fields: Set[str]) -> None: if invalid_fields: raise HTTPException( status_code=400, - detail=f"Invalid query fields: {', '.join(invalid_fields)}. 
Allowed fields are: {', '.join(allowed_fields)}", + detail=f"Invalid query fields: {', '.join(invalid_fields)}.", ) From 15ad1b9f1922a5be5f25c9db07b46040a069f2c6 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 15:19:21 +0100 Subject: [PATCH 3/6] feat: add queryables validation and cache configuration to README --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index f2a7f498e..6b041f942 100644 --- a/README.md +++ b/README.md @@ -369,6 +369,8 @@ You can customize additional settings in your `.env` file: | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | +| `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | +| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. 
| `3600` | Optional | > [!NOTE] @@ -424,6 +426,28 @@ EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,pro - Excluded fields and their nested children will be skipped during field traversal - Both the field itself and any nested properties will be excluded +## Queryables Validation + +SFEOS supports validating query parameters against the collection's defined queryables. This ensures that users only query fields that are explicitly exposed and indexed. + +**Configuration:** + +To enable queryables validation, set the following environment variables: + +```bash +VALIDATE_QUERYABLES=true +QUERYABLES_CACHE_TTL=3600 # Optional, defaults to 3600 seconds (1 hour) +``` + +**Behavior:** + +- When enabled, the API maintains a cache of all queryable fields across all collections. +- Search requests (both GET and POST) are checked against this cache. +- If a request contains a query parameter or filter field that is not in the list of allowed queryables, the API returns a `400 Bad Request` error with a message indicating the invalid field(s). +- The cache is automatically refreshed based on the `QUERYABLES_CACHE_TTL` setting. + +This feature helps prevent queries on unindexed fields which could lead to poor performance or unexpected results. + ## Datetime-Based Index Management ### Overview From 5de02fcb14af9152020f9aaae32dbd7e7f814f76 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Thu, 20 Nov 2025 09:46:43 +0100 Subject: [PATCH 4/6] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e66e2b972..124568b61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. 
When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + +- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `3600` seconds (1 hour) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + - Added retry with back-off logic for Redis related functions. [#528](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/528) + - Added nanosecond precision datetime filtering that ensures nanosecond precision support in filtering by datetime. This is configured via the `USE_DATETIME_NANOS` environment variable, while maintaining microseconds compatibility for datetime precision. [#529](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/529) ### Changed From 3c3b5cb6cf83ec3e5574edabe85b6cbc1b34f028 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Thu, 27 Nov 2025 11:04:50 +0100 Subject: [PATCH 5/6] feat: add support for excluding queryables from validation --- README.md | 3 +- .../stac_fastapi/sfeos_helpers/queryables.py | 15 ++++++++++ .../tests/api/test_api_query_validation.py | 29 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b041f942..19f184d8a 100644 --- a/README.md +++ b/README.md @@ -445,8 +445,9 @@ QUERYABLES_CACHE_TTL=3600 # Optional, defaults to 3600 seconds (1 hour) - Search requests (both GET and POST) are checked against this cache. - If a request contains a query parameter or filter field that is not in the list of allowed queryables, the API returns a `400 Bad Request` error with a message indicating the invalid field(s). 
- The cache is automatically refreshed based on the `QUERYABLES_CACHE_TTL` setting. +- **Interaction with `EXCLUDED_FROM_QUERYABLES`**: If `VALIDATE_QUERYABLES` is enabled, fields listed in `EXCLUDED_FROM_QUERYABLES` will also be considered invalid for filtering. This effectively enforces the exclusion of these fields from search queries. -This feature helps prevent queries on unindexed fields which could lead to poor performance or unexpected results. +This feature helps prevent queries on non-queryable fields which could lead to unnecessary load on the database. ## Datetime-Based Index Management diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py index 2eb4d99c7..c7fa56b32 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -27,6 +27,7 @@ def __init__(self, database_logic: Any): self._lock = asyncio.Lock() self.validation_enabled: bool = False self.cache_ttl: int = 3600 # How often to refresh cache (in seconds) + self.excluded_queryables: Set[str] = set() self.reload_settings() def reload_settings(self): @@ -36,6 +37,17 @@ def reload_settings(self): ) self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "3600")) + excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") + self.excluded_queryables = set() + if excluded: + for field in excluded.split(","): + field = field.strip() + if field: + # Remove 'properties.' 
prefix if present + if field.startswith("properties."): + field = field[11:] + self.excluded_queryables.add(field) + async def _update_cache(self): """Update the cache with the latest queryables from the database.""" if not self.validation_enabled: @@ -48,6 +60,9 @@ async def _update_cache(self): queryables_mapping = await self._db_logic.get_queryables_mapping() all_queryables_set = set(queryables_mapping.keys()) + if self.excluded_queryables: + all_queryables_set = all_queryables_set - self.excluded_queryables + self._all_queryables = all_queryables_set self._cache = {"*": list(all_queryables_set)} diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py index e926699b5..ba8239003 100644 --- a/stac_fastapi/tests/api/test_api_query_validation.py +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -65,3 +65,32 @@ async def test_item_collection_get_filter_invalid_param(app_client, ctx): assert resp.status_code == 400 resp_json = resp.json() assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_validate_queryables_excluded(app_client, ctx): + """Test that excluded queryables are rejected when validation is enabled.""" + + excluded_field = "eo:cloud_cover" + + with mock.patch.dict( + os.environ, + { + "VALIDATE_QUERYABLES": "true", + "EXCLUDED_FROM_QUERYABLES": excluded_field, + "QUERYABLES_CACHE_TTL": "0", + }, + ): + reload_queryables_settings() + + query = {"query": {excluded_field: {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + assert "Invalid query fields" in resp.json()["detail"] + assert excluded_field in resp.json()["detail"] + + query = {"query": {"id": {"eq": "test-item"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + reload_queryables_settings() From a46665e63c4d0a77723372541948df9bc320f762 Mon Sep 17 00:00:00 2001 From: Andrzej 
Pijanowski Date: Mon, 1 Dec 2025 10:57:43 +0100 Subject: [PATCH 6/6] feat: move queryables cache and validation logic to core --- README.md | 2 +- .../stac_fastapi/core/base_database_logic.py | 5 ++ stac_fastapi/core/stac_fastapi/core/core.py | 15 +++-- .../stac_fastapi/core}/queryables.py | 38 +----------- .../tests/api/test_api_query_validation.py | 30 +++++++-- .../test_queryables.py | 62 +------------------ 6 files changed, 40 insertions(+), 112 deletions(-) rename stac_fastapi/{sfeos_helpers/stac_fastapi/sfeos_helpers => core/stac_fastapi/core}/queryables.py (76%) rename stac_fastapi/tests/{sfeos_helpers => core}/test_queryables.py (64%) diff --git a/README.md b/README.md index 19f184d8a..36ca2b3df 100644 --- a/README.md +++ b/README.md @@ -367,7 +367,7 @@ You can customize additional settings in your `.env` file: | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | -| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | +| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. 
Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. If `VALIDATE_QUERYABLES` is enabled, these fields will also be considered invalid for filtering. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | | `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | | `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. | `3600` | Optional | diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index 105fdf925..1ed5265b7 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -138,3 +138,8 @@ async def delete_collection( ) -> None: """Delete a collection from the database.""" pass + + @abc.abstractmethod + async def get_queryables_mapping(self, collection_id: str = "*") -> Dict[str, Any]: + """Retrieve mapping of Queryables for search.""" + pass diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56bff9b28..fad0a979f 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -24,6 +24,10 @@ from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.core.datetime_utils import format_datetime_range from stac_fastapi.core.models.links import PagingLinks +from stac_fastapi.core.queryables import ( + QueryablesCache, + 
get_properties_from_cql2_filter, +) from stac_fastapi.core.redis_utils import redis_pagination_links from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer from stac_fastapi.core.session import Session @@ -39,11 +43,6 @@ BulkTransactionMethod, Items, ) -from stac_fastapi.sfeos_helpers.queryables import ( - get_properties_from_cql2_filter, - initialize_queryables_cache, - validate_queryables, -) from stac_fastapi.types import stac as stac_types from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES from stac_fastapi.types.core import AsyncBaseCoreClient @@ -95,7 +94,7 @@ class CoreClient(AsyncBaseCoreClient): def __attrs_post_init__(self): """Initialize the queryables cache.""" - initialize_queryables_cache(self.database) + self.queryables_cache = QueryablesCache(self.database) def _landing_page( self, @@ -826,7 +825,7 @@ async def post_search( if hasattr(search_request, "query") and getattr(search_request, "query"): query_fields = set(getattr(search_request, "query").keys()) - await validate_queryables(query_fields) + await self.queryables_cache.validate(query_fields) for field_name, expr in getattr(search_request, "query").items(): field = "properties__" + field_name for op, value in expr.items(): @@ -846,7 +845,7 @@ async def post_search( if cql2_filter is not None: try: query_fields = get_properties_from_cql2_filter(cql2_filter) - await validate_queryables(query_fields) + await self.queryables_cache.validate(query_fields) search = await self.database.apply_cql2_filter(search, cql2_filter) except HTTPException: raise diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/core/stac_fastapi/core/queryables.py similarity index 76% rename from stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py rename to stac_fastapi/core/stac_fastapi/core/queryables.py index c7fa56b32..63c63ba8f 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ 
b/stac_fastapi/core/stac_fastapi/core/queryables.py @@ -3,12 +3,10 @@ import asyncio import os import time -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Set from fastapi import HTTPException -from stac_fastapi.core.base_database_logic import BaseDatabaseLogic - class QueryablesCache: """A thread-safe, time-based cache for queryable properties.""" @@ -99,40 +97,6 @@ async def validate(self, fields: Set[str]) -> None: ) -_queryables_cache_instance: Optional[QueryablesCache] = None - - -def initialize_queryables_cache(database_logic: BaseDatabaseLogic): - """ - Initialize the global queryables cache. - - :param database_logic: An instance of DatabaseLogic. - """ - global _queryables_cache_instance - if _queryables_cache_instance is None: - _queryables_cache_instance = QueryablesCache(database_logic) - - -async def all_queryables() -> Set[str]: - """Get all queryable properties from the cache.""" - if _queryables_cache_instance is None: - raise Exception("Queryables cache not initialized.") - return await _queryables_cache_instance.get_all_queryables() - - -async def validate_queryables(fields: Set[str]) -> None: - """Validate if the provided fields are queryable.""" - if _queryables_cache_instance is None: - return - await _queryables_cache_instance.validate(fields) - - -def reload_queryables_settings(): - """Reload queryables settings from environment variables.""" - if _queryables_cache_instance: - _queryables_cache_instance.reload_settings() - - def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: """Recursively extract property names from a CQL2 filter.""" props: Set[str] = set() diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py index ba8239003..a071a0294 100644 --- a/stac_fastapi/tests/api/test_api_query_validation.py +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -4,15 +4,34 @@ import pytest -from 
stac_fastapi.sfeos_helpers.queryables import reload_queryables_settings +if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config +else: + from stac_fastapi.elasticsearch.app import app_config + + +def get_core_client(): + if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config + else: + from stac_fastapi.elasticsearch.app import app_config + return app_config["client"] + + +def reload_queryables_settings(): + client = get_core_client() + if hasattr(client, "queryables_cache"): + client.queryables_cache.reload_settings() @pytest.fixture(autouse=True) def enable_validation(): + + client = app_config["client"] with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): - reload_queryables_settings() + client.queryables_cache.reload_settings() yield - reload_queryables_settings() + client.queryables_cache.reload_settings() @pytest.mark.asyncio @@ -72,6 +91,7 @@ async def test_validate_queryables_excluded(app_client, ctx): """Test that excluded queryables are rejected when validation is enabled.""" excluded_field = "eo:cloud_cover" + client = app_config["client"] with mock.patch.dict( os.environ, @@ -81,7 +101,7 @@ async def test_validate_queryables_excluded(app_client, ctx): "QUERYABLES_CACHE_TTL": "0", }, ): - reload_queryables_settings() + client.queryables_cache.reload_settings() query = {"query": {excluded_field: {"lt": 10}}} resp = await app_client.post("/search", json=query) @@ -93,4 +113,4 @@ async def test_validate_queryables_excluded(app_client, ctx): resp = await app_client.post("/search", json=query) assert resp.status_code == 200 - reload_queryables_settings() + client.queryables_cache.reload_settings() diff --git a/stac_fastapi/tests/sfeos_helpers/test_queryables.py b/stac_fastapi/tests/core/test_queryables.py similarity index 64% rename from stac_fastapi/tests/sfeos_helpers/test_queryables.py rename to 
stac_fastapi/tests/core/test_queryables.py index 3ff882254..10a742049 100644 --- a/stac_fastapi/tests/sfeos_helpers/test_queryables.py +++ b/stac_fastapi/tests/core/test_queryables.py @@ -5,14 +5,9 @@ import pytest from fastapi import HTTPException -import stac_fastapi.sfeos_helpers.queryables as queryables_module -from stac_fastapi.sfeos_helpers.queryables import ( +from stac_fastapi.core.queryables import ( QueryablesCache, - all_queryables, get_properties_from_cql2_filter, - initialize_queryables_cache, - reload_queryables_settings, - validate_queryables, ) @@ -102,61 +97,6 @@ async def test_validate_disabled(self, queryables_cache): await queryables_cache.validate({"invalid_prop"}) -class TestGlobalFunctions: - @pytest.fixture(autouse=True) - def reset_global_cache(self): - original = queryables_module._queryables_cache_instance - queryables_module._queryables_cache_instance = None - yield - - queryables_module._queryables_cache_instance = original - - def test_initialize_queryables_cache(self): - db_logic = MagicMock() - initialize_queryables_cache(db_logic) - assert queryables_module._queryables_cache_instance is not None - assert queryables_module._queryables_cache_instance._db_logic == db_logic - - @pytest.mark.asyncio - async def test_all_queryables_not_initialized(self): - with pytest.raises(Exception) as excinfo: - await all_queryables() - assert "Queryables cache not initialized" in str(excinfo.value) - - @pytest.mark.asyncio - async def test_all_queryables(self): - db_logic = MagicMock() - db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) - initialize_queryables_cache(db_logic) - - queryables_module._queryables_cache_instance.validation_enabled = True - - res = await all_queryables() - assert res == {"p1"} - - @pytest.mark.asyncio - async def test_validate_queryables(self): - db_logic = MagicMock() - db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) - initialize_queryables_cache(db_logic) - 
queryables_module._queryables_cache_instance.validation_enabled = True - - await validate_queryables({"p1"}) - - with pytest.raises(HTTPException): - await validate_queryables({"invalid"}) - - def test_reload_queryables_settings(self): - db_logic = MagicMock() - initialize_queryables_cache(db_logic) - - with patch.dict(os.environ, {"VALIDATE_QUERYABLES": "false"}): - reload_queryables_settings() - assert ( - queryables_module._queryables_cache_instance.validation_enabled is False - ) - - def test_get_properties_from_cql2_filter(): # Simple prop cql2 = {"op": "=", "args": [{"property": "prop1"}, "value"]}