From c69dcf5735a17ea31afbb3a17a6fcea6ac945c91 Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Fri, 31 Oct 2025 10:47:38 +0100 Subject: [PATCH 1/2] fix: Adding a key for technical/private data in SFEOS --- .../core/stac_fastapi/core/serializers.py | 13 ++++- .../core/stac_fastapi/core/utilities.py | 17 ++++++ stac_fastapi/tests/api/test_api.py | 53 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 973de18d8..0e156ce4c 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -2,6 +2,7 @@ import abc import logging +import os from copy import deepcopy from typing import Any, List, Optional @@ -10,7 +11,7 @@ from stac_fastapi.core.datetime_utils import now_to_rfc3339_str from stac_fastapi.core.models.links import CollectionLinks -from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.core.utilities import get_bool_env, get_excluded_from_items from stac_fastapi.types import stac as stac_types from stac_fastapi.types.links import ItemLinks, resolve_links @@ -108,7 +109,7 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item: else: assets = item.get("assets", {}) - return stac_types.Item( + stac_item = stac_types.Item( type="Feature", stac_version=item.get("stac_version", ""), stac_extensions=item.get("stac_extensions", []), @@ -121,6 +122,14 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item: assets=assets, ) + excluded_fields = os.getenv("EXCLUDED_FROM_ITEMS") + if excluded_fields: + for field_path in excluded_fields.split(","): + if field_path := field_path.strip(): + get_excluded_from_items(stac_item, field_path) + + return stac_item + class CollectionSerializer(Serializer): """Serialization methods for STAC collections.""" diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py 
b/stac_fastapi/core/stac_fastapi/core/utilities.py index de6536567..24a588856 100644 --- a/stac_fastapi/core/stac_fastapi/core/utilities.py +++ b/stac_fastapi/core/stac_fastapi/core/utilities.py @@ -178,3 +178,20 @@ def dict_deep_update(merge_to: Dict[str, Any], merge_from: Dict[str, Any]) -> No dict_deep_update(merge_to[k], merge_from[k]) else: merge_to[k] = v + + +def get_excluded_from_items(obj: dict, field_path: str) -> None: + """Remove a field from items. + + The field is removed in-place from the dictionary if it exists. + If any intermediate path does not exist or is not a dictionary, + the function returns without making any changes. + """ + *path, final = field_path.split(".") + current = obj + for part in path: + current = current.get(part, {}) + if not isinstance(current, dict): + return + + current.pop(final, None) diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index 0b0733825..04f3a1521 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1657,3 +1657,56 @@ async def test_use_datetime_false(app_client, load_test_data, txn_client, monkey assert "test-item-datetime-only" not in found_ids assert "test-item-start-end-only" in found_ids + + +@pytest.mark.asyncio +async def test_hide_private_data_from_item(app_client, txn_client, load_test_data): + os.environ["EXCLUDED_FROM_ITEMS"] = "private_data,properties.private_data" + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-private-data" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + + test_item = load_test_data("test_item.json") + test_item_id = "test-item-private-data" + test_item["id"] = test_item_id + test_item["collection"] = test_collection_id + test_item["private_data"] = {"secret": "sensitive_info"} + test_item["properties"]["private_data"] = {"secret": "sensitive_info"} + await create_item(txn_client, test_item) + + # 
Test /collections/{collection_id}/items + resp = await app_client.get(f"/collections/{test_collection_id}/items") + assert resp.status_code == 200 + resp_json = resp.json() + item = resp_json["features"][0] + assert "private_data" not in item + assert "private_data" not in item["properties"] + + # Test /collections/{collection_id}/items/{item_id} + resp = await app_client.get( + f"/collections/{test_collection_id}/items/{test_item_id}" + ) + assert resp.status_code == 200 + resp_json = resp.json() + assert "private_data" not in resp_json + assert "private_data" not in resp_json["properties"] + + # Test GET /search + resp = await app_client.get(f"/search?collections={test_collection_id}") + assert resp.status_code == 200 + resp_json = resp.json() + item = resp_json["features"][0] + assert "private_data" not in item + assert "private_data" not in item["properties"] + + # Test POST /search + resp = await app_client.post("/search", json={"collections": [test_collection_id]}) + assert resp.status_code == 200 + resp_json = resp.json() + item = resp_json["features"][0] + assert "private_data" not in item + assert "private_data" not in item["properties"] + + del os.environ["EXCLUDED_FROM_ITEMS"] From 92f0ca389c73e66b2a460fbe9c46005381431268 Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Sun, 2 Nov 2025 17:10:54 +0100 Subject: [PATCH 2/2] docs: updated readme and changelog about excluded_from_items env var --- CHANGELOG.md | 2 ++ README.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49ec8c12a..84eef89ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Environment variable `EXCLUDED_FROM_ITEMS` to exclude specific fields from items endpoint response. 
Supports a comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`) [#518](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/518) + ### Changed ### Fixed diff --git a/README.md b/README.md index fb2bdd027..0c379ae2f 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,8 @@ You can customize additional settings in your `.env` file: | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | +| `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | None | Optional | + > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.