Skip to content

Commit b726926

Browse files
YuriZmytrakov and Yuri Zmytrakov authored
fix: Adding a key for technical/private data in SFEOS (#518)
**Description:** Add the `HIDE_PRIVATE_DATA` and `PRIVATE_DATA_FIELD` environment variables to toggle inclusion of private or technical fields in SFEOS responses: - `true`: hides private data - `false`: returns full response Improves control over sensitive data exposure while maintaining backward compatibility. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Yuri Zmytrakov <yzmytrakovNB@yzmytrakovNB.local>
1 parent 234b5bf commit b726926

File tree

5 files changed

+85
-2
lines changed

5 files changed

+85
-2
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- Environment variable `EXCLUDED_FROM_ITEMS` to exclude specific fields from items endpoint response. Supports comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`) [#518](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/518)
13+
1214
### Changed
1315

1416
### Fixed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,8 @@ You can customize additional settings in your `.env` file:
369369
| `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional |
370370
| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional |
371371
| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional |
372+
| `EXCLUDED_FROM_ITEMS` | Comma-separated list of fields to exclude from STAC item responses. Use dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). Paths that are not present in an item are ignored. | `None` | Optional |
373+
372374

373375
> [!NOTE]
374376
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.

stac_fastapi/core/stac_fastapi/core/serializers.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import abc
44
import logging
5+
import os
56
from copy import deepcopy
67
from typing import Any, List, Optional
78

@@ -10,7 +11,7 @@
1011

1112
from stac_fastapi.core.datetime_utils import now_to_rfc3339_str
1213
from stac_fastapi.core.models.links import CollectionLinks
13-
from stac_fastapi.core.utilities import get_bool_env
14+
from stac_fastapi.core.utilities import get_bool_env, get_excluded_from_items
1415
from stac_fastapi.types import stac as stac_types
1516
from stac_fastapi.types.links import ItemLinks, resolve_links
1617

@@ -108,7 +109,7 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
108109
else:
109110
assets = item.get("assets", {})
110111

111-
return stac_types.Item(
112+
stac_item = stac_types.Item(
112113
type="Feature",
113114
stac_version=item.get("stac_version", ""),
114115
stac_extensions=item.get("stac_extensions", []),
@@ -121,6 +122,14 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
121122
assets=assets,
122123
)
123124

125+
excluded_fields = os.getenv("EXCLUDED_FROM_ITEMS")
126+
if excluded_fields:
127+
for field_path in excluded_fields.split(","):
128+
if field_path := field_path.strip():
129+
get_excluded_from_items(stac_item, field_path)
130+
131+
return stac_item
132+
124133

125134
class CollectionSerializer(Serializer):
126135
"""Serialization methods for STAC collections."""

stac_fastapi/core/stac_fastapi/core/utilities.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,20 @@ def dict_deep_update(merge_to: Dict[str, Any], merge_from: Dict[str, Any]) -> No
178178
dict_deep_update(merge_to[k], merge_from[k])
179179
else:
180180
merge_to[k] = v
181+
182+
183+
def get_excluded_from_items(obj: dict, field_path: str) -> None:
    """Delete the field addressed by a dot-separated path from ``obj``.

    The dictionary is modified in place and nothing is returned.

    Args:
        obj: Dictionary to strip the field from.
        field_path: Dot-separated key path, e.g. ``properties.private_data``.
            The last segment is the key to delete; every preceding segment
            names a nested dictionary to descend into.

    A missing intermediate key, an intermediate value that is not a
    dictionary, or a missing final key all leave ``obj`` untouched.
    """
    keys = field_path.split(".")
    leaf = keys.pop()

    node = obj
    for key in keys:
        # A missing key yields a throwaway empty dict, so the final pop below
        # becomes a no-op instead of raising.
        child = node.get(key, {})
        if not isinstance(child, dict):
            return
        node = child

    node.pop(leaf, None)

stac_fastapi/tests/api/test_api.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,3 +1657,56 @@ async def test_use_datetime_false(app_client, load_test_data, txn_client, monkey
16571657

16581658
assert "test-item-datetime-only" not in found_ids
16591659
assert "test-item-start-end-only" in found_ids
1660+
1661+
1662+
@pytest.mark.asyncio
async def test_hide_private_data_from_item(
    app_client, txn_client, load_test_data, monkeypatch
):
    """Fields listed in EXCLUDED_FROM_ITEMS are absent from every item response.

    Creates an item carrying ``private_data`` both at the top level and under
    ``properties``, then checks that the item listing, single-item, GET search
    and POST search endpoints all omit both fields.
    """
    # Use monkeypatch rather than writing to os.environ directly: the variable
    # is restored even if an assertion below fails, so it cannot leak into
    # other tests.
    monkeypatch.setenv("EXCLUDED_FROM_ITEMS", "private_data,properties.private_data")

    def assert_private_data_hidden(item):
        assert "private_data" not in item
        assert "private_data" not in item["properties"]

    test_collection = load_test_data("test_collection.json")
    test_collection_id = "test-collection-private-data"
    test_collection["id"] = test_collection_id
    await create_collection(txn_client, test_collection)

    test_item = load_test_data("test_item.json")
    test_item_id = "test-item-private-data"
    test_item["id"] = test_item_id
    test_item["collection"] = test_collection_id
    test_item["private_data"] = {"secret": "sensitive_info"}
    test_item["properties"]["private_data"] = {"secret": "sensitive_info"}
    await create_item(txn_client, test_item)

    # Test /collections/{collection_id}/items
    resp = await app_client.get(f"/collections/{test_collection_id}/items")
    assert resp.status_code == 200
    assert_private_data_hidden(resp.json()["features"][0])

    # Test /collections/{collection_id}/items/{item_id}
    resp = await app_client.get(
        f"/collections/{test_collection_id}/items/{test_item_id}"
    )
    assert resp.status_code == 200
    assert_private_data_hidden(resp.json())

    # Test GET /search
    resp = await app_client.get(f"/search?collections={test_collection_id}")
    assert resp.status_code == 200
    assert_private_data_hidden(resp.json()["features"][0])

    # Test POST /search
    resp = await app_client.post("/search", json={"collections": [test_collection_id]})
    assert resp.status_code == 200
    assert_private_data_hidden(resp.json()["features"][0])

0 commit comments

Comments
 (0)