Skip to content

Commit 43fd5d8

Browse files
YuriZmytrakovYuri Zmytrakov
andauthored
fix: ensure datetime uses nano seconds (#529)
**Description:** Change the `datetime` field type from `date` to `date_nanos` so that datetime filters preserve sub-millisecond precision instead of truncating to milliseconds (for example, `024999Z` being truncated to `024Z`). Also enforce the 1970-2262 date boundaries required by the `date_nanos` resolution. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Yuri Zmytrakov <yzmytrakovNB@yzmytrakovNB.local>
1 parent c1a7bc1 commit 43fd5d8

File tree

6 files changed

+167
-52
lines changed

6 files changed

+167
-52
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- Added nanosecond-precision datetime filtering, configurable via the `USE_DATETIME_NANOS` environment variable, while maintaining microsecond compatibility for datetime precision. [#529](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/529)
13+
1214
### Changed
1315

1416
### Fixed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ You can customize additional settings in your `.env` file:
366366
| `STAC_DEFAULT_ITEM_LIMIT` | Configures the default number of STAC items returned when no limit parameter is specified in the request. | `10` | Optional |
367367
| `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional |
368368
| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional |
369+
| `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches, as per the `date_nanos` type. When `false`, millisecond precision (3 fractional digits) is used, as per the `date` type. | `true` | Optional |
369370
| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional |
370371
| `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional |
371372

stac_fastapi/core/stac_fastapi/core/datetime_utils.py

Lines changed: 66 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from datetime import datetime, timezone
44

5+
from stac_fastapi.core.utilities import get_bool_env
56
from stac_fastapi.types.rfc3339 import rfc3339_str_to_datetime
67

78

@@ -15,27 +16,71 @@ def format_datetime_range(date_str: str) -> str:
1516
Returns:
1617
str: A string formatted as 'YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ', with '..' used if any element is None.
1718
"""
18-
19-
def normalize(dt):
20-
"""Normalize datetime string and preserve millisecond precision."""
21-
dt = dt.strip()
22-
if not dt or dt == "..":
23-
return ".."
24-
dt_obj = rfc3339_str_to_datetime(dt)
25-
dt_utc = dt_obj.astimezone(timezone.utc)
26-
return dt_utc.isoformat(timespec="milliseconds").replace("+00:00", "Z")
27-
28-
if not isinstance(date_str, str):
29-
return "../.."
30-
31-
if "/" not in date_str:
32-
return f"{normalize(date_str)}/{normalize(date_str)}"
33-
34-
try:
35-
start, end = date_str.split("/", 1)
36-
except Exception:
37-
return "../.."
38-
return f"{normalize(start)}/{normalize(end)}"
19+
use_datetime_nanos = get_bool_env("USE_DATETIME_NANOS", default=True)
20+
21+
if use_datetime_nanos:
22+
MIN_DATE_NANOS = datetime(1970, 1, 1, tzinfo=timezone.utc)
23+
MAX_DATE_NANOS = datetime(2262, 4, 11, 23, 47, 16, 854775, tzinfo=timezone.utc)
24+
25+
def normalize(dt):
26+
"""Normalize datetime string and preserve nano second precision."""
27+
dt = dt.strip()
28+
if not dt or dt == "..":
29+
return ".."
30+
dt_utc = rfc3339_str_to_datetime(dt).astimezone(timezone.utc)
31+
if dt_utc < MIN_DATE_NANOS:
32+
dt_utc = MIN_DATE_NANOS
33+
if dt_utc > MAX_DATE_NANOS:
34+
dt_utc = MAX_DATE_NANOS
35+
return dt_utc.isoformat(timespec="auto").replace("+00:00", "Z")
36+
37+
if not isinstance(date_str, str):
38+
return f"{MIN_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}/{MAX_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}"
39+
40+
if "/" not in date_str:
41+
return f"{normalize(date_str)}/{normalize(date_str)}"
42+
43+
try:
44+
start, end = date_str.split("/", 1)
45+
except Exception:
46+
return f"{MIN_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}/{MAX_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}"
47+
48+
normalized_start = normalize(start)
49+
normalized_end = normalize(end)
50+
51+
if normalized_start == "..":
52+
normalized_start = MIN_DATE_NANOS.isoformat(timespec="auto").replace(
53+
"+00:00", "Z"
54+
)
55+
if normalized_end == "..":
56+
normalized_end = MAX_DATE_NANOS.isoformat(timespec="auto").replace(
57+
"+00:00", "Z"
58+
)
59+
60+
return f"{normalized_start}/{normalized_end}"
61+
62+
else:
63+
64+
def normalize(dt):
65+
"""Normalize datetime string and preserve millisecond precision."""
66+
dt = dt.strip()
67+
if not dt or dt == "..":
68+
return ".."
69+
dt_obj = rfc3339_str_to_datetime(dt)
70+
dt_utc = dt_obj.astimezone(timezone.utc)
71+
return dt_utc.isoformat(timespec="milliseconds").replace("+00:00", "Z")
72+
73+
if not isinstance(date_str, str):
74+
return "../.."
75+
76+
if "/" not in date_str:
77+
return f"{normalize(date_str)}/{normalize(date_str)}"
78+
79+
try:
80+
start, end = date_str.split("/", 1)
81+
except Exception:
82+
return "../.."
83+
return f"{normalize(start)}/{normalize(end)}"
3984

4085

4186
# Borrowed from pystac - https://github.com/stac-utils/pystac/blob/f5e4cf4a29b62e9ef675d4a4dac7977b09f53c8f/pystac/utils.py#L370-L394

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py

Lines changed: 93 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
import re
99
from datetime import date
1010
from datetime import datetime as datetime_type
11+
from datetime import timezone
1112
from typing import Dict, Optional, Union
1213

14+
from stac_fastapi.core.utilities import get_bool_env
1315
from stac_fastapi.types.rfc3339 import DateTimeType
1416

1517
logger = logging.getLogger(__name__)
@@ -37,38 +39,103 @@ def return_date(
3739
always containing 'gte' and 'lte' keys.
3840
"""
3941
result: Dict[str, Optional[str]] = {"gte": None, "lte": None}
40-
42+
use_datetime_nanos = get_bool_env("USE_DATETIME_NANOS", default=True)
4143
if interval is None:
4244
return result
4345

44-
if isinstance(interval, str):
45-
if "/" in interval:
46-
parts = interval.split("/")
47-
result["gte"] = (
48-
parts[0] if parts[0] != ".." else datetime_type.min.isoformat() + "Z"
49-
)
50-
result["lte"] = (
51-
parts[1]
52-
if len(parts) > 1 and parts[1] != ".."
53-
else datetime_type.max.isoformat() + "Z"
46+
if use_datetime_nanos:
47+
MIN_DATE_NANOS = datetime_type(1970, 1, 1, tzinfo=timezone.utc)
48+
MAX_DATE_NANOS = datetime_type(
49+
2262, 4, 11, 23, 47, 16, 854775, tzinfo=timezone.utc
50+
)
51+
52+
if isinstance(interval, str):
53+
if "/" in interval:
54+
parts = interval.split("/")
55+
result["gte"] = (
56+
parts[0] if parts[0] != ".." else MIN_DATE_NANOS.isoformat() + "Z"
57+
)
58+
result["lte"] = (
59+
parts[1]
60+
if len(parts) > 1 and parts[1] != ".."
61+
else MAX_DATE_NANOS.isoformat() + "Z"
62+
)
63+
else:
64+
converted_time = interval if interval != ".." else None
65+
result["gte"] = result["lte"] = converted_time
66+
return result
67+
68+
if isinstance(interval, datetime_type):
69+
dt_utc = (
70+
interval.astimezone(timezone.utc)
71+
if interval.tzinfo
72+
else interval.replace(tzinfo=timezone.utc)
5473
)
55-
else:
56-
converted_time = interval if interval != ".." else None
57-
result["gte"] = result["lte"] = converted_time
74+
if dt_utc < MIN_DATE_NANOS:
75+
dt_utc = MIN_DATE_NANOS
76+
elif dt_utc > MAX_DATE_NANOS:
77+
dt_utc = MAX_DATE_NANOS
78+
datetime_iso = dt_utc.isoformat()
79+
result["gte"] = result["lte"] = datetime_iso
80+
elif isinstance(interval, tuple):
81+
start, end = interval
82+
# Ensure datetimes are converted to UTC and formatted with 'Z'
83+
if start:
84+
start_utc = (
85+
start.astimezone(timezone.utc)
86+
if start.tzinfo
87+
else start.replace(tzinfo=timezone.utc)
88+
)
89+
if start_utc < MIN_DATE_NANOS:
90+
start_utc = MIN_DATE_NANOS
91+
elif start_utc > MAX_DATE_NANOS:
92+
start_utc = MAX_DATE_NANOS
93+
result["gte"] = start_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
94+
if end:
95+
end_utc = (
96+
end.astimezone(timezone.utc)
97+
if end.tzinfo
98+
else end.replace(tzinfo=timezone.utc)
99+
)
100+
if end_utc < MIN_DATE_NANOS:
101+
end_utc = MIN_DATE_NANOS
102+
elif end_utc > MAX_DATE_NANOS:
103+
end_utc = MAX_DATE_NANOS
104+
result["lte"] = end_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
105+
58106
return result
59107

60-
if isinstance(interval, datetime_type):
61-
datetime_iso = interval.isoformat()
62-
result["gte"] = result["lte"] = datetime_iso
63-
elif isinstance(interval, tuple):
64-
start, end = interval
65-
# Ensure datetimes are converted to UTC and formatted with 'Z'
66-
if start:
67-
result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
68-
if end:
69-
result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
70-
71-
return result
108+
else:
109+
if isinstance(interval, str):
110+
if "/" in interval:
111+
parts = interval.split("/")
112+
result["gte"] = (
113+
parts[0]
114+
if parts[0] != ".."
115+
else datetime_type.min.isoformat() + "Z"
116+
)
117+
result["lte"] = (
118+
parts[1]
119+
if len(parts) > 1 and parts[1] != ".."
120+
else datetime_type.max.isoformat() + "Z"
121+
)
122+
else:
123+
converted_time = interval if interval != ".." else None
124+
result["gte"] = result["lte"] = converted_time
125+
return result
126+
127+
if isinstance(interval, datetime_type):
128+
datetime_iso = interval.isoformat()
129+
result["gte"] = result["lte"] = datetime_iso
130+
elif isinstance(interval, tuple):
131+
start, end = interval
132+
# Ensure datetimes are converted to UTC and formatted with 'Z'
133+
if start:
134+
result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
135+
if end:
136+
result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
137+
138+
return result
72139

73140

74141
def extract_date(date_str: str) -> date:

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ class Geometry(Protocol): # noqa
142142
"type": "object",
143143
"properties": {
144144
# Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md
145-
"datetime": {"type": "date"},
145+
"datetime": {"type": "date_nanos"},
146146
"start_datetime": {"type": "date"},
147147
"end_datetime": {"type": "date"},
148148
"created": {"type": "date"},

stac_fastapi/tests/api/test_api.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -608,10 +608,10 @@ async def test_datetime_bad_interval(app_client, txn_client, ctx):
608608
await create_item(txn_client, third_item)
609609

610610
dt_formats = [
611-
"1920-02-04T12:30:22+00:00/1920-02-06T12:30:22+00:00",
612-
"1920-02-04T12:30:22.00Z/1920-02-06T12:30:22.00Z",
613-
"1920-02-04T12:30:22Z/1920-02-06T12:30:22Z",
614-
"1920-02-04T12:30:22.00+00:00/1920-02-06T12:30:22.00+00:00",
611+
"1970-02-04T12:30:22+00:00/1970-02-06T12:30:22+00:00",
612+
"1970-02-04T12:30:22.00Z/1970-02-06T12:30:22.00Z",
613+
"1970-02-04T12:30:22Z/1970-02-06T12:30:22Z",
614+
"1970-02-04T12:30:22.00+00:00/1970-02-06T12:30:22.00+00:00",
615615
]
616616

617617
for dt in dt_formats:

0 commit comments

Comments
 (0)