Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3882](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3882))
- `opentelemetry-instrumentation-aiohttp-server`: delay initialization of tracer, meter and excluded urls to instrumentation for testability
([#3836](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3836))
- `opentelemetry-instrumentation-elasticsearch`: Enhance elasticsearch query body sanitization
([#3919](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3919))


## Version 1.38.0/0.59b0 (2025-10-16)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,52 +13,26 @@
# limitations under the License.
import json

sanitized_keys = (
"message",
"should",
"filter",
"query",
"queries",
"intervals",
"match",
)
sanitized_value = "?"


# pylint: disable=C0103
def _flatten_dict(d, parent_key=""):
items = []
for k, v in d.items():
new_key = parent_key + "." + k if parent_key else k
# recursive call _flatten_dict for a non-empty dict value
if isinstance(v, dict) and v:
items.extend(_flatten_dict(v, new_key).items())
else:
items.append((new_key, v))
return dict(items)


def _unflatten_dict(d):
res = {}
for k, v in d.items():
keys = k.split(".")
d = res
for key in keys[:-1]:
if key not in d:
d[key] = {}
d = d[key]
d[keys[-1]] = v
return res
def _mask_leaf_nodes(obj):
"""
Recursively traverses JSON structure and masks leaf node values.
Leaf nodes are final values that are no longer dict or list.
"""
if isinstance(obj, dict):
return {key: _mask_leaf_nodes(value) for key, value in obj.items()}
elif isinstance(obj, list):
return [_mask_leaf_nodes(item) for item in obj]
else:
# Mask leaf node
return sanitized_value


def sanitize_body(body) -> str:
if isinstance(body, str):
body = json.loads(body)

flatten_body = _flatten_dict(body)

for key in flatten_body:
if key.endswith(sanitized_keys):
flatten_body[key] = sanitized_value

return str(_unflatten_dict(flatten_body))
masked_body = _mask_leaf_nodes(body)
return str(masked_body)
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,149 @@
}
}

term_query = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "john.doe@company.com"}},
{"term": {"ssn": "123-45-6789"}},
{"term": {"credit_card": "4111-1111-1111-1111"}},
]
}
}
}

interval_query_sanitized = {
"query": {
"intervals": {
"my_text": {"all_of": {"ordered": True, "intervals": "?"}}
"my_text": {
"all_of": {
"ordered": "?",
"intervals": [
{
"match": {
"query": "?",
"max_gaps": "?",
"ordered": "?",
}
},
{
"any_of": {
"intervals": [
{"match": {"query": "?"}},
{"match": {"query": "?"}},
]
}
},
],
}
}
}
}
}

match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}}

filter_query_sanitized = {
"query": {
"bool": {
"must": [
{"match": {"title": "Search"}},
{"match": {"content": "Elasticsearch"}},
{"match": {"title": "?"}},
{"match": {"content": "?"}},
],
"filter": "?",
"filter": [
{"term": {"status": "?"}},
{"range": {"publish_date": {"gte": "?"}}},
],
}
}
}

term_query_sanitized = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "?"}},
{"term": {"ssn": "?"}},
{"term": {"credit_card": "?"}},
]
}
}
}

aggregation_query = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{"to": 50, "key": "cheap"},
{"from": 50, "to": 100, "key": "medium"},
{"from": 100, "key": "expensive"},
],
}
},
"avg_price": {"avg": {"field": "price"}},
"top_tags": {
"terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}}
},
},
}

aggregation_query_sanitized = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "?",
"ranges": [
{"to": "?", "key": "?"},
{"from": "?", "to": "?", "key": "?"},
{"from": "?", "key": "?"},
],
}
},
"avg_price": {"avg": {"field": "?"}},
"top_tags": {
"terms": {"field": "?", "size": "?", "order": {"_count": "?"}}
},
},
}

script_query = {
"query": {
"script": {
"script": {
"source": "doc['price'].value > params.threshold",
"lang": "painless",
"params": {"threshold": 100},
}
}
},
"script_fields": {
"discounted_price": {
"script": {
"source": "doc['price'].value * params.discount",
"params": {"discount": 0.9},
}
}
},
}

script_query_sanitized = {
"query": {
"script": {
"script": {
"source": "?",
"lang": "?",
"params": {"threshold": "?"},
}
}
},
"script_fields": {
"discounted_price": {
"script": {"source": "?", "params": {"discount": "?"}}
}
},
}
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,18 @@ def test_body_sanitization(self, _):
sanitize_body(json.dumps(sanitization_queries.interval_query)),
str(sanitization_queries.interval_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.term_query),
str(sanitization_queries.term_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.aggregation_query),
str(sanitization_queries.aggregation_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.script_query),
str(sanitization_queries.script_query_sanitized),
)

def test_bulk(self, request_mock):
request_mock.return_value = helpers.mock_response("{}")
Expand Down
Loading