diff --git a/CHANGELOG.md b/CHANGELOG.md index e206925782..b36b689b60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3882](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3882)) - `opentelemetry-instrumentation-aiohttp-server`: delay initialization of tracer, meter and excluded urls to instrumentation for testability ([#3836](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3836)) +- `opentelemetry-instrumentation-elasticsearch`: Enhance elasticsearch query body sanitization + ([#3919](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3919)) + ## Version 1.38.0/0.59b0 (2025-10-16) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 97f2bc3b87..2b116789f3 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -13,52 +13,25 @@ # limitations under the License. import json -sanitized_keys = ( - "message", - "should", - "filter", - "query", - "queries", - "intervals", - "match", -) sanitized_value = "?" -# pylint: disable=C0103 -def _flatten_dict(d, parent_key=""): - items = [] - for k, v in d.items(): - new_key = parent_key + "." + k if parent_key else k - # recursive call _flatten_dict for a non-empty dict value - if isinstance(v, dict) and v: - items.extend(_flatten_dict(v, new_key).items()) - else: - items.append((new_key, v)) - return dict(items) - - -def _unflatten_dict(d): - res = {} - for k, v in d.items(): - keys = k.split(".") - d = res - for key in keys[:-1]: - if key not in d: - d[key] = {} - d = d[key] - d[keys[-1]] = v - return res +def _mask_leaf_nodes(obj): + """ + Recursively traverses JSON structure and masks leaf node values. + Leaf nodes are final values that are no longer dict or list. + """ + if isinstance(obj, dict): + return {key: _mask_leaf_nodes(value) for key, value in obj.items()} + if isinstance(obj, list): + return [_mask_leaf_nodes(item) for item in obj] + # Mask leaf node + return sanitized_value def sanitize_body(body) -> str: if isinstance(body, str): body = json.loads(body) - flatten_body = _flatten_dict(body) - - for key in flatten_body: - if key.endswith(sanitized_keys): - flatten_body[key] = sanitized_value - - return str(_unflatten_dict(flatten_body)) + masked_body = _mask_leaf_nodes(body) + return str(masked_body) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es6.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es6.py index 8169eb25c4..d6959e77e2 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es6.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es6.py @@ -18,11 +18,11 @@ class Index: "doc": { "properties": { "title": { - "analyzer": "snowball", - "fields": {"raw": {"type": "keyword"}}, - "type": "text", + "analyzer": "?", + "fields": {"raw": {"type": "?"}}, + "type": "?", }, - "body": {"analyzer": "snowball", "type": "text"}, + "body": {"analyzer": "?", "type": "?"}, } } } diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es7.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es7.py index 377173f7ac..be3dbeb92b 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es7.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es7.py @@ -17,11 +17,11 @@ class Index: "mappings": { "properties": { "title": { - "analyzer": "snowball", - "fields": {"raw": {"type": "keyword"}}, - "type": "text", + "analyzer": "?", + "fields": {"raw": {"type": "?"}}, + "type": "?", }, - "body": {"analyzer": "snowball", "type": "text"}, + "body": {"analyzer": "?", "type": "?"}, } } } diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es8.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es8.py index a450be68ec..6627cab68d 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es8.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/helpers_es8.py @@ -29,11 +29,11 @@ class Index: "mappings": { "properties": { "title": { - "analyzer": "snowball", - "fields": {"raw": {"type": "keyword"}}, - "type": "text", + "analyzer": "?", + "fields": {"raw": {"type": "?"}}, + "type": "?", }, - "body": {"analyzer": "snowball", "type": "text"}, + "body": {"analyzer": "?", "type": "?"}, } } } diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py index 234e24433e..dc47775d81 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py @@ -44,22 +44,149 @@ } } +term_query = { + "query": { + "bool": { + "must": [ + {"term": {"user_email": "john.doe@company.com"}}, + {"term": {"ssn": "123-45-6789"}}, + {"term": {"credit_card": "4111-1111-1111-1111"}}, + ] + } + } +} + interval_query_sanitized = { "query": { "intervals": { - "my_text": {"all_of": {"ordered": True, "intervals": "?"}} + "my_text": { + "all_of": { + "ordered": "?", + "intervals": [ + { + "match": { + "query": "?", + "max_gaps": "?", + "ordered": "?", + } + }, + { + "any_of": { + "intervals": [ + {"match": {"query": "?"}}, + {"match": {"query": "?"}}, + ] + } + }, + ], + } + } } } } + match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}} + filter_query_sanitized = { "query": { "bool": { "must": [ - {"match": {"title": "Search"}}, - {"match": {"content": "Elasticsearch"}}, + {"match": {"title": "?"}}, + {"match": {"content": "?"}}, ], - "filter": "?", + "filter": [ + {"term": {"status": "?"}}, + {"range": {"publish_date": {"gte": "?"}}}, + ], + } + } +} + +term_query_sanitized = { + "query": { + "bool": { + "must": [ + {"term": {"user_email": "?"}}, + {"term": {"ssn": "?"}}, + {"term": {"credit_card": "?"}}, + ] } } } + +aggregation_query = { + "query": {"match_all": {}}, + "aggs": { + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + {"to": 50, "key": "cheap"}, + {"from": 50, "to": 100, "key": "medium"}, + {"from": 100, "key": "expensive"}, + ], + } + }, + "avg_price": {"avg": {"field": "price"}}, + "top_tags": { + "terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}} + }, + }, +} + +aggregation_query_sanitized = { + "query": {"match_all": {}}, + "aggs": { + "price_ranges": { + "range": { + "field": "?", + "ranges": [ + {"to": "?", "key": "?"}, + {"from": "?", "to": "?", "key": "?"}, + {"from": "?", "key": "?"}, + ], + } + }, + "avg_price": {"avg": {"field": "?"}}, + "top_tags": { + "terms": {"field": "?", "size": "?", "order": {"_count": "?"}} + }, + }, +} + +script_query = { + "query": { + "script": { + "script": { + "source": "doc['price'].value > params.threshold", + "lang": "painless", + "params": {"threshold": 100}, + } + } + }, + "script_fields": { + "discounted_price": { + "script": { + "source": "doc['price'].value * params.discount", + "params": {"discount": 0.9}, + } + } + }, +} + +script_query_sanitized = { + "query": { + "script": { + "script": { + "source": "?", + "lang": "?", + "params": {"threshold": "?"}, + } + } + }, + "script_fields": { + "discounted_price": { + "script": {"source": "?", "params": {"discount": "?"}} + } + }, +} diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index fc2a4d3f44..d8386a2f4e 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -83,7 +83,9 @@ class TestElasticsearchIntegration(TestBase): "elasticsearch.url": "/test-index/_search", "elasticsearch.method": helpers.dsl_search_method, "elasticsearch.target": "test-index", - DB_STATEMENT: str({"query": {"bool": {"filter": "?"}}}), + DB_STATEMENT: str( + {"query": {"bool": {"filter": [{"term": {"author": "?"}}]}}} + ), } create_attributes = { @@ -419,8 +421,8 @@ def test_dsl_index(self, request_mock): self.assertEqual( literal_eval(span.attributes[DB_STATEMENT]), { - "body": "A few words here, a few words there", - "title": "About searching", + "body": "?", + "title": "?", }, ) @@ -582,6 +584,18 @@ def test_body_sanitization(self, _): sanitize_body(json.dumps(sanitization_queries.interval_query)), str(sanitization_queries.interval_query_sanitized), ) + self.assertEqual( + sanitize_body(sanitization_queries.term_query), + str(sanitization_queries.term_query_sanitized), + ) + self.assertEqual( + sanitize_body(sanitization_queries.aggregation_query), + str(sanitization_queries.aggregation_query_sanitized), + ) + self.assertEqual( + sanitize_body(sanitization_queries.script_query), + str(sanitization_queries.script_query_sanitized), + ) def test_bulk(self, request_mock): request_mock.return_value = helpers.mock_response("{}")