Skip to content

Commit 8388003

Browse files
Neehar Duvvuri
authored and committed
implement scrubber for sensitive information
1 parent 201af13 commit 8388003

File tree

1 file changed

+26
-6
lines changed
  • sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate

1 file changed

+26
-6
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,7 @@ def _log_events_to_app_insights(
10581058
events: List[Dict[str, Any]],
10591059
log_attributes: Dict[str, Any],
10601060
data_source_item: Optional[Dict[str, Any]] = None,
1061+
resource=None, # Resource to attach to LogRecords
10611062
) -> None:
10621063
"""
10631064
Log independent events directly to App Insights using OpenTelemetry logging.
@@ -1071,6 +1072,8 @@ def _log_events_to_app_insights(
10711072
:type log_attributes: Dict[str, Any]
10721073
:param data_source_item: Data source item containing trace_id, response_id, conversation_id
10731074
:type data_source_item: Optional[Dict[str, Any]]
1075+
:param resource: Resource to attach to LogRecords for anonymization
1076+
:type resource: Optional[Resource]
10741077
"""
10751078

10761079
from opentelemetry.sdk._logs import LogRecord
@@ -1155,12 +1158,16 @@ def _log_events_to_app_insights(
11551158
if agent_id:
11561159
log_attributes["gen_ai.agent.id"] = agent_id
11571160

1158-
# Create a LogRecord and emit it
1161+
# Anonymize IP address to prevent Azure GeoIP enrichment and location tracking
1162+
log_attributes["http.client_ip"] = "0.0.0.0"
1163+
1164+
# Create a LogRecord and emit it, passing the resource for anonymization
11591165
log_record = LogRecord(
11601166
timestamp=time.time_ns(),
11611167
observed_timestamp=time.time_ns(),
11621168
body=EVALUATION_EVENT_NAME,
11631169
attributes=log_attributes,
1170+
resource=resource, # Pass the anonymized resource
11641171
)
11651172
if trace_id:
11661173
log_record.trace_id = trace_id
@@ -1195,18 +1202,30 @@ def emit_eval_result_events_to_app_insights(app_insights_config: AppInsightsConf
11951202
return
11961203

11971204
try:
1198-
# Configure OpenTelemetry logging
1199-
logger_provider = LoggerProvider()
1205+
# Configure OpenTelemetry logging with anonymized Resource attributes
1206+
from opentelemetry.sdk.resources import Resource
1207+
from opentelemetry.semconv.resource import ResourceAttributes
1208+
1209+
# Create a resource with minimal attributes to prevent sensitive data collection
1210+
# SERVICE_INSTANCE_ID maps to cloud_RoleInstance in Azure Monitor and prevents
1211+
# Azure Monitor from auto-detecting the device hostname
1212+
anonymized_resource = Resource.create({
1213+
ResourceAttributes.SERVICE_NAME: "unknown",
1214+
ResourceAttributes.SERVICE_INSTANCE_ID: "unknown",
1215+
})
1216+
1217+
logger_provider = LoggerProvider(resource=anonymized_resource)
12001218
_logs.set_logger_provider(logger_provider)
12011219

12021220
# Create Azure Monitor log exporter
12031221
azure_log_exporter = AzureMonitorLogExporter(connection_string=app_insights_config["connection_string"])
12041222

1205-
# Add the exporter to the logger provider
1223+
# Add the Azure Monitor exporter to the logger provider
12061224
logger_provider.add_log_record_processor(BatchLogRecordProcessor(azure_log_exporter))
12071225

1208-
# Create a logger
1209-
otel_logger = _logs.get_logger(__name__)
1226+
# Create a logger from OUR configured logger_provider (not the global one)
1227+
# This ensures the logger uses our anonymized resource
1228+
otel_logger = logger_provider.get_logger(__name__)
12101229

12111230
# Initialize base log attributes with extra_attributes if present, otherwise empty dict
12121231
base_log_attributes = app_insights_config.get("extra_attributes", {})
@@ -1230,6 +1249,7 @@ def emit_eval_result_events_to_app_insights(app_insights_config: AppInsightsConf
12301249
events=result["results"],
12311250
log_attributes=log_attributes,
12321251
data_source_item=result["datasource_item"] if "datasource_item" in result else None,
1252+
resource=anonymized_resource, # Pass the anonymized resource
12331253
)
12341254
# Force flush to ensure events are sent
12351255
logger_provider.force_flush()

0 commit comments

Comments
 (0)