Skip to content

Commit 6dd416c

Browse files
roaga authored and Jesse-Box committed
ref(explorer): improve profile indexing query (#103014)
Improves `get_profiles_for_trace` to more reliably get all unique profiles and their full time ranges. Previously we were likely to miss some, since we only sampled 50 spans from the start of the trace. This change also greatly simplifies the post-query logic, and should help avoid ReadTimeouts and exceeding the Snuba quota.
1 parent e010ac5 commit 6dd416c

File tree

3 files changed

+186
-133
lines changed

3 files changed

+186
-133
lines changed

src/sentry/seer/explorer/index_data.py

Lines changed: 37 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def _fetch_and_process_profile(
233233
concurrently from multiple threads.
234234
235235
Args:
236-
profile_info: Dictionary containing profile metadata (profile_id, span_id, etc.)
236+
profile_info: Dictionary containing profile metadata (profile_id, is_continuous, start_ts, end_ts)
237237
organization_id: Organization ID
238238
project_id: Project ID
239239
trace_id: Trace ID for logging
@@ -242,7 +242,6 @@ def _fetch_and_process_profile(
242242
ProfileData if successful, None otherwise
243243
"""
244244
profile_id = profile_info["profile_id"]
245-
span_id = profile_info["span_id"]
246245
transaction_name = profile_info["transaction_name"]
247246
is_continuous = profile_info["is_continuous"]
248247
start_ts = profile_info["start_ts"]
@@ -275,7 +274,6 @@ def _fetch_and_process_profile(
275274
if execution_tree:
276275
return ProfileData(
277276
profile_id=profile_id,
278-
span_id=span_id,
279277
transaction_name=transaction_name,
280278
execution_tree=execution_tree,
281279
project_id=project_id,
@@ -297,7 +295,7 @@ def _fetch_and_process_profile(
297295

298296
def get_profiles_for_trace(trace_id: str, project_id: int) -> TraceProfiles | None:
299297
"""
300-
Get profiles for a given trace, with one profile per unique span/transaction.
298+
Get profiles for a given trace, supporting both transaction and continuous profiles.
301299
302300
Args:
303301
trace_id: The trace ID to find profiles for
@@ -328,129 +326,77 @@ def get_profiles_for_trace(trace_id: str, project_id: int) -> TraceProfiles | No
328326
auto_fields=True,
329327
)
330328

331-
# Step 1: Find spans in the trace that have profile data - using same constraint as flamegraph
332-
profiling_constraint = "(has:profile.id) or (has:profiler.id has:thread.id)"
329+
# Use aggregation query to get unique profile IDs and trace time range
330+
# Query for both transaction profiles (profile.id) and continuous profiles (profiler.id)
333331
profiles_result = Spans.run_table_query(
334332
params=snuba_params,
335-
query_string=f"trace:{trace_id} project.id:{project_id} {profiling_constraint}",
333+
query_string=f"trace:{trace_id} project.id:{project_id} (has:profile.id OR has:profiler.id)",
336334
selected_columns=[
337-
"span_id",
338335
"profile.id",
339336
"profiler.id",
340-
"thread.id",
341-
"transaction",
342-
"span.op",
343-
"is_transaction",
344-
"precise.start_ts",
345-
"precise.finish_ts",
337+
"min(precise.start_ts)",
338+
"max(precise.finish_ts)",
346339
],
347-
orderby=["precise.start_ts"],
340+
orderby=[],
348341
offset=0,
349-
limit=50,
342+
limit=5,
350343
referrer=Referrer.SEER_RPC,
351344
config=config,
352345
sampling_mode="NORMAL",
353346
)
354347

355-
# Step 2: Collect all profiles and merge those with same profile_id and is_continuous
356-
all_profiles = []
348+
profile_data = []
357349

358350
for row in profiles_result.get("data", []):
359-
span_id = row.get("span_id")
360351
profile_id = row.get("profile.id") # Transaction profiles
361352
profiler_id = row.get("profiler.id") # Continuous profiles
362-
transaction_name = row.get("transaction")
363-
start_ts = row.get("precise.start_ts")
364-
end_ts = row.get("precise.finish_ts")
365-
366-
logger.info(
367-
"Iterating over span to get profiles",
368-
extra={
369-
"span_id": span_id,
370-
"profile_id": profile_id,
371-
"profiler_id": profiler_id,
372-
"transaction_name": transaction_name,
373-
},
374-
)
375-
376-
if not span_id:
377-
logger.info(
378-
"Span doesn't have an id, skipping",
379-
extra={"span_id": span_id},
380-
)
381-
continue
353+
start_ts = row.get("min(precise.start_ts)")
354+
end_ts = row.get("max(precise.finish_ts)")
382355

383-
# Use profile.id first (transaction profiles), fallback to profiler.id (continuous profiles)
384-
actual_profile_id = profile_id or profiler_id
356+
actual_profile_id = profiler_id or profile_id
385357
if not actual_profile_id:
386-
logger.info(
387-
"Span doesn't have a profile or profiler id, skipping",
388-
extra={"span_id": span_id},
389-
)
390358
continue
391359

392-
# Determine if this is a continuous profile (profiler.id without profile.id)
393-
is_continuous = profile_id is None and profiler_id is not None
360+
is_continuous = profiler_id is not None
394361

395-
all_profiles.append(
362+
profile_data.append(
396363
{
397-
"span_id": span_id,
398364
"profile_id": actual_profile_id,
399-
"transaction_name": transaction_name,
400365
"is_continuous": is_continuous,
401366
"start_ts": start_ts,
402367
"end_ts": end_ts,
403368
}
404369
)
405370

406-
# Merge profiles with same profile_id and is_continuous
407-
# Use the earliest start_ts and latest end_ts for merged profiles
408-
profile_groups = {}
409-
for profile in all_profiles:
410-
key = (profile["profile_id"], profile["is_continuous"])
411-
412-
if key not in profile_groups:
413-
profile_groups[key] = {
414-
"span_id": profile["span_id"], # Keep the first span_id
415-
"profile_id": profile["profile_id"],
416-
"transaction_name": profile["transaction_name"],
417-
"is_continuous": profile["is_continuous"],
418-
"start_ts": profile["start_ts"],
419-
"end_ts": profile["end_ts"],
420-
}
421-
else:
422-
# Merge time ranges - use earliest start and latest end
423-
existing = profile_groups[key]
424-
if profile["start_ts"] and (
425-
existing["start_ts"] is None or profile["start_ts"] < existing["start_ts"]
426-
):
427-
existing["start_ts"] = profile["start_ts"]
428-
if profile["end_ts"] and (
429-
existing["end_ts"] is None or profile["end_ts"] > existing["end_ts"]
430-
):
431-
existing["end_ts"] = profile["end_ts"]
432-
433-
unique_profiles = list(profile_groups.values())
434-
435-
logger.info(
436-
"Merged profiles",
437-
extra={
438-
"original_count": len(all_profiles),
439-
"merged_count": len(unique_profiles),
440-
},
441-
)
442-
443-
if not unique_profiles:
371+
if not profile_data:
444372
logger.info(
445373
"No profiles found for trace",
446374
extra={"trace_id": trace_id, "project_id": project_id},
447375
)
448376
return None
449377

450-
# Step 3: Fetch and process profiles in parallel
378+
logger.info(
379+
"Found unique profiles for trace",
380+
extra={
381+
"trace_id": trace_id,
382+
"profile_count": len(profile_data),
383+
},
384+
)
385+
386+
# Fetch and process profiles in parallel
451387
processed_profiles = []
388+
profiles_to_fetch = [
389+
{
390+
"profile_id": p["profile_id"],
391+
"transaction_name": None,
392+
"is_continuous": p["is_continuous"],
393+
"start_ts": p["start_ts"],
394+
"end_ts": p["end_ts"],
395+
}
396+
for p in profile_data
397+
]
452398

453-
with ThreadPoolExecutor(max_workers=min(len(unique_profiles), 10)) as executor:
399+
with ThreadPoolExecutor(max_workers=min(len(profiles_to_fetch), 5)) as executor:
454400
future_to_profile = {
455401
executor.submit(
456402
_fetch_and_process_profile,
@@ -459,7 +405,7 @@ def get_profiles_for_trace(trace_id: str, project_id: int) -> TraceProfiles | No
459405
project_id,
460406
trace_id,
461407
): profile_info
462-
for profile_info in unique_profiles
408+
for profile_info in profiles_to_fetch
463409
}
464410

465411
for future in as_completed(future_to_profile):

src/sentry/seer/sentry_data_models.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ class ExecutionTreeNode(BaseModel):
5858

5959
class ProfileData(BaseModel):
6060
profile_id: str
61-
span_id: str
6261
transaction_name: str | None
6362
execution_tree: list[ExecutionTreeNode]
6463
project_id: int

0 commit comments

Comments (0)