Skip to content

Commit 96b0f60

Browse files
Mihir-MavalankarJesse-Box
authored andcommitted
feat(autofix): Duplicate check for automation runs (#102933)
## PR Details + For the V0 launch of triage signals we are changing the conditions for starting an autofix run. + More details explanation here: https://www.notion.so/sentry/Triage-Signals-V0-Technical-Implementation-Details-2a18b10e4b5d8086a7ceddaf4194849a?source=copy_link#2a18b10e4b5d80d2ab2ce33d12db52e0 + As mentioned in the doc we can't rely on seer_fixability_score alone going forward and hence this PR adds cache based check + seer_autofix_last_triggered as a check. + A new column is overkill for this. @wedamija was right to point it out [in this PR.](#102898) Adding a redis cache based check is enough.
1 parent 6dd416c commit 96b0f60

File tree

2 files changed

+144
-1
lines changed

2 files changed

+144
-1
lines changed

src/sentry/tasks/post_process.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1628,7 +1628,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
16281628
event = job["event"]
16291629
group = event.group
16301630

1631-
# Only run on issues with no existing scan
1631+
# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
16321632
if group.seer_fixability_score is not None:
16331633
return
16341634

@@ -1660,6 +1660,13 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
16601660
):
16611661
return
16621662

1663+
# Check if automation has already been queued or completed for this group
1664+
# seer_autofix_last_triggered is set when trigger_autofix is successfully started.
1665+
# Use cache with short TTL to hold lock for a short since it takes a few minutes to set seer_autofix_last_triggeredes
1666+
cache_key = f"seer_automation_queued:{group.id}"
1667+
if cache.get(cache_key) or group.seer_autofix_last_triggered is not None:
1668+
return
1669+
16631670
# Don't run if there's already a task in progress for this issue
16641671
lock_key, lock_name = get_issue_summary_lock_key(group.id)
16651672
lock = locks.get(lock_key, duration=1, name=lock_name)
@@ -1684,6 +1691,11 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
16841691
if is_seer_scanner_rate_limited(project, group.organization):
16851692
return
16861693

1694+
# cache.add uses Redis SETNX which atomically sets the key only if it doesn't exist
1695+
# Returns False if another process already set the key, ensuring only one process proceeds
1696+
if not cache.add(cache_key, True, timeout=600): # 10 minute
1697+
return
1698+
16871699
start_seer_automation.delay(group.id)
16881700

16891701

tests/sentry/tasks/test_post_process.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,6 +3053,137 @@ def test_kick_off_seer_automation_with_hide_ai_features_enabled(
30533053

30543054
mock_start_seer_automation.assert_not_called()
30553055

3056+
@patch(
3057+
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
3058+
return_value=True,
3059+
)
3060+
@patch("sentry.tasks.autofix.start_seer_automation.delay")
3061+
@with_feature("organizations:gen-ai-features")
3062+
def test_kick_off_seer_automation_skips_when_seer_autofix_last_triggered_set(
3063+
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
3064+
):
3065+
"""Test that automation is skipped when group.seer_autofix_last_triggered is already set"""
3066+
self.project.update_option("sentry:seer_scanner_automation", True)
3067+
event = self.create_event(
3068+
data={"message": "testing"},
3069+
project_id=self.project.id,
3070+
)
3071+
3072+
# Set seer_autofix_last_triggered on the group to simulate autofix already triggered
3073+
group = event.group
3074+
group.seer_autofix_last_triggered = timezone.now()
3075+
group.save()
3076+
3077+
self.call_post_process_group(
3078+
is_new=True,
3079+
is_regression=False,
3080+
is_new_group_environment=True,
3081+
event=event,
3082+
)
3083+
3084+
mock_start_seer_automation.assert_not_called()
3085+
3086+
@patch(
3087+
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
3088+
return_value=True,
3089+
)
3090+
@patch("sentry.tasks.autofix.start_seer_automation.delay")
3091+
@with_feature("organizations:gen-ai-features")
3092+
def test_kick_off_seer_automation_skips_when_cache_key_exists(
3093+
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
3094+
):
3095+
"""Test that automation is skipped when cache key indicates it's already queued"""
3096+
self.project.update_option("sentry:seer_scanner_automation", True)
3097+
event = self.create_event(
3098+
data={"message": "testing"},
3099+
project_id=self.project.id,
3100+
)
3101+
3102+
# Set cache key to simulate automation already queued
3103+
cache_key = f"seer_automation_queued:{event.group.id}"
3104+
cache.set(cache_key, True, timeout=600)
3105+
3106+
self.call_post_process_group(
3107+
is_new=True,
3108+
is_regression=False,
3109+
is_new_group_environment=True,
3110+
event=event,
3111+
)
3112+
3113+
mock_start_seer_automation.assert_not_called()
3114+
3115+
# Cleanup
3116+
cache.delete(cache_key)
3117+
3118+
@patch(
3119+
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
3120+
return_value=True,
3121+
)
3122+
@patch("sentry.tasks.autofix.start_seer_automation.delay")
3123+
@with_feature("organizations:gen-ai-features")
3124+
def test_kick_off_seer_automation_uses_atomic_cache_add(
3125+
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
3126+
):
3127+
"""Test that cache.add atomic operation prevents race conditions"""
3128+
self.project.update_option("sentry:seer_scanner_automation", True)
3129+
event = self.create_event(
3130+
data={"message": "testing"},
3131+
project_id=self.project.id,
3132+
)
3133+
3134+
cache_key = f"seer_automation_queued:{event.group.id}"
3135+
3136+
with patch("sentry.tasks.post_process.cache") as mock_cache:
3137+
# Simulate cache.get returning None (not in cache)
3138+
# but cache.add returning False (another process set it)
3139+
mock_cache.get.return_value = None
3140+
mock_cache.add.return_value = False
3141+
3142+
self.call_post_process_group(
3143+
is_new=True,
3144+
is_regression=False,
3145+
is_new_group_environment=True,
3146+
event=event,
3147+
)
3148+
3149+
# Should check cache but not call automation due to cache.add returning False
3150+
mock_cache.get.assert_called()
3151+
mock_cache.add.assert_called_once_with(cache_key, True, timeout=600)
3152+
mock_start_seer_automation.assert_not_called()
3153+
3154+
@patch(
3155+
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
3156+
return_value=True,
3157+
)
3158+
@patch("sentry.tasks.autofix.start_seer_automation.delay")
3159+
@with_feature("organizations:gen-ai-features")
3160+
def test_kick_off_seer_automation_proceeds_when_cache_add_succeeds(
3161+
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
3162+
):
3163+
"""Test that automation proceeds when cache.add succeeds (no race condition)"""
3164+
self.project.update_option("sentry:seer_scanner_automation", True)
3165+
event = self.create_event(
3166+
data={"message": "testing"},
3167+
project_id=self.project.id,
3168+
)
3169+
3170+
# Ensure seer_autofix_last_triggered is not set
3171+
assert event.group.seer_autofix_last_triggered is None
3172+
3173+
self.call_post_process_group(
3174+
is_new=True,
3175+
is_regression=False,
3176+
is_new_group_environment=True,
3177+
event=event,
3178+
)
3179+
3180+
# Should successfully queue automation
3181+
mock_start_seer_automation.assert_called_once_with(event.group.id)
3182+
3183+
# Cleanup
3184+
cache_key = f"seer_automation_queued:{event.group.id}"
3185+
cache.delete(cache_key)
3186+
30563187

30573188
class PostProcessGroupErrorTest(
30583189
TestCase,

0 commit comments

Comments
 (0)