Skip to content

Commit 2cb3094

Browse files
tw4lSuaYoo
authored and committed
Track autoPausedEmailsSent state in db instead of crawl state
This is much more reliable, prevents the duplicate emails that were sometimes sent before, and makes it easier to clear the state when a crawl is unpaused.
1 parent fb6428d commit 2cb3094

File tree

4 files changed

+38
-5
lines changed

4 files changed

+38
-5
lines changed

backend/btrixcloud/crawls.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,11 @@ async def pause_crawl(
838838
if pause and not paused_at:
839839
paused_at = dt_now()
840840

841+
if not pause:
842+
# If unpausing, unset autoPausedEmailsSent so that we will send
843+
# emails again if quota is reached
844+
await self.set_auto_paused_emails_sent(crawl_id, org, False)
845+
841846
try:
842847
result = await self.crawl_manager.pause_resume_crawl(
843848
crawl_id, paused_at=paused_at
@@ -1210,6 +1215,7 @@ async def get_crawl_logs(
12101215
async def notify_org_admins_of_auto_paused_crawl(
12111216
self,
12121217
paused_reason: TYPE_AUTO_PAUSED_STATES,
1218+
crawl_id: str,
12131219
cid: UUID,
12141220
org: Organization,
12151221
):
@@ -1231,6 +1237,29 @@ async def notify_org_admins_of_auto_paused_crawl(
12311237
]
12321238
)
12331239

1240+
await self.set_auto_paused_emails_sent(crawl_id, org)
1241+
1242+
async def set_auto_paused_emails_sent(
1243+
self, crawl_id: str, org: Organization, emails_sent: bool = True
1244+
):
1245+
"""Set if auto-paused emails already sent"""
1246+
await self.crawls.find_one_and_update(
1247+
{"_id": crawl_id, "oid": org.id, "type": "crawl"},
1248+
{"$set": {"autoPausedEmailsSent": emails_sent}},
1249+
)
1250+
1251+
async def get_auto_paused_emails_sent(
1252+
self, crawl_id: str, org: Organization
1253+
) -> bool:
1254+
"""Return whether auto-paused emails already sent for crawl"""
1255+
res = await self.crawls.find_one(
1256+
{"_id": crawl_id, "oid": org.id, "type": "crawl"},
1257+
projection=["autoPausedEmailsSent"],
1258+
)
1259+
if res:
1260+
return res.get("autoPausedEmailsSent", False)
1261+
return False
1262+
12341263

12351264
# ============================================================================
12361265
async def recompute_crawl_file_count_and_size(crawls, crawl_id: str):

backend/btrixcloud/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,8 @@ class Crawl(BaseCrawl, CrawlConfigCore):
10921092

10931093
pendingSize: int = 0
10941094

1095+
autoPausedEmailsSent: bool = False
1096+
10951097

10961098
# ============================================================================
10971099
class CrawlCompleteIn(BaseModel):

backend/btrixcloud/operator/crawls.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1654,13 +1654,18 @@ async def update_crawl_state(
16541654
allowed_from=RUNNING_AND_WAITING_STATES,
16551655
)
16561656

1657-
if paused_state != "paused" and not status.autoPausedEmailsSent:
1657+
if (
1658+
paused_state != "paused"
1659+
and not await self.crawl_ops.get_auto_paused_emails_sent(
1660+
crawl.id, crawl.org
1661+
)
1662+
):
16581663
await self.crawl_ops.notify_org_admins_of_auto_paused_crawl(
16591664
paused_reason=paused_state,
1665+
crawl_id=crawl.id,
16601666
cid=crawl.cid,
16611667
org=crawl.org,
16621668
)
1663-
status.autoPausedEmailsSent = True
16641669

16651670
return status
16661671

backend/btrixcloud/operator/models.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,3 @@ class CrawlStatus(BaseModel):
272272

273273
# last state
274274
last_state: TYPE_ALL_CRAWL_STATES = Field(default="starting", exclude=True)
275-
276-
# email sent to org admins because crawl was auto-paused
277-
autoPausedEmailsSent: bool = False

0 commit comments

Comments
 (0)