Commit b8db930

Using context
1 parent 638b8ea commit b8db930

2 files changed: 63 additions, 34 deletions

scripts/check-urls.py

Lines changed: 15 additions & 32 deletions
@@ -20,7 +20,7 @@
 Check them with CURL
 """
 
-JOIN_TIMEOUT_SEC: int = 120
+JOIN_TIMEOUT_SEC = 120
 
 CURL_EXIT_CODES_AND_HTTP_CODES: dict[str, tuple[int, int | None]] = {
     "https://api.aspose.cloud/connect/token": (CURL_EXIT_CODES.HTTP_RETURNED_ERROR, 400),
@@ -36,15 +36,15 @@
     re.compile(r"^https://github\.com/(?P<user>[^/]+)/(?P<repo>[^/]+)/(?:blob|issues)/\S+$"),
 ]
 
-URLS_TO_IGNORE: frozenset[str] = frozenset(
+URLS_TO_IGNORE = frozenset(
     [
         "https://api.aspose.cloud",
         "https://www.aspose.cloud/404",
         "https://www.aspose.cloud/404/",
     ]
 )
 
-IGNORE_DOMAINS: Subdomains = Subdomains(
+IGNORE_DOMAINS = Subdomains(
     [
         ".android.com",
         ".apache.org",
@@ -81,10 +81,10 @@
     ]
 )
 
-URL_END_CHARS: str = r",#\)\"'<>\*\s\\"
-URL_RE_PATTERN: str = r"(https*://[^{0}]+)[{0}]?".format(URL_END_CHARS)
+URL_END_CHARS = r",#\)\"'<>\*\s\\"
+URL_RE_PATTERN = r"(https*://[^{0}]+)[{0}]?".format(URL_END_CHARS)
 # print(URL_RE_PATTERN)
-EXTRACT_URL_REGEX: re.Pattern[str] = re.compile(URL_RE_PATTERN, re.MULTILINE)
+EXTRACT_URL_REGEX = re.compile(URL_RE_PATTERN, re.MULTILINE)
 
 # URL : [Files]
 EXTRACTED_URLS_WITH_FILES: dict[str, list[str]] = {k: [] for k in URLS_TO_IGNORE}
@@ -128,7 +128,7 @@ def url_extractor(text: str, filename: str) -> typing.Generator[str, None, None]
         EXTRACTED_URLS_WITH_FILES[url].append(filename)
 
 
-FILES_TO_IGNORE: frozenset[str] = frozenset(
+FILES_TO_IGNORE = frozenset(
     [
         ".jar",
         ".jar",
@@ -153,7 +153,7 @@ def text_extractor(files: list[str]) -> typing.Generator[tuple[str, str], None,
             raise
 
 
-JOB_SUMMARY: JobSummary = JobSummary(os.environ.get("GITHUB_STEP_SUMMARY", "step_summary.md"))
+JOB_SUMMARY = JobSummary(os.environ.get("GITHUB_STEP_SUMMARY", "step_summary.md"))
 JOB_SUMMARY.add_header("Test all URLs")
 
 
@@ -162,32 +162,15 @@ def main(files: list[str]) -> int:
         expectations=CURL_EXIT_CODES_AND_HTTP_CODES,
     )
 
-    # Setup signal handlers for graceful shutdown
-    def _handle_signal(_sig: int, _frame: typing.Any) -> None:
-        url_checker.stop()
-
-    with contextlib.suppress(Exception):
-        signal.signal(signal.SIGINT, _handle_signal)
-        signal.signal(signal.SIGTERM, _handle_signal)
-
-    checker = threading.Thread(target=url_checker.run, daemon=True)
-    checker.start()
-
-    for filename, text in text_extractor(files):
-        for url in url_extractor(text, filename):
-            # print("In:", url)
-            url_checker.add_url(url)
-    url_checker.close()
-    checker.join(timeout=JOIN_TIMEOUT_SEC)
-    if checker.is_alive():
-        print(
-            f"URL checker did not finish within {JOIN_TIMEOUT_SEC}s; exiting early.",
-            file=sys.stderr,
-            flush=True,
-        )
+    with url_checker.start() as checker:
+        for filename, text in text_extractor(files):
+            for url in url_extractor(text, filename):
+                checker.add_url(url)
+        checker.wait(JOIN_TIMEOUT_SEC)
+    results = url_checker.results
 
     # Collect results and write summary
-    for res in url_checker.results:
+    for res in results:
         if res.ok:
             JOB_SUMMARY.add_success(res.url)
         else:
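
The rewritten main() above hides the thread lifecycle (start, feed the queue, signal end of input, join) behind a single with block. Below is a minimal, self-contained sketch of the same pattern: a daemon worker fed through a sentinel-terminated queue and wrapped in a context manager. The QueueWorker class is illustrative only and is not part of this repository.

import threading
from queue import Queue

class QueueWorker:
    """Illustrative sketch of the pattern; not the real UrlChecker."""

    def __init__(self) -> None:
        self.queue = Queue()
        self.results = []
        self._thread = None
        self._closed = False

    def run(self) -> None:
        # Consume items until the None sentinel arrives.
        while True:
            item = self.queue.get()
            if item is None:
                break
            self.results.append(item.upper())

    def add(self, item: str) -> None:
        self.queue.put_nowait(item)

    def close(self) -> None:
        # Idempotent: enqueue the sentinel only once.
        if not self._closed:
            self._closed = True
            self.queue.put_nowait(None)

    def start(self) -> "QueueWorker":
        if self._thread is None:
            self._thread = threading.Thread(target=self.run, daemon=True)
            self._thread.start()
        return self

    def __enter__(self) -> "QueueWorker":
        return self.start()

    def __exit__(self, exc_type, exc, tb) -> None:
        # Signal end of input, then wait for the worker to drain the queue.
        self.close()
        if self._thread is not None:
            self._thread.join()

with QueueWorker() as worker:
    for item in ("a", "b", "c"):
        worker.add(item)
print(worker.results)  # ['A', 'B', 'C']

The real UrlChecker in scripts/url_checker.py (diff below) adds a join timeout and a stop() escape hatch on top of this skeleton.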

scripts/url_checker.py

Lines changed: 48 additions & 2 deletions
@@ -3,7 +3,9 @@
 import time
 from dataclasses import dataclass
 from queue import Queue, Empty
-from typing import Callable, Optional
+from typing import Callable, Optional, Iterable
+from types import TracebackType
+import threading
 
 from curl_wrapper import CurlWrapper, EXIT_CODES
 
@@ -39,12 +41,16 @@ def __init__(
         self.stop_event = False
         self.next_report_age_sec = 5
         self.results: list[CheckResult] = []
+        self._thread: threading.Thread | None = None
+        self._closed: bool = False
 
     def add_url(self, url: str) -> None:
         self.queue.put_nowait(url)
 
     def close(self) -> None:
-        self.queue.put_nowait(None)
+        if not self._closed:
+            self._closed = True
+            self.queue.put_nowait(None)
 
     def stop(self) -> None:
         self.stop_event = True
@@ -95,6 +101,46 @@ def run(self) -> None:
             time.sleep(0.2)
         print("Worker finished")
 
+    # Context management and user-friendly API
+    def start(self) -> "UrlChecker":
+        if self._thread is not None:
+            return self
+        self._thread = threading.Thread(target=self.run, daemon=True)
+        self._thread.start()
+        return self
+
+    def __enter__(self) -> "UrlChecker":
+        return self.start()
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        tb: TracebackType | None,
+    ) -> None:
+        # Ensure we signal end of input and wait for completion
+        self.close()
+        self.wait()
+
+    def wait(self, join_timeout_sec: float | None = None) -> None:
+        # Ensure end-of-input signaled before waiting
+        self.close()
+        t = self._thread
+        if t is None:
+            return
+        if join_timeout_sec is not None:
+            t.join(timeout=join_timeout_sec)
+            if t.is_alive():
+                # Try to stop gracefully and inform user
+                self.stop()
+                print(
+                    f"URL checker did not finish within {join_timeout_sec}s; exiting early.",
+                    file=sys.stderr,
+                    flush=True,
+                )
+        else:
+            t.join()
+
     def _process_finished(self, task: CurlWrapper) -> None:
         expected_ret_code, expected_http_code = self.expectations.get(task.url, (0, None))
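
A usage sketch of the new API, for context. The constructor call is an assumption: this diff only shows that UrlChecker accepts an expectations mapping (see scripts/check-urls.py), so other required arguments may exist; the import path and URLs are placeholders.

from url_checker import UrlChecker  # assumed import path

checker = UrlChecker(expectations={})         # assumed minimal constructor call
with checker.start() as c:                    # __enter__ simply calls start() and returns self
    c.add_url("https://example.com")          # placeholder URL
    c.add_url("https://example.org/missing")  # placeholder URL
    c.wait(30)                                # close the queue, then join for up to 30 seconds
# __exit__ closes again (idempotently) and joins without a timeout

for res in checker.results:
    print(res.url, "ok" if res.ok else "failed")

Because close() is now guarded by _closed, the extra close() issued by wait() and __exit__ enqueues the None sentinel only once, so the worker loop terminates exactly when the queue drains.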
