Skip to content

Commit e9582f2

Browse files
chore(profiling): fix flaky test_upload_resets_profile test (#15178)
## Description Some tests try to read the newest profile and act on it. One such test, `test_upload_resets_profile`, is flaky on MacOS (but strangely not on Linux in the CI). It sometimes reads an empty profile (which is expected), and sometimes does not (the empty-file assert is not triggered and test fails). The current way of finding the latest file in `pprof_utils.parse_newest_profile` is error-prone because the latest file `ctime`, metadata change time, is not always a true "latest" file; this can happen for a variety of reasons, e.g. one file gets written faster than the other one, because it's smaller ("empty"). _Example:_ * `file.foo.1` created at `t0` (sequence: 1) * `file.foo.0` created at `t1` (sequence: 0) Logically, `file_1` is the latest file, because that is the sequence number assigned to it; however, currently we think it is `file_0` (t1 > t0). ### What changed ### * Instead of sorting by `ctime`, we sort by the "logical" time (i.e. the file's seq_num) ## Testing * multiple consecutive local runs on my Mac pass reliably (10+ tries)
1 parent 7782a53 commit e9582f2

File tree

2 files changed

+35
-36
lines changed

2 files changed

+35
-36
lines changed

tests/profiling/collector/pprof_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,9 @@ def parse_newest_profile(filename_prefix: str) -> pprof_pb2.Profile:
142142
the newest profile that has given filename prefix.
143143
"""
144144
files = glob.glob(filename_prefix + ".*")
145-
# Sort files by creation timestamp (oldest first, newest last)
146-
files.sort(key=lambda f: os.path.getctime(f))
145+
# Sort files by logical timestamp (i.e. the sequence number, which is monotonically increasing);
146+
# this approach is more reliable than filesystem timestamps, especially when files are created rapidly.
147+
files.sort(key=lambda f: int(f.rsplit(".", 1)[-1]))
147148
filename = files[-1]
148149
with open(filename, "rb") as fp:
149150
dctx = zstd.ZstdDecompressor()

tests/profiling_v2/collector/test_threading.py

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,47 +1022,45 @@ def test_upload_resets_profile(self) -> None:
10221022
with self.collector_class(capture_pct=100):
10231023
with self.lock_class(): # !CREATE! !ACQUIRE! !RELEASE! test_upload_resets_profile
10241024
pass
1025+
10251026
ddup.upload() # pyright: ignore[reportCallIssue]
10261027

10271028
linenos: LineNo = get_lock_linenos("test_upload_resets_profile", with_stmt=True)
10281029

1029-
try:
1030-
pprof: pprof_pb2.Profile = pprof_utils.parse_newest_profile(self.output_filename)
1031-
pprof_utils.assert_lock_events(
1032-
pprof,
1033-
expected_acquire_events=[
1034-
pprof_utils.LockAcquireEvent(
1035-
caller_name=self.test_name,
1036-
filename=os.path.basename(__file__),
1037-
linenos=linenos,
1038-
),
1039-
],
1040-
expected_release_events=[
1041-
pprof_utils.LockReleaseEvent(
1042-
caller_name=self.test_name,
1043-
filename=os.path.basename(__file__),
1044-
linenos=linenos,
1045-
),
1046-
],
1047-
)
1048-
except (AssertionError, KeyError) as e:
1049-
# This can be flaky due to timing or interference from other tests
1050-
pytest.skip(f"Profile validation failed (known flaky on some platforms): {e}")
1030+
pprof: pprof_pb2.Profile = pprof_utils.parse_newest_profile(self.output_filename)
1031+
pprof_utils.assert_lock_events(
1032+
pprof,
1033+
expected_acquire_events=[
1034+
pprof_utils.LockAcquireEvent(
1035+
caller_name=self.test_name,
1036+
filename=os.path.basename(__file__),
1037+
linenos=linenos,
1038+
),
1039+
],
1040+
expected_release_events=[
1041+
pprof_utils.LockReleaseEvent(
1042+
caller_name=self.test_name,
1043+
filename=os.path.basename(__file__),
1044+
linenos=linenos,
1045+
),
1046+
],
1047+
)
1048+
1049+
# Now we call upload() again, and we expect the profile to be empty
1050+
num_files_before_second_upload: int = len(glob.glob(self.output_filename + ".*"))
10511051

1052-
# Now we call upload() again without any new lock operations
1053-
# We expect the profile to be empty or contain no samples
10541052
ddup.upload() # pyright: ignore[reportCallIssue]
10551053

1056-
# Try to parse the newest profile - it should either not exist (no new file)
1057-
# or have no samples (which would raise AssertionError in parse_newest_profile)
1058-
try:
1059-
_ = pprof_utils.parse_newest_profile(self.output_filename)
1060-
# If we got here, a profile with samples exists
1061-
# This might be okay if other collectors are running
1062-
pytest.skip("Profile still has samples (possibly from other activity - known flaky)")
1063-
except (AssertionError, IndexError):
1064-
# Expected: no profile file or no samples
1065-
pass
1054+
num_files_after_second_upload: int = len(glob.glob(self.output_filename + ".*"))
1055+
1056+
# A new profile file should always be created (upload_seq increments)
1057+
assert (
1058+
num_files_after_second_upload - num_files_before_second_upload == 1
1059+
), f"Expected 1 new file, got {num_files_after_second_upload - num_files_before_second_upload}."
1060+
1061+
# The newest profile file should be empty (no samples), which causes an AssertionError
1062+
with pytest.raises(AssertionError, match="No samples found in profile"):
1063+
pprof_utils.parse_newest_profile(self.output_filename)
10661064

10671065
def test_lock_hash(self) -> None:
10681066
"""Test that __hash__ allows profiled locks to be used in sets and dicts."""

0 commit comments

Comments
 (0)