From 3e970e8f2fcfad3e0fe21833549ea6a46f5e8866 Mon Sep 17 00:00:00 2001
From: Yuxuan Hu
Date: Tue, 4 Nov 2025 16:17:06 -0800
Subject: [PATCH] Add metrics for sampled policy age in replay buffer

This adds three new metrics to track the policy age of episodes that are
actually sampled from the replay buffer:
- buffer/sample/avg_sampled_policy_age: Average age of sampled episodes
- buffer/sample/max_sampled_policy_age: Maximum age of sampled episodes
- buffer/sample/min_sampled_policy_age: Minimum age of sampled episodes

This is distinct from the existing buffer/evict/avg_policy_age metric
which tracks the age of all episodes remaining in the buffer after
eviction. The new metrics provide visibility into whether training is
using fresh data (low ages) or stale data (high ages) at sampling time.

Test Plan:
- Ran existing unit tests: python -m pytest tests/unit_tests/test_replay_buffer.py -v
- All 8 tests passed
---
 src/forge/actors/replay_buffer.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/forge/actors/replay_buffer.py b/src/forge/actors/replay_buffer.py
index 77b4f6f8a..22aabe292 100644
--- a/src/forge/actors/replay_buffer.py
+++ b/src/forge/actors/replay_buffer.py
@@ -120,6 +120,27 @@ async def sample(
             entry.sample_count += 1
             sampled_episodes.append(entry.data)

+        # Calculate and record policy age metrics for sampled episodes
+        sampled_policy_ages = [
+            curr_policy_version - ep.policy_version for ep in sampled_episodes
+        ]
+        if sampled_policy_ages:
+            record_metric(
+                "buffer/sample/avg_sampled_policy_age",
+                sum(sampled_policy_ages) / len(sampled_policy_ages),
+                Reduce.MEAN,
+            )
+            record_metric(
+                "buffer/sample/max_sampled_policy_age",
+                max(sampled_policy_ages),
+                Reduce.MAX,
+            )
+            record_metric(
+                "buffer/sample/min_sampled_policy_age",
+                min(sampled_policy_ages),
+                Reduce.MIN,
+            )
+
         # Reshape into (dp_size, bsz, ...)
         reshaped_episodes = [
             sampled_episodes[dp_idx * self.batch_size : (dp_idx + 1) * self.batch_size]