From 362f0443a0cdee174a8c9673c93f27aa1f540f8e Mon Sep 17 00:00:00 2001 From: Budalebah Date: Thu, 6 Nov 2025 18:32:52 +0300 Subject: [PATCH] fix(zero-style): correct timestep handling in episode continuation - Fix off-by-one error in episode data API (timestep N should read line N+1) - Fix initial timestep in zero-style simulator (start from timestep+1 to avoid collision) - Remove TODO comment about incorrect inference_human timestep Fixes issue where human inference timestep was colliding with the last timestep in the loaded attribution log, causing incorrect training signals. --- web-ui/src/ZeroStyleApp.tsx | 2 +- web-ui/src/app/api/episode-data/route.ts | 10 +++++++--- .../simulators/StateServiceHumanSimulator.ts | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/web-ui/src/ZeroStyleApp.tsx b/web-ui/src/ZeroStyleApp.tsx index eb2532f..55d3993 100644 --- a/web-ui/src/ZeroStyleApp.tsx +++ b/web-ui/src/ZeroStyleApp.tsx @@ -96,7 +96,7 @@ function getZeroStyleParams(): { assistantNoiseTopK, }; } -// TODO: inference_human timestep is currently incorrect + function ZeroStyleApp() { const [problem, setProblem] = useState(null); const [initCode, setInitCode] = useState(""); diff --git a/web-ui/src/app/api/episode-data/route.ts b/web-ui/src/app/api/episode-data/route.ts index 0ddf8ec..7c075e2 100644 --- a/web-ui/src/app/api/episode-data/route.ts +++ b/web-ui/src/app/api/episode-data/route.ts @@ -52,14 +52,18 @@ export async function GET(request: NextRequest) { } // Get the specific timestep line - if (timestepNum > lines.length) { + // File structure: Line 0 = Header, Line 1 = timestep 0, Line 2 = timestep 1, etc. + // So timestep N is at line index N+1 + const lineIndex = timestepNum + 1; + + if (lineIndex >= lines.length) { return NextResponse.json( - { error: `Timestep ${timestepNum} exceeds available lines (${lines.length})` }, + { error: `Timestep ${timestepNum} exceeds available data (max timestep: ${lines.length - 2})` }, { status: 400 } ); } - const timestepLine = lines[timestepNum]; // Timestep[i] is on line i+1 + const timestepLine = lines[lineIndex]; const timestepData = JSON.parse(timestepLine); const text = timestepData.text || ''; diff --git a/web-ui/src/simulation/simulators/StateServiceHumanSimulator.ts b/web-ui/src/simulation/simulators/StateServiceHumanSimulator.ts index 72d9a5f..71dc24d 100644 --- a/web-ui/src/simulation/simulators/StateServiceHumanSimulator.ts +++ b/web-ui/src/simulation/simulators/StateServiceHumanSimulator.ts @@ -119,8 +119,10 @@ export class StateServiceHumanSimulator implements HumanSimulator { setInitialTimestep(timestep: number): void { console.log("🎯 Setting initial timestep to:", timestep); - this.timestep = timestep; - console.log("🎯 Timestep after setting:", this.timestep); + // The provided timestep represents the last completed state in the attribution log. + // The next inference should start from timestep + 1. + this.timestep = timestep + 1; + console.log("🎯 Next inference will start at timestep:", this.timestep); } getStats(): SimulationStats {