Skip to content

Commit 23cc5ce

Browse files
committed
Add a loss comparison script
ghstack-source-id: 7cac102 Pull-Request: #2029
1 parent 96ce554 commit 23cc5ce

File tree

2 files changed

+854
-0
lines changed

2 files changed

+854
-0
lines changed

.github/workflows/integration_test_8gpu_features.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,11 @@ jobs:
7676
export TEST_WITH_ROCM=$([[ "${{ matrix.gpu-arch-type }}" == "rocm" ]] && echo 1 || echo 0)
7777
python -m tests.integration_tests.run_tests --test_suite features "$RUNNER_TEMP/artifacts-to-be-uploaded" --ngpu 8
7878
79+
# Verify accuracy by comparing losses between the baseline and test configurations.
80+
export baseline_options='--parallelism.data_parallel_replicate_degree=1'
81+
export test_options='--parallelism.data_parallel_replicate_degree=4'
82+
# Quote the option strings so each one reaches loss_compare.py as a single argument value, even if it contains spaces.
python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --steps=10
83+
84+
# Clean up the checkpoints so that we don't waste network bandwidth and time.
7985
rm -rf "$RUNNER_TEMP"/artifacts-to-be-uploaded/*/checkpoint
8086
rm -rf artifacts-to-be-uploaded/*/checkpoint

0 commit comments

Comments
 (0)