Skip to content

Commit a523c07

Browse files
committed
Add a loss comparison script
ghstack-source-id: 9a16e69 Pull-Request: #2029
1 parent fa99300 commit a523c07

File tree

2 files changed

+790
-0
lines changed

2 files changed

+790
-0
lines changed

.github/workflows/integration_test_8gpu_features.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,10 @@ jobs:
7676
# Export TEST_WITH_ROCM as 1 when the matrix gpu-arch-type value is "rocm",
# and as 0 otherwise. The matrix value is written into this line as a workflow
# template expression, so the comparison is against whatever text is
# substituted there.
export TEST_WITH_ROCM=$([[ "${{ matrix.gpu-arch-type }}" == "rocm" ]] && echo 1 || echo 0)
7777
# Run the "features" integration test suite on 8 GPUs, writing outputs under
# $RUNNER_TEMP/artifacts-to-be-uploaded. The path is quoted so that a
# RUNNER_TEMP containing spaces or glob characters is still passed as a
# single argument.
python -m tests.integration_tests.run_tests --test_suite features "$RUNNER_TEMP/artifacts-to-be-uploaded" --ngpu 8
7878
79+
# Verify accuracy: compare the training loss of a baseline run against a test run.
80+
# Baseline training command for the loss comparison: the llama3 debug model
# config with no extra overrides. Assigned first, then exported.
baseline_cmd='CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh'
export baseline_cmd
81+
# Training command under test: the same debug model config, run with
# --parallelism.data_parallel_replicate_degree=2. It must be stored in
# test_cmd (not baseline_cmd) because the loss comparison step reads
# ${test_cmd}; assigning it to baseline_cmd would overwrite the baseline and
# leave test_cmd unset.
export test_cmd='CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh --parallelism.data_parallel_replicate_degree=2'
82+
# Run the loss comparison script for 10 steps using the baseline and test
# commands. Both expansions are quoted: each command string contains spaces,
# so an unquoted expansion would be word-split and only its first word would
# stay attached to the --baseline-cmd= / --test-cmd= option.
python3 scripts/loss_compare.py . . --baseline-cmd="${baseline_cmd}" --test-cmd="${test_cmd}" --no-seed-checkpoint --steps=10
83+
7984
# Remove the checkpoint directory of every test output under
# $RUNNER_TEMP/artifacts-to-be-uploaded. Only the variable is quoted, so the
# */checkpoint glob still expands while a RUNNER_TEMP containing spaces is
# not split into several paths.
rm -rf "$RUNNER_TEMP"/artifacts-to-be-uploaded/*/checkpoint
8085
# Remove the checkpoint directory of every test output under the
# artifacts-to-be-uploaded directory relative to the current working
# directory, one match at a time.
for ckpt_dir in artifacts-to-be-uploaded/*/checkpoint; do
  rm -rf "$ckpt_dir"
done

0 commit comments

Comments
 (0)