77# This script can be used to launch a torchtitan float8 training run
88# with the given parameters.
99
10- # script arguments
11- LOCAL_BATCH_SIZE=${LOCAL_BATCH_SIZE:- 1}
12- STEPS=${STEPS:- 100}
13-
1410# temporary log file which is deleted after performance data is parsed out and metrics are calculated.
15- LOG_FILE=" /tmp/float8_training_log .txt"
11+ LOG_FILE=" /tmp/torchtitan_logs .txt"
1612
17- # validate user has specified torchtitan root directory
13+ # validate user has specified required args
1814if [ -z " ${TORCHTITAN_ROOT} " ]; then
19- echo " Error: TORCHTITAN environment variable is not set. Please set it before running this script."
20- echo " Usage: TORCHTITAN_ROOT=<directory> ./torchtitan_llama4.sh"
15+ echo " Error: TORCHTITAN_ROOT environment variable is not set. Please set it before running this script."
16+ echo " Usage: TORCHTITAN_ROOT=<directory> CONFIG_FILE=<model toml> ./moe.sh"
17+ echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
18+ exit 1
19+ fi
20+
21+ if [ -z " ${CONFIG_FILE} " ]; then
22+ echo " Error: CONFIG_FILE environment variable is not set. Please set it before running this script."
23+ echo " Usage: TORCHTITAN_ROOT=<directory> CONFIG_FILE=<model toml> ./moe.sh"
2124 echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
2225 exit 1
2326fi
@@ -29,7 +32,7 @@ original_dir=$(pwd)
2932cd ${TORCHTITAN_ROOT}
3033
3134# run the command with the specified arguments
32- CONFIG_FILE= " ./torchtitan/experiments/llama4/train_configs/debug_model.toml " ${TORCHTITAN_ROOT} /run_train.sh ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
35+ ${TORCHTITAN_ROOT} /run_train.sh ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
3336
3437# return to original working directory
3538cd $original_dir
0 commit comments