Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .buildkite/benchmarks/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ agents:
modules: climacommon/2025_05_15

env:
JULIA_NVTX_CALLBACKS: gc
OPENBLAS_NUM_THREADS: 1
OMPI_MCA_opal_warn_on_missing_libcuda: 0
SLURM_KILL_BAD_EXIT: 1
Expand Down
1 change: 0 additions & 1 deletion .buildkite/hierarchies/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ agents:
env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
GKSwstype: 100
Expand Down
1 change: 0 additions & 1 deletion .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ agents:
env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
GKSwstype: 100
Expand Down
1 change: 0 additions & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/cpu"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
GKSwstype: 100
Expand Down
4 changes: 4 additions & 0 deletions experiments/ClimaEarth/cli_options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ function argparse_settings()
help = "An optional YAML file used to overwrite the default model parameters."
arg_type = String
default = nothing
"--atmos_log_progress"
help = "Use the ClimaAtmos walltime logging callback instead of the default ClimaCoupler one [`false` (default), `true`]"
arg_type = Bool
default = false
"--albedo_model"
help = "Type of albedo model. [`ConstantAlbedo`, `RegressionFunctionAlbedo`, `CouplerAlbedo` (default)]"
arg_type = String
Expand Down
1 change: 1 addition & 0 deletions experiments/ClimaEarth/components/atmosphere/climaatmos.jl
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ function get_atmos_config_dict(
# can pick up from where we have left. NOTE: This should not be needed, but
# there is no easy way to initialize ClimaAtmos with a different t_start
atmos_config["dt_save_state_to_disk"] = coupler_config["checkpoint_dt"]
atmos_config["log_progress"] = coupler_config["atmos_log_progress"]

# The Atmos `get_simulation` function expects the atmos config to contain its timestep size
# in the `dt` field. If there is a `dt_atmos` field in coupler_config, we add it to the atmos config as `dt`
Expand Down
8 changes: 7 additions & 1 deletion experiments/ClimaEarth/setup_run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,13 @@ function CoupledSimulation(config_dict::AbstractDict)
EveryCalendarDtSchedule(TimeManager.time_to_period(checkpoint_dt); start_date)
checkpoint_cb = TimeManager.Callback(schedule_checkpoint, Checkpointer.checkpoint_sims)

callbacks = (checkpoint_cb,)
# Skip the coupler's walltime logging when atmos is using its own walltime logging
if config_dict["atmos_log_progress"]
callbacks = (checkpoint_cb,)
else
walltime_cb = TimeManager.capped_geometric_walltime_cb(t_start, t_end, Δt_cpl)
callbacks = (checkpoint_cb, walltime_cb)
end

#= Set up default AMIP diagnostics
Use ClimaDiagnostics for default AMIP diagnostics, which currently include turbulent energy fluxes.
Expand Down
25 changes: 25 additions & 0 deletions src/TimeManager.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module TimeManager
import Dates
import ..Interfacer
import ..Utilities: time_to_seconds
import ClimaUtilities.OnlineLogging: WallTimeInfo, report_walltime

"""
time_to_period(s::String)
Expand Down Expand Up @@ -100,4 +101,28 @@ function (::NeverSchedule)(args...)
return false
end

"""
capped_geometric_walltime_cb(t_start, t_end, Δt_cpl)

Create a callback that reports walltime at when the number of steps taken is a power of 2, or
when the percent of the simulation that is completed is a multiple of 5. This skips the
first two steps to avoid compilation time noise.
"""
function capped_geometric_walltime_cb(t_start, t_end, Δt_cpl)
tot_steps = Int(ceil(float(t_end - t_start) / float(Δt_cpl)))
five_percent_steps = ceil(Int, 0.05 * tot_steps)
steps_taken = (integrator) -> float(integrator.t - t_start) / float(Δt_cpl)
walltime_report_cond =
(integrator) -> begin
nsteps = steps_taken(integrator)
# skip first two steps for compilation
(nsteps <= 2) && return false
return nsteps % five_percent_steps == 0 || ispow2(nsteps)
end
walltime_affect! = let wt = WallTimeInfo()
(coupled_sim) -> report_walltime(wt, coupled_sim.model_sims.atmos_sim.integrator)
end
return TimeManager.Callback(walltime_report_cond, walltime_affect!)
end

end
28 changes: 28 additions & 0 deletions test/utilities_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Test: @testset, @test
import ClimaComms
ClimaComms.@import_required_backends
import ClimaCoupler: Utilities
import ClimaCoupler: TimeManager
import ClimaCore as CC

# Initialize MPI context, in case
Expand Down Expand Up @@ -71,4 +72,31 @@ for FT in (Float32, Float64)
)
@test Utilities.integral(ones(space3d)) == sum(ones(space3d))
end

@testset "WallTime Callback" begin
    t_start, t_end, Δt_cpl = 0.0, 10.0, 0.1

    cb = TimeManager.capped_geometric_walltime_cb(t_start, t_end, Δt_cpl)

    # Evaluate the callback's schedule on a stand-in integrator that only carries the
    # time reached after `k` coupling steps.
    fires_after = k -> cb.schedule((; t = t_start + Δt_cpl * k))

    # The first two steps must never trigger a report
    for k in (1, 2)
        @test !fires_after(k)
    end

    # Powers of two trigger (4, 8, 16); step 14 is neither a power of two nor a
    # multiple of 5% of the 100 total steps
    for k in (4, 8)
        @test fires_after(k)
    end
    @test !fires_after(14)
    @test fires_after(16)

    # 20 of 100 steps (20%) is a multiple of 5% and must trigger
    @test fires_after(20)
end
end
Loading