Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions hyperactor_mesh/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ declare_attrs! {
env_name: Some("HYPERACTOR_MESH_ENABLE_LOG_FORWARDING".to_string()),
py_name: None,
})
pub attr MESH_ENABLE_LOG_FORWARDING: bool = true;
pub attr MESH_ENABLE_LOG_FORWARDING: bool = false;

/// When `true`: if stdio is piped, each child's `StreamFwder`
/// also forwards lines to a host-scoped `FileAppender` managed by
Expand All @@ -124,7 +124,7 @@ declare_attrs! {
env_name: Some("HYPERACTOR_MESH_ENABLE_FILE_CAPTURE".to_string()),
py_name: None,
})
pub attr MESH_ENABLE_FILE_CAPTURE: bool = true;
pub attr MESH_ENABLE_FILE_CAPTURE: bool = false;

/// Maximum number of log lines retained in a proc's stderr/stdout
/// tail buffer. Used by [`StreamFwder`] when wiring child
Expand All @@ -133,7 +133,7 @@ declare_attrs! {
env_name: Some("HYPERACTOR_MESH_TAIL_LOG_LINES".to_string()),
py_name: None,
})
pub attr MESH_TAIL_LOG_LINES: usize = 100;
pub attr MESH_TAIL_LOG_LINES: usize = 0;

/// If enabled (default), bootstrap child processes install
/// `PR_SET_PDEATHSIG(SIGKILL)` so the kernel reaps them if the
Expand Down Expand Up @@ -3692,6 +3692,10 @@ mod tests {
#[tokio::test]
async fn exit_tail_is_attached_and_logged() {
hyperactor_telemetry::initialize_logging_for_test();

let lock = hyperactor::config::global::lock();
let _guard = lock.override_key(MESH_TAIL_LOG_LINES, 100);

// Spawn a child that writes to stderr then exits 7.
let mut cmd = Command::new("sh");
cmd.arg("-c")
Expand Down
4 changes: 4 additions & 0 deletions hyperactor_mesh/src/v1/host_mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ mod tests {

use super::*;
use crate::Bootstrap;
use crate::bootstrap::MESH_TAIL_LOG_LINES;
use crate::resource::Status;
use crate::v1::ActorMesh;
use crate::v1::testactor;
Expand Down Expand Up @@ -1321,6 +1322,9 @@ mod tests {
#[tokio::test]
#[cfg(fbcode_build)]
async fn test_failing_proc_allocation() {
let lock = hyperactor::config::global::lock();
let _guard = lock.override_key(MESH_TAIL_LOG_LINES, 100);

let program = crate::testresource::get("monarch/hyperactor_mesh/bootstrap");

let hosts = vec![free_localhost_addr(), free_localhost_addr()];
Expand Down
10 changes: 7 additions & 3 deletions monarch_hyperactor/src/v1/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,9 @@ impl LoggingMeshClient {
// re-spawning infra, which we deliberately don't do at
// runtime.
(None, true) => {
return Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"log forwarding disabled by config at startup; cannot enable streaming_to_client",
));
// return Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
// "log forwarding disabled by config at startup; cannot enable streaming_to_client",
// ));
}
}

Expand Down Expand Up @@ -592,6 +592,9 @@ mod tests {
);
}

/*
// Update (SF: 2025, 11, 13): We now ignore stream-to-client requests if
// log forwarding is disabled (i.e. the forwarders were never spawned).
// (c) stream_to_client = true when forwarding was
// never spawned -> Err
let res = client_ref.set_mode(&py_instance, true, None, 10);
Expand All @@ -606,6 +609,7 @@ mod tests {
"unexpected err when enabling streaming with no forwarders: {msg}"
);
}
*/
});

drop(client_py); // See note "NOTE ON LIFECYCLE / CLEANUP"
Expand Down
4 changes: 2 additions & 2 deletions python/monarch/_src/actor/v1/proc_mesh.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,8 @@ def rank_tensors(self) -> Dict[str, "Tensor"]:

async def logging_option(
self,
stream_to_client: bool = True,
aggregate_window_sec: int | None = 3,
stream_to_client: bool = False,
aggregate_window_sec: int | None = None,
level: int = logging.INFO,
) -> None:
"""
Expand Down
14 changes: 12 additions & 2 deletions python/tests/test_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,19 @@ async def test_allocate_failure_message(self) -> None:
r"exited with code 1: Traceback \(most recent call last\).*",
):
with remote_process_allocator(
envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
envs={
"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1",
"HYPERACTOR_MESH_ENABLE_LOG_FORWARDING": "true",
"HYPERACTOR_MESH_ENABLE_FILE_CAPTURE": "true",
"HYPERACTOR_MESH_TAIL_LOG_LINES": "100",
}
) as host1, remote_process_allocator(
envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
envs={
"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1",
"HYPERACTOR_MESH_ENABLE_LOG_FORWARDING": "true",
"HYPERACTOR_MESH_ENABLE_FILE_CAPTURE": "true",
"HYPERACTOR_MESH_TAIL_LOG_LINES": "100",
}
) as host2:
allocator = RemoteAllocator(
world_id="test_remote_allocator",
Expand Down
Loading