Skip to content

Commit 8a42ca7

Browse files
logging: default disable log forwarding (#1878)
Summary: This diff sets the following defaults for hyperactor-mesh global configuration values:
- `MESH_ENABLE_LOG_FORWARDING=false`
- `MESH_ENABLE_FILE_CAPTURE=false`
- `MESH_TAIL_LOG_LINES=0`

The effect is to disable log forwarding, avoid allocating resources for log forwarding, and disable file capture at the hyperactor-mesh level (including "exit tail" capture). In fact, under these defaults there is no interception of child process stdio at all.

A [workplace post is planned to announce this change](https://fb.workplace.com/groups/1399849971389924/permalink/1602002844507968/) in default configuration.

This diff builds on D85783397 (config for enabling/disabling hyperactor-mesh logging interception features), D85919326 (avoid spinning up `LogForwardActor` meshes when log forwarding is disabled), and D85969320 (detailed testing).

Reviewed By: zdevito, vidhyav
Differential Revision: D86994420
1 parent adcdb8e commit 8a42ca7

File tree

6 files changed

+181
-15
lines changed

6 files changed

+181
-15
lines changed

hyperactor_mesh/src/bootstrap.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ declare_attrs! {
9999
env_name: Some("HYPERACTOR_MESH_ENABLE_LOG_FORWARDING".to_string()),
100100
py_name: None,
101101
})
102-
pub attr MESH_ENABLE_LOG_FORWARDING: bool = true;
102+
pub attr MESH_ENABLE_LOG_FORWARDING: bool = false;
103103

104104
/// When `true`: if stdio is piped, each child's `StreamFwder`
105105
/// also forwards lines to a host-scoped `FileAppender` managed by
@@ -124,7 +124,7 @@ declare_attrs! {
124124
env_name: Some("HYPERACTOR_MESH_ENABLE_FILE_CAPTURE".to_string()),
125125
py_name: None,
126126
})
127-
pub attr MESH_ENABLE_FILE_CAPTURE: bool = true;
127+
pub attr MESH_ENABLE_FILE_CAPTURE: bool = false;
128128

129129
/// Maximum number of log lines retained in a proc's stderr/stdout
130130
/// tail buffer. Used by [`StreamFwder`] when wiring child
@@ -133,7 +133,7 @@ declare_attrs! {
133133
env_name: Some("HYPERACTOR_MESH_TAIL_LOG_LINES".to_string()),
134134
py_name: None,
135135
})
136-
pub attr MESH_TAIL_LOG_LINES: usize = 100;
136+
pub attr MESH_TAIL_LOG_LINES: usize = 0;
137137

138138
/// If enabled (default), bootstrap child processes install
139139
/// `PR_SET_PDEATHSIG(SIGKILL)` so the kernel reaps them if the
@@ -3692,6 +3692,10 @@ mod tests {
36923692
#[tokio::test]
36933693
async fn exit_tail_is_attached_and_logged() {
36943694
hyperactor_telemetry::initialize_logging_for_test();
3695+
3696+
let lock = hyperactor::config::global::lock();
3697+
let _guard = lock.override_key(MESH_TAIL_LOG_LINES, 100);
3698+
36953699
// Spawn a child that writes to stderr then exits 7.
36963700
let mut cmd = Command::new("sh");
36973701
cmd.arg("-c")

hyperactor_mesh/src/v1/host_mesh.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,7 @@ mod tests {
11251125

11261126
use super::*;
11271127
use crate::Bootstrap;
1128+
use crate::bootstrap::MESH_TAIL_LOG_LINES;
11281129
use crate::resource::Status;
11291130
use crate::v1::ActorMesh;
11301131
use crate::v1::testactor;
@@ -1321,6 +1322,9 @@ mod tests {
13211322
#[tokio::test]
13221323
#[cfg(fbcode_build)]
13231324
async fn test_failing_proc_allocation() {
1325+
let lock = hyperactor::config::global::lock();
1326+
let _guard = lock.override_key(MESH_TAIL_LOG_LINES, 100);
1327+
13241328
let program = crate::testresource::get("monarch/hyperactor_mesh/bootstrap");
13251329

13261330
let hosts = vec![free_localhost_addr(), free_localhost_addr()];

monarch_hyperactor/src/v1/logging.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -310,9 +310,9 @@ impl LoggingMeshClient {
310310
// re-spawning infra, which we deliberately don't do at
311311
// runtime.
312312
(None, true) => {
313-
return Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
314-
"log forwarding disabled by config at startup; cannot enable streaming_to_client",
315-
));
313+
// return Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
314+
// "log forwarding disabled by config at startup; cannot enable streaming_to_client",
315+
// ));
316316
}
317317
}
318318

@@ -592,6 +592,9 @@ mod tests {
592592
);
593593
}
594594

595+
/*
596+
// Update (SF: 2025, 11, 13): We now ignore stream to client requests if
597+
// log forwarding is disabled.
595598
// (c) stream_to_client = true when forwarding was
596599
// never spawned -> Err
597600
let res = client_ref.set_mode(&py_instance, true, None, 10);
@@ -606,6 +609,7 @@ mod tests {
606609
"unexpected err when enabling streaming with no forwarders: {msg}"
607610
);
608611
}
612+
*/
609613
});
610614

611615
drop(client_py); // See note "NOTE ON LIFECYCLE / CLEANUP"

python/monarch/_src/actor/v1/proc_mesh.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,8 @@ def rank_tensors(self) -> Dict[str, "Tensor"]:
365365

366366
async def logging_option(
367367
self,
368-
stream_to_client: bool = True,
369-
aggregate_window_sec: int | None = 3,
368+
stream_to_client: bool = False,
369+
aggregate_window_sec: int | None = None,
370370
level: int = logging.INFO,
371371
) -> None:
372372
"""

python/tests/test_allocator.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,19 @@ async def test_allocate_failure_message(self) -> None:
254254
r"exited with code 1: Traceback \(most recent call last\).*",
255255
):
256256
with remote_process_allocator(
257-
envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
257+
envs={
258+
"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1",
259+
"HYPERACTOR_MESH_ENABLE_LOG_FORWARDING": "true",
260+
"HYPERACTOR_MESH_ENABLE_FILE_CAPTURE": "true",
261+
"HYPERACTOR_MESH_TAIL_LOG_LINES": "100",
262+
}
258263
) as host1, remote_process_allocator(
259-
envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
264+
envs={
265+
"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1",
266+
"HYPERACTOR_MESH_ENABLE_LOG_FORWARDING": "true",
267+
"HYPERACTOR_MESH_ENABLE_FILE_CAPTURE": "true",
268+
"HYPERACTOR_MESH_TAIL_LOG_LINES": "100",
269+
}
260270
) as host2:
261271
allocator = RemoteAllocator(
262272
world_id="test_remote_allocator",

0 commit comments

Comments
 (0)