Skip to content

Commit d72727c

Browse files
pzhan9facebook-github-bot
authored andcommitted
log ActorStatus change events (#1834)
Summary: The goal is that we can get a `ActorStatus` in the `status` column by searching scuba with: > actor_id=<name> and name=ActorStatus Differential Revision: D86792902
1 parent a03cc4a commit d72727c

File tree

1 file changed

+27
-13
lines changed

1 file changed

+27
-13
lines changed

hyperactor/src/proc.rs

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::hash::Hasher;
2121
use std::ops::Deref;
2222
use std::panic;
2323
use std::panic::AssertUnwindSafe;
24+
use std::panic::Location;
2425
use std::pin::Pin;
2526
use std::sync::Arc;
2627
use std::sync::OnceLock;
@@ -47,13 +48,15 @@ use tokio::sync::watch;
4748
use tokio::task::JoinHandle;
4849
use tracing::Instrument;
4950
use tracing::Level;
51+
use tracing::Span;
5052
use uuid::Uuid;
5153

5254
use crate as hyperactor;
5355
use crate::Actor;
5456
use crate::ActorRef;
5557
use crate::Handler;
5658
use crate::Message;
59+
use crate::Named as _;
5760
use crate::RemoteMessage;
5861
use crate::actor::ActorError;
5962
use crate::actor::ActorErrorKind;
@@ -769,7 +772,6 @@ impl Proc {
769772
}
770773

771774
/// Create a root allocation in the proc.
772-
#[hyperactor::instrument(fields(actor_name=name))]
773775
fn allocate_root_id(&self, name: &str) -> Result<ActorId, anyhow::Error> {
774776
let name = name.to_string();
775777
match self.state().roots.entry(name.to_string()) {
@@ -1004,8 +1006,21 @@ impl<A: Actor> Instance<A> {
10041006

10051007
/// Notify subscribers of a change in the actors status and bump counters with the duration which
10061008
/// the last status was active for.
1009+
#[track_caller]
10071010
fn change_status(&self, new: ActorStatus) {
1008-
self.status_tx.send_replace(new.clone());
1011+
let old = self.status_tx.send_replace(new.clone());
1012+
// Actor status changes between Idle and Processing when handling every
1013+
// message. It creates too many logs if we want to log these 2 states.
1014+
// Therefore we only log the 1st one after Initializing.
1015+
if old == ActorStatus::Initializing
1016+
|| !matches!(new, ActorStatus::Idle | ActorStatus::Processing(..))
1017+
{
1018+
tracing::info!(
1019+
name = "ActorStatus",
1020+
status = new.arm().unwrap_or("unknown"),
1021+
caller = %Location::caller(),
1022+
);
1023+
}
10091024
}
10101025

10111026
fn is_terminal(&self) -> bool {
@@ -1079,9 +1094,14 @@ impl<A: Actor> Instance<A> {
10791094
let instance_cell = self.cell.clone();
10801095
let actor_id = self.cell.actor_id().clone();
10811096
let actor_handle = ActorHandle::new(self.cell.clone(), self.ports.clone());
1082-
let actor_task_handle = A::spawn_server_task(panic_handler::with_backtrace_tracking(
1083-
self.serve(actor, actor_loop_receivers, work_rx),
1084-
));
1097+
let actor_task_handle = A::spawn_server_task(
1098+
panic_handler::with_backtrace_tracking(self.serve(
1099+
actor,
1100+
actor_loop_receivers,
1101+
work_rx,
1102+
))
1103+
.instrument(Span::current()),
1104+
);
10851105
tracing::debug!("{}: spawned with {:?}", actor_id, actor_task_handle);
10861106
instance_cell
10871107
.inner
@@ -1258,7 +1278,6 @@ impl<A: Actor> Instance<A> {
12581278
}
12591279

12601280
/// Initialize and run the actor until it fails or is stopped.
1261-
#[tracing::instrument(level = "info", skip_all, fields(actor_id = %self.self_id()))]
12621281
async fn run(
12631282
&mut self,
12641283
actor: &mut A,
@@ -1370,6 +1389,7 @@ impl<A: Actor> Instance<A> {
13701389
}
13711390
}
13721391

1392+
#[hyperactor::instrument(fields(actor_id = self.self_id().to_string(), actor_name = self.self_id().name()))]
13731393
async unsafe fn handle_message<M: Message>(
13741394
&mut self,
13751395
actor: &mut A,
@@ -1399,18 +1419,12 @@ impl<A: Actor> Instance<A> {
13991419
&headers,
14001420
self.self_id().to_string(),
14011421
);
1402-
let span = tracing::debug_span!(
1403-
"actor_status",
1404-
actor_id = self.self_id().to_string(),
1405-
actor_name = self.self_id().name(),
1406-
name = self.cell.status().borrow().to_string(),
1407-
);
14081422

14091423
let context = Context::new(self, headers);
14101424
// Pass a reference to the context to the handler, so that deref
14111425
// coercion allows the `this` argument to be treated exactly like
14121426
// &Instance<A>.
1413-
actor.handle(&context, message).instrument(span).await
1427+
actor.handle(&context, message).await
14141428
}
14151429

14161430
// Spawn on child on this instance. Currently used only by cap::CanSpawn.

0 commit comments

Comments
 (0)