Skip to content

Commit a70eeb0

Browse files
thomasballingerConvex, Inc.
authored andcommitted
Debug-mode clients report slow and large transitions (#40943)
GitOrigin-RevId: b932a0408c54b712c4e81e574e3ee108a784e6c7
1 parent 2c1deb1 commit a70eeb0

File tree

4 files changed

+125
-35
lines changed

4 files changed

+125
-35
lines changed

crates/sync/src/metrics.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,9 +348,38 @@ impl TryFrom<JsonValue> for ClientMark {
348348
}
349349
}
350350

351+
pub fn log_client_transition(partition_id: u64, transition_transit_time: f64, message_length: f64) {
352+
log_distribution_with_labels(
353+
&SYNC_TRANSITION_TRANSIT_TIME_SECONDS,
354+
transition_transit_time / 1000.0,
355+
vec![StaticMetricLabel::new(
356+
"partition_id",
357+
partition_id.to_string(),
358+
)],
359+
);
360+
log_distribution_with_labels(
361+
&SYNC_TRANSITION_MESSAGE_LENGTH_BYTES,
362+
message_length,
363+
vec![StaticMetricLabel::new(
364+
"partition_id",
365+
partition_id.to_string(),
366+
)],
367+
);
368+
}
369+
351370
#[derive(Clone, Debug)]
352371
pub enum TypedClientEvent {
353-
ClientConnect { marks: Vec<ClientMark> },
372+
ClientConnect {
373+
marks: Vec<ClientMark>,
374+
},
375+
ClientReceivedTransition {
376+
/// Time from the server sending the transition to the client fully
377+
/// receiving it (after finishing downloading it), corrected by
378+
/// an estimated clock skew observed when the client sends a
379+
/// smaller message in the other direction.
380+
transition_transit_time: f64,
381+
message_length: f64,
382+
},
354383
}
355384

356385
impl TryFrom<ClientEvent> for TypedClientEvent {
@@ -371,6 +400,14 @@ impl TryFrom<ClientEvent> for TypedClientEvent {
371400
marks: parsed_marks,
372401
})
373402
},
403+
"ClientReceivedTransition" => {
404+
let event_data: ClientReceivedTransitionEvent =
405+
serde_json::from_value(value.event)?;
406+
Ok(TypedClientEvent::ClientReceivedTransition {
407+
transition_transit_time: event_data.transition_transit_time,
408+
message_length: event_data.message_length,
409+
})
410+
},
374411
_ => Err(anyhow::anyhow!("Unknown ClientEvent type")),
375412
}
376413
}
@@ -391,6 +428,23 @@ struct ClientMarkJson {
391428
start_time: f64,
392429
}
393430

431+
#[derive(Deserialize)]
432+
#[serde(rename_all = "camelCase")]
433+
struct ClientReceivedTransitionEvent {
434+
transition_transit_time: f64,
435+
message_length: f64,
436+
}
437+
438+
register_convex_histogram!(
439+
SYNC_TRANSITION_TRANSIT_TIME_SECONDS,
440+
"Time for transition to transit from client (corrected by an estimated clock skew)",
441+
&["partition_id"]
442+
);
443+
register_convex_histogram!(
444+
SYNC_TRANSITION_MESSAGE_LENGTH_BYTES,
445+
"Length of transition message from client",
446+
&["partition_id"]
447+
);
394448
register_convex_histogram!(
395449
SYNC_QUERY_INVALIDATION_LAG_SECONDS,
396450
"Time between an invalidating write and a query being rerun",

crates/sync/src/worker.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,14 @@ impl<RT: Runtime> SyncWorker<RT> {
741741
TypedClientEvent::ClientConnect { marks } => {
742742
metrics::log_client_connect_timings(self.partition_id, marks)
743743
},
744+
TypedClientEvent::ClientReceivedTransition {
745+
transition_transit_time,
746+
message_length,
747+
} => metrics::log_client_transition(
748+
self.partition_id,
749+
transition_transit_time,
750+
message_length,
751+
),
744752
},
745753
Err(_) => (),
746754
}

npm-packages/convex/src/browser/sync/client.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ export class BaseConvexClient {
497497
webSocketConstructor,
498498
this.logger,
499499
this.markConnectionStateDirty,
500+
this.debug,
500501
);
501502
this.mark("convexClientConstructed");
502503

npm-packages/convex/src/browser/sync/web_socket_manager.ts

Lines changed: 61 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
encodeClientMessage,
55
parseServerMessage,
66
ServerMessage,
7+
Transition,
78
} from "./protocol.js";
89

910
const CLOSE_NORMAL = 1000;
@@ -199,6 +200,7 @@ export class WebSocketManager {
199200
webSocketConstructor: typeof WebSocket,
200201
logger: Logger,
201202
private readonly markConnectionStateDirty: () => void,
203+
private readonly debug: boolean,
202204
) {
203205
this.webSocketConstructor = webSocketConstructor;
204206
this.socket = { state: "disconnected" };
@@ -312,40 +314,11 @@ export class WebSocketManager {
312314
const messageLength = message.data.length;
313315
const serverMessage = parseServerMessage(JSON.parse(message.data));
314316
this._logVerbose(`received ws message with type ${serverMessage.type}`);
315-
if (
316-
serverMessage.type === "Transition" &&
317-
serverMessage.clientClockSkew !== undefined &&
318-
serverMessage.serverTs !== undefined
319-
) {
320-
const transitionTransitTime =
321-
monotonicMillis() - // client time now
322-
// clientClockSkew = (server time + upstream latency) - client time
323-
// clientClockSkew is "how many milliseconds behind (slow) is the client clock"
324-
// but the latency of the Connect message inflates this, making it appear further behind
325-
serverMessage.clientClockSkew -
326-
serverMessage.serverTs / 1_000_000; // server time when transition was sent
327-
const prettyTransitionTime = `${Math.round(transitionTransitTime)}ms`;
328-
const prettyMessageMB = `${Math.round(messageLength / 10_000) / 100}MB`;
329-
const bytesPerSecond = messageLength / (transitionTransitTime / 1000);
330-
const prettyBytesPerSecond = `${Math.round(bytesPerSecond / 10_000) / 100}MB per second`;
331-
this._logVerbose(
332-
`received ${prettyMessageMB} transition in ${prettyTransitionTime} at ${prettyBytesPerSecond}`,
333-
);
334-
335-
// Warnings that will show up for *all users*, so these are not very aggressive goals.
336-
if (transitionTransitTime > 10_000 && messageLength > 10_000_000) {
337-
this.logger.log(
338-
`received query results totalling more than 10MB (${prettyMessageMB}) which took more than 10s (${prettyTransitionTime}) to arrive`,
339-
);
340-
} else if (messageLength > 20_000_000) {
341-
this.logger.log(
342-
`received query results totalling more that 20MB (${prettyMessageMB}) which will take a long time to download on slower connections`,
343-
);
344-
} else if (transitionTransitTime > 20_000) {
345-
this.logger.log(
346-
`received query results totalling ${prettyMessageMB} which took more than 20s to arrive (${prettyTransitionTime})`,
347-
);
348-
}
317+
if (serverMessage.type === "Transition") {
318+
this.reportLargeTransition({
319+
messageLength,
320+
transition: serverMessage,
321+
});
349322
}
350323
const response = this.onMessage(serverMessage);
351324
if (response.hasSyncedPastLastReconnect) {
@@ -686,4 +659,58 @@ export class WebSocketManager {
686659
const jitter = actualBackoff * (Math.random() - 0.5);
687660
return actualBackoff + jitter;
688661
}
662+
663+
private reportLargeTransition({
664+
transition,
665+
messageLength,
666+
}: {
667+
transition: Transition;
668+
messageLength: number;
669+
}) {
670+
if (
671+
transition.clientClockSkew === undefined ||
672+
transition.serverTs === undefined
673+
) {
674+
return;
675+
}
676+
677+
const transitionTransitTime =
678+
monotonicMillis() - // client time now
679+
// clientClockSkew = (server time + upstream latency) - client time
680+
// clientClockSkew is "how many milliseconds behind (slow) is the client clock"
681+
// but the latency of the Connect message inflates this, making it appear further behind
682+
transition.clientClockSkew -
683+
transition.serverTs / 1_000_000; // server time when transition was sent
684+
const prettyTransitionTime = `${Math.round(transitionTransitTime)}ms`;
685+
const prettyMessageMB = `${Math.round(messageLength / 10_000) / 100}MB`;
686+
const bytesPerSecond = messageLength / (transitionTransitTime / 1000);
687+
const prettyBytesPerSecond = `${Math.round(bytesPerSecond / 10_000) / 100}MB per second`;
688+
this._logVerbose(
689+
`received ${prettyMessageMB} transition in ${prettyTransitionTime} at ${prettyBytesPerSecond}`,
690+
);
691+
692+
// Warnings that will show up for *all users*, so these are not very aggressive goals.
693+
if (transitionTransitTime > 10_000 && messageLength > 10_000_000) {
694+
this.logger.log(
695+
`received query results totalling more than 10MB (${prettyMessageMB}) which took more than 10s (${prettyTransitionTime}) to arrive`,
696+
);
697+
} else if (messageLength > 20_000_000) {
698+
this.logger.log(
699+
`received query results totalling more that 20MB (${prettyMessageMB}) which will take a long time to download on slower connections`,
700+
);
701+
} else if (transitionTransitTime > 20_000) {
702+
this.logger.log(
703+
`received query results totalling ${prettyMessageMB} which took more than 20s to arrive (${prettyTransitionTime})`,
704+
);
705+
}
706+
if (this.debug) {
707+
if (transitionTransitTime > 10_000 || messageLength > 10_000_000) {
708+
this.sendMessage({
709+
type: "Event",
710+
eventType: "ClientReceivedTransition",
711+
event: { transitionTransitTime, messageLength },
712+
});
713+
}
714+
}
715+
}
689716
}

0 commit comments

Comments
 (0)