Skip to content

Commit 90dd5bb

Browse files
authored
Refactor get_validator_blocks_v3 fallback (sigp#8186)
sigp#7727 introduced a bug in the logging, where as long as the node failed the SSZ `get_validator_blocks_v3` endpoint, it would log as `Beacon node does not support...`. However, the failure can be due to other reasons, such as a timed out error as found by @jimmygchen: `WARN Beacon node does not support SSZ in block production, falling back to JSON slot: 5283379, error: HttpClient(url: https://ho-h-bn-cowl.spesi.io:15052/, kind: timeout, detail: operation timed out` This PR made the error log more generic, so there is less confusion. Additionally, suggested by @michaelsproul, this PR refactors the `get_validator_blocks_v3` calls by trying all beacon nodes using the SSZ endpoint first, and if all beacon node fails the SSZ endpoint, only then fallback to JSON. It changes the logic from: "SSZ -> JSON for primary beacon node, followed by SSZ -> JSON for second beacon node and so on" to "SSZ for all beacon nodes -> JSON for all beacon nodes" This has the advantage that if the primary beacon node is having issues and failed the SSZ, we avoid retrying the primary beacon node again on JSON (as it could be that the primary beacon node fail again); rather, we switch to the second beacon node. Co-Authored-By: Tan Chee Keong <tanck@sigmaprime.io> Co-Authored-By: chonghe <44791194+chong-he@users.noreply.github.com>
1 parent 64031b6 commit 90dd5bb

File tree

1 file changed

+68
-87
lines changed

1 file changed

+68
-87
lines changed

validator_client/validator_services/src/block_service.rs

Lines changed: 68 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use beacon_node_fallback::{ApiTopic, BeaconNodeFallback, Error as FallbackError, Errors};
2-
use bls::SignatureBytes;
32
use eth2::{BeaconNodeHttpClient, StatusCode};
43
use graffiti_file::{GraffitiFile, determine_graffiti};
54
use logging::crit;
@@ -298,7 +297,7 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
298297
self.inner.executor.spawn(
299298
async move {
300299
let result = service
301-
.publish_block(slot, validator_pubkey, builder_boost_factor)
300+
.get_validator_block_and_publish_block(slot, validator_pubkey, builder_boost_factor)
302301
.await;
303302

304303
match result {
@@ -396,7 +395,7 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
396395
skip_all,
397396
fields(%slot, ?validator_pubkey)
398397
)]
399-
async fn publish_block(
398+
async fn get_validator_block_and_publish_block(
400399
self,
401400
slot: Slot,
402401
validator_pubkey: PublicKeyBytes,
@@ -449,33 +448,80 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
449448

450449
info!(slot = slot.as_u64(), "Requesting unsigned block");
451450

452-
// Request block from first responsive beacon node.
451+
// Request an SSZ block from all beacon nodes in order, returning on the first successful response.
452+
// If all nodes fail, run a second pass falling back to JSON.
453453
//
454-
// Try the proposer nodes last, since it's likely that they don't have a
454+
// Proposer nodes will always be tried last during each pass since it's likely that they don't have a
455455
// great view of attestations on the network.
456-
let unsigned_block = proposer_fallback
456+
let ssz_block_response = proposer_fallback
457457
.request_proposers_last(|beacon_node| async move {
458458
let _get_timer = validator_metrics::start_timer_vec(
459459
&validator_metrics::BLOCK_SERVICE_TIMES,
460460
&[validator_metrics::BEACON_BLOCK_HTTP_GET],
461461
);
462-
Self::get_validator_block(
463-
&beacon_node,
464-
slot,
465-
randao_reveal_ref,
466-
graffiti,
467-
proposer_index,
468-
builder_boost_factor,
469-
)
470-
.await
471-
.map_err(|e| {
472-
BlockError::Recoverable(format!(
473-
"Error from beacon node when producing block: {:?}",
474-
e
475-
))
476-
})
462+
beacon_node
463+
.get_validator_blocks_v3_ssz::<S::E>(
464+
slot,
465+
randao_reveal_ref,
466+
graffiti.as_ref(),
467+
builder_boost_factor,
468+
)
469+
.await
477470
})
478-
.await?;
471+
.await;
472+
473+
let block_response = match ssz_block_response {
474+
Ok((ssz_block_response, _metadata)) => ssz_block_response,
475+
Err(e) => {
476+
warn!(
477+
slot = slot.as_u64(),
478+
error = %e,
479+
"SSZ block production failed, falling back to JSON"
480+
);
481+
482+
proposer_fallback
483+
.request_proposers_last(|beacon_node| async move {
484+
let _get_timer = validator_metrics::start_timer_vec(
485+
&validator_metrics::BLOCK_SERVICE_TIMES,
486+
&[validator_metrics::BEACON_BLOCK_HTTP_GET],
487+
);
488+
let (json_block_response, _metadata) = beacon_node
489+
.get_validator_blocks_v3::<S::E>(
490+
slot,
491+
randao_reveal_ref,
492+
graffiti.as_ref(),
493+
builder_boost_factor,
494+
)
495+
.await
496+
.map_err(|e| {
497+
BlockError::Recoverable(format!(
498+
"Error from beacon node when producing block: {:?}",
499+
e
500+
))
501+
})?;
502+
503+
Ok(json_block_response.data)
504+
})
505+
.await
506+
.map_err(BlockError::from)?
507+
}
508+
};
509+
510+
let (block_proposer, unsigned_block) = match block_response {
511+
eth2::types::ProduceBlockV3Response::Full(block) => {
512+
(block.block().proposer_index(), UnsignedBlock::Full(block))
513+
}
514+
eth2::types::ProduceBlockV3Response::Blinded(block) => {
515+
(block.proposer_index(), UnsignedBlock::Blinded(block))
516+
}
517+
};
518+
519+
info!(slot = slot.as_u64(), "Received unsigned block");
520+
if proposer_index != Some(block_proposer) {
521+
return Err(BlockError::Recoverable(
522+
"Proposer index does not match block proposer. Beacon chain re-orged".to_string(),
523+
));
524+
}
479525

480526
self_ref
481527
.sign_and_publish_block(
@@ -525,71 +571,6 @@ impl<S: ValidatorStore + 'static, T: SlotClock + 'static> BlockService<S, T> {
525571
}
526572
Ok::<_, BlockError>(())
527573
}
528-
529-
#[instrument(skip_all, fields(%slot))]
530-
async fn get_validator_block(
531-
beacon_node: &BeaconNodeHttpClient,
532-
slot: Slot,
533-
randao_reveal_ref: &SignatureBytes,
534-
graffiti: Option<Graffiti>,
535-
proposer_index: Option<u64>,
536-
builder_boost_factor: Option<u64>,
537-
) -> Result<UnsignedBlock<S::E>, BlockError> {
538-
let block_response = match beacon_node
539-
.get_validator_blocks_v3_ssz::<S::E>(
540-
slot,
541-
randao_reveal_ref,
542-
graffiti.as_ref(),
543-
builder_boost_factor,
544-
)
545-
.await
546-
{
547-
Ok((ssz_block_response, _)) => ssz_block_response,
548-
Err(e) => {
549-
warn!(
550-
slot = slot.as_u64(),
551-
error = %e,
552-
"Beacon node does not support SSZ in block production, falling back to JSON"
553-
);
554-
555-
let (json_block_response, _) = beacon_node
556-
.get_validator_blocks_v3::<S::E>(
557-
slot,
558-
randao_reveal_ref,
559-
graffiti.as_ref(),
560-
builder_boost_factor,
561-
)
562-
.await
563-
.map_err(|e| {
564-
BlockError::Recoverable(format!(
565-
"Error from beacon node when producing block: {:?}",
566-
e
567-
))
568-
})?;
569-
570-
// Extract ProduceBlockV3Response (data field of the struct ForkVersionedResponse)
571-
json_block_response.data
572-
}
573-
};
574-
575-
let (block_proposer, unsigned_block) = match block_response {
576-
eth2::types::ProduceBlockV3Response::Full(block) => {
577-
(block.block().proposer_index(), UnsignedBlock::Full(block))
578-
}
579-
eth2::types::ProduceBlockV3Response::Blinded(block) => {
580-
(block.proposer_index(), UnsignedBlock::Blinded(block))
581-
}
582-
};
583-
584-
info!(slot = slot.as_u64(), "Received unsigned block");
585-
if proposer_index != Some(block_proposer) {
586-
return Err(BlockError::Recoverable(
587-
"Proposer index does not match block proposer. Beacon chain re-orged".to_string(),
588-
));
589-
}
590-
591-
Ok::<_, BlockError>(unsigned_block)
592-
}
593574
}
594575

595576
/// Wrapper for values we want to log about a block we signed, for easy extraction from the possible

0 commit comments

Comments
 (0)