diff --git a/book/api/metrics-generated.md b/book/api/metrics-generated.md index 408aedb67c4..68680e37535 100644 --- a/book/api/metrics-generated.md +++ b/book/api/metrics-generated.md @@ -1195,5 +1195,9 @@ | tower_​lockout_​fail | counter | Locked out (can't vote) | | tower_​threshold_​fail | counter | Did not pass threshold check (can't vote) | | tower_​propagated_​fail | counter | Prev leader block did not propagate (can't vote) | +| tower_​hard_​forks_​seen | counter | Number of hard forks we've seen (block ids with multiple candidate bank hashes) | +| tower_​hard_​forks_​pruned | counter | Number of hard forks (candidate bank hashes) we've pruned | +| tower_​hard_​forks_​active | gauge | Currently active hard forks | +| tower_​hard_​forks_​max_​width | gauge | The max width of hard forks (block id with most candidate bank hashes) we've ever seen | diff --git a/src/app/firedancer-dev/commands/backtest.c b/src/app/firedancer-dev/commands/backtest.c index 23129ac596f..559a5be68d0 100644 --- a/src/app/firedancer-dev/commands/backtest.c +++ b/src/app/firedancer-dev/commands/backtest.c @@ -313,13 +313,6 @@ backtest_topo( config_t * config ) { FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], banks_obj, FD_SHMEM_JOIN_MODE_READ_WRITE ); FD_TEST( fd_pod_insertf_ulong( topo->props, banks_obj->id, "banks" ) ); - /* bank_hash_cmp_obj shared by replay and exec tiles */ - fd_topob_wksp( topo, "bh_cmp" ); - fd_topo_obj_t * bank_hash_cmp_obj = setup_topo_bank_hash_cmp( topo, "bh_cmp" ); - fd_topob_tile_uses( topo, replay_tile, bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE ); - FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE ); - FD_TEST( fd_pod_insertf_ulong( topo->props, bank_hash_cmp_obj->id, "bh_cmp" ) ); - /* txncache_obj, busy_obj and poh_slot_obj only by replay tile */ fd_topob_wksp( topo, "txncache" ); fd_topob_wksp( 
topo, "bank_busy" ); @@ -358,7 +351,6 @@ backtest_topo( config_t * config ) { fd_topo_configure_tile( tile, config ); if( !strcmp( tile->name, "replay" ) ) { - tile->replay.enable_bank_hash_cmp = 0; tile->replay.enable_features_cnt = config->tiles.replay.enable_features_cnt; for( ulong i = 0; i < tile->replay.enable_features_cnt; i++ ) { strncpy( tile->replay.enable_features[i], config->tiles.replay.enable_features[i], sizeof(tile->replay.enable_features[i]) ); diff --git a/src/app/firedancer-dev/main.c b/src/app/firedancer-dev/main.c index 18eb0ef56f2..404cb9a6eb5 100644 --- a/src/app/firedancer-dev/main.c +++ b/src/app/firedancer-dev/main.c @@ -24,7 +24,6 @@ extern fd_topo_obj_callbacks_t fd_obj_cb_fec_sets; extern fd_topo_obj_callbacks_t fd_obj_cb_txncache; extern fd_topo_obj_callbacks_t fd_obj_cb_banks; extern fd_topo_obj_callbacks_t fd_obj_cb_funk; -extern fd_topo_obj_callbacks_t fd_obj_cb_bank_hash_cmp; extern fd_topo_obj_callbacks_t fd_obj_cb_vinyl_meta; extern fd_topo_obj_callbacks_t fd_obj_cb_vinyl_meta_ele; @@ -47,7 +46,6 @@ fd_topo_obj_callbacks_t * CALLBACKS[] = { &fd_obj_cb_txncache, &fd_obj_cb_banks, &fd_obj_cb_funk, - &fd_obj_cb_bank_hash_cmp, &fd_obj_cb_vinyl_meta, &fd_obj_cb_vinyl_meta_ele, &fd_obj_cb_vinyl_data, diff --git a/src/app/firedancer/callbacks.c b/src/app/firedancer/callbacks.c index 7caedb5eb92..b1d32da25a5 100644 --- a/src/app/firedancer/callbacks.c +++ b/src/app/firedancer/callbacks.c @@ -38,34 +38,6 @@ fd_topo_obj_callbacks_t fd_obj_cb_banks = { .new = banks_new, }; -static ulong -bh_cmp_footprint( fd_topo_t const * topo, - fd_topo_obj_t const * obj ) { - (void)topo; (void)obj; - return fd_bank_hash_cmp_footprint(); -} - -static ulong -bh_cmp_align( fd_topo_t const * topo, - fd_topo_obj_t const * obj ) { - (void)topo; (void)obj; - return fd_bank_hash_cmp_align(); -} - -static void -bh_cmp_new( fd_topo_t const * topo, - fd_topo_obj_t const * obj ) { - (void)topo; (void)obj; - FD_TEST( fd_bank_hash_cmp_new( fd_topo_obj_laddr( topo, 
obj->id ) ) ); -} - -fd_topo_obj_callbacks_t fd_obj_cb_bank_hash_cmp = { - .name = "bh_cmp", - .footprint = bh_cmp_footprint, - .align = bh_cmp_align, - .new = bh_cmp_new, -}; - static ulong funk_align( fd_topo_t const * topo, fd_topo_obj_t const * obj ) { diff --git a/src/app/firedancer/config/default.toml b/src/app/firedancer/config/default.toml index 4bffa6a2805..2a5985e1178 100644 --- a/src/app/firedancer/config/default.toml +++ b/src/app/firedancer/config/default.toml @@ -1371,15 +1371,15 @@ user = "" [tiles.tower] # Solana reaches consensus via replay, but can "cluster confirm" # slots ahead of the replay tip by listening to vote txns from - # gossip or TPU. The larger max_lookahead_conf, the further + # gossip or TPU. The larger max_vote_lookahead, the further # ahead slots can be cluster confirmed before they are replayed. # # Specifically, tower will ignore gossip or TPU votes that are - # more than max_lookahead_conf slots ahead of the root. + # more than max_vote_lookahead slots ahead of the root. # - # Note max_lookahead_conf must be >= max_live_slots and + # Note max_vote_lookahead must be >= max_live_slots and # Firedancer will ignore a value where this is not the case. - max_lookahead_conf = 4096 + max_vote_lookahead = 4096 [tiles.send] # The port the send tile uses for QUIC, to send votes and other @@ -1528,6 +1528,19 @@ user = "" # enabled during routine running of the validator. core_dump = true + # A hard fork occurs when our validator has some implementation + # difference in consensus rules and gets a different blockhash for + # a given slot than the rest of the cluster. + # + # Typically, we should assume that our validator is correct and keep + # running, but during development it can be useful to detect if we + # are on the minority hard fork and immediately exit or abort. + # + # If this option is true, the validator will immediately exit with + # an error if it detects a hard fork. 
If false, the validator will + # log the hard fork event and continue running. + hard_fork_fatal = true + # It can be convenient during development to use a network namespace # for running Firedancer. This allows us to send packets at a local # Firedancer instance and have them go through more of the kernel diff --git a/src/app/firedancer/main.c b/src/app/firedancer/main.c index d7ca33e67bc..2805a08a47e 100644 --- a/src/app/firedancer/main.c +++ b/src/app/firedancer/main.c @@ -21,7 +21,6 @@ extern fd_topo_obj_callbacks_t fd_obj_cb_fec_sets; extern fd_topo_obj_callbacks_t fd_obj_cb_txncache; extern fd_topo_obj_callbacks_t fd_obj_cb_banks; extern fd_topo_obj_callbacks_t fd_obj_cb_funk; -extern fd_topo_obj_callbacks_t fd_obj_cb_bank_hash_cmp; fd_topo_obj_callbacks_t * CALLBACKS[] = { &fd_obj_cb_mcache, @@ -39,7 +38,6 @@ fd_topo_obj_callbacks_t * CALLBACKS[] = { &fd_obj_cb_txncache, &fd_obj_cb_banks, &fd_obj_cb_funk, - &fd_obj_cb_bank_hash_cmp, NULL, }; diff --git a/src/app/firedancer/topology.c b/src/app/firedancer/topology.c index f0558304271..374cf67acdd 100644 --- a/src/app/firedancer/topology.c +++ b/src/app/firedancer/topology.c @@ -52,12 +52,6 @@ parse_ip_port( const char * name, const char * ip_port, fd_topo_ip_port_t *parse FD_LOG_ERR(( "could not parse port %s in [%s]", ip_end+1, name )); } -fd_topo_obj_t * -setup_topo_bank_hash_cmp( fd_topo_t * topo, char const * wksp_name ) { - fd_topo_obj_t * obj = fd_topob_obj( topo, "bh_cmp", wksp_name ); - return obj; -} - fd_topo_obj_t * setup_topo_banks( fd_topo_t * topo, char const * wksp_name, @@ -351,7 +345,6 @@ fd_topo_initialize( config_t * config ) { fd_topob_wksp( topo, "funk" ); fd_topob_wksp( topo, "progcache" ); - fd_topob_wksp( topo, "bh_cmp" ); fd_topob_wksp( topo, "fec_sets" ); fd_topob_wksp( topo, "txncache" ); fd_topob_wksp( topo, "banks" ); @@ -940,12 +933,6 @@ fd_topo_initialize( config_t * config ) { } } - /* TODO: This should not exist in production */ - fd_topo_obj_t * bank_hash_cmp_obj = 
setup_topo_bank_hash_cmp( topo, "bh_cmp" ); - /**/ fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "replay", 0UL ) ], bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE ); - FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE ); - FD_TEST( fd_pod_insertf_ulong( topo->props, bank_hash_cmp_obj->id, "bh_cmp" ) ); - ulong fec_set_cnt = shred_depth + config->tiles.shred.max_pending_shred_sets + 4UL; ulong fec_sets_sz = fec_set_cnt*sizeof(fd_shred34_t)*4; /* mirrors # of dcache entires in frankendancer */ fd_topo_obj_t * fec_sets_obj = setup_topo_fec_sets( topo, "fec_sets", shred_tile_cnt*fec_sets_sz ); @@ -1185,7 +1172,6 @@ fd_topo_configure_tile( fd_topo_tile_t * tile, strncpy( tile->replay.identity_key_path, config->paths.identity_key, sizeof(tile->replay.identity_key_path) ); tile->replay.ip_addr = config->net.ip_addr; strncpy( tile->replay.vote_account_path, config->paths.vote_account, sizeof(tile->replay.vote_account_path) ); - tile->replay.enable_bank_hash_cmp = 1; tile->replay.capture_start_slot = config->capture.capture_start_slot; strncpy( tile->replay.solcap_capture, config->capture.solcap_capture, sizeof(tile->replay.solcap_capture) ); @@ -1212,8 +1198,9 @@ fd_topo_configure_tile( fd_topo_tile_t * tile, } else if( FD_UNLIKELY( !strcmp( tile->name, "tower" ) ) ) { + tile->tower.hard_fork_fatal = config->firedancer.development.hard_fork_fatal; tile->tower.max_live_slots = config->firedancer.runtime.max_live_slots; - tile->tower.max_lookahead_conf = config->tiles.tower.max_lookahead_conf; + tile->tower.max_vote_lookahead = config->tiles.tower.max_vote_lookahead; strncpy( tile->tower.identity_key, config->paths.identity_key, sizeof(tile->tower.identity_key) ); strncpy( tile->tower.vote_account, config->paths.vote_account, sizeof(tile->tower.vote_account) ); strncpy( tile->tower.base_path, config->paths.base, sizeof(tile->tower.base_path) ); diff --git 
a/src/app/firedancer/topology.h b/src/app/firedancer/topology.h index 0e3e9213b98..86d5482fad0 100644 --- a/src/app/firedancer/topology.h +++ b/src/app/firedancer/topology.h @@ -13,9 +13,6 @@ FD_PROTOTYPES_BEGIN void fd_topo_initialize( fd_config_t * config ); -fd_topo_obj_t * -setup_topo_bank_hash_cmp( fd_topo_t * topo, char const * wksp_name ); - fd_topo_obj_t * setup_topo_banks( fd_topo_t * topo, char const * wksp_name, diff --git a/src/app/shared/fd_config.h b/src/app/shared/fd_config.h index b771529d1de..e698541f1a4 100644 --- a/src/app/shared/fd_config.h +++ b/src/app/shared/fd_config.h @@ -161,6 +161,10 @@ struct fd_configf { uint full_effective_age_cancel_threshold; } snapshots; + struct { + int hard_fork_fatal; + } development; + struct { ulong max_completed_shred_sets; } store; @@ -486,7 +490,7 @@ struct fd_config { } shredcap; struct { - ulong max_lookahead_conf; + ulong max_vote_lookahead; } tower; } tiles; diff --git a/src/app/shared/fd_config_parse.c b/src/app/shared/fd_config_parse.c index 66dc796e1ae..2969d3f7162 100644 --- a/src/app/shared/fd_config_parse.c +++ b/src/app/shared/fd_config_parse.c @@ -118,6 +118,8 @@ fd_config_extract_podf( uchar * pod, CFG_POP ( uint, snapshots.max_incremental_snapshots_to_keep ); CFG_POP ( uint, snapshots.full_effective_age_cancel_threshold ); + CFG_POP ( bool, development.hard_fork_fatal ); + return config; } @@ -257,7 +259,7 @@ fd_config_extract_pod( uchar * pod, CFG_POP ( ushort, tiles.send.send_src_port ); - CFG_POP ( ulong, tiles.tower.max_lookahead_conf ); + CFG_POP ( ulong, tiles.tower.max_vote_lookahead ); CFG_POP ( bool, tiles.archiver.enabled ); CFG_POP ( ulong, tiles.archiver.end_slot ); diff --git a/src/app/shared/fd_tile_unit_test.c b/src/app/shared/fd_tile_unit_test.c index b89f40b4e0f..97274d2e6d9 100644 --- a/src/app/shared/fd_tile_unit_test.c +++ b/src/app/shared/fd_tile_unit_test.c @@ -21,7 +21,6 @@ extern fd_topo_obj_callbacks_t fd_obj_cb_fec_sets; extern fd_topo_obj_callbacks_t 
fd_obj_cb_txncache; extern fd_topo_obj_callbacks_t fd_obj_cb_banks; extern fd_topo_obj_callbacks_t fd_obj_cb_funk; -extern fd_topo_obj_callbacks_t fd_obj_cb_bank_hash_cmp; fd_topo_obj_callbacks_t * CALLBACKS[] = { &fd_obj_cb_mcache, @@ -39,7 +38,6 @@ fd_topo_obj_callbacks_t * CALLBACKS[] = { &fd_obj_cb_txncache, &fd_obj_cb_banks, &fd_obj_cb_funk, - &fd_obj_cb_bank_hash_cmp, NULL, }; diff --git a/src/choreo/fd_choreo_base.h b/src/choreo/fd_choreo_base.h index 667bee7c691..95b4d7c34c2 100644 --- a/src/choreo/fd_choreo_base.h +++ b/src/choreo/fd_choreo_base.h @@ -36,5 +36,6 @@ typedef uchar fd_block_id_t[ 32UL ]; typedef fd_slot_hash_t fd_slot_pubkey_t; static const fd_pubkey_t pubkey_null = {{ 0 }}; +static const fd_hash_t hash_null = {{ 0 }}; #endif /* HEADER_fd_src_choreo_fd_choreo_base_h */ diff --git a/src/choreo/ghost/fd_ghost.c b/src/choreo/ghost/fd_ghost.c index 77444a84cd1..e3f7c6e964e 100644 --- a/src/choreo/ghost/fd_ghost.c +++ b/src/choreo/ghost/fd_ghost.c @@ -271,18 +271,18 @@ fd_ghost_insert( fd_ghost_t * ghost, void fd_ghost_count_vote( fd_ghost_t * ghost, fd_ghost_blk_t * blk, - fd_pubkey_t const * vtr_addr, + fd_pubkey_t const * vote_acc, ulong stake, ulong slot ) { fd_ghost_blk_t const * root = fd_ghost_root( ghost ); fd_ghost_blk_t * pool = ghost->pool; - fd_ghost_vtr_t * vtr = vtr_map_query( ghost->vtr_map, *vtr_addr, NULL ); + fd_ghost_vtr_t * vtr = vtr_map_query( ghost->vtr_map, *vote_acc, NULL ); if( FD_UNLIKELY( slot == ULONG_MAX ) ) return; /* hasn't voted */ if( FD_UNLIKELY( slot < root->slot ) ) return; /* vote older than root */ - if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( ghost->vtr_map, *vtr_addr ); + if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( ghost->vtr_map, *vote_acc ); else { /* Only process the vote if it is not the same as the previous vote diff --git a/src/choreo/hfork/Local.mk b/src/choreo/hfork/Local.mk new file mode 100644 index 00000000000..ebbfe6497b2 --- /dev/null +++ b/src/choreo/hfork/Local.mk @@ -0,0 +1,8 @@ 
+$(call add-hdrs,fd_hfork.h) +$(call add-objs,fd_hfork,fd_choreo) +ifdef FD_HAS_HOSTED +ifdef FD_HAS_SECP256K1 +$(call make-unit-test,test_hfork,test_hfork,fd_choreo fd_flamenco fd_tango fd_ballet fd_util) +$(call run-unit-test,test_hfork) +endif +endif diff --git a/src/choreo/hfork/fd_hfork.c b/src/choreo/hfork/fd_hfork.c new file mode 100644 index 00000000000..c82be133f8a --- /dev/null +++ b/src/choreo/hfork/fd_hfork.c @@ -0,0 +1,300 @@ +#include "fd_hfork.h" +#include "fd_hfork_private.h" + +static void +check( fd_hfork_t * hfork, + ulong total_stake, + candidate_t * candidate, + int invalid, + fd_hash_t * our_bank_hash ) { + + if( FD_LIKELY( candidate->checked ) ) return; /* already checked this bank hash against our own */ + if( FD_LIKELY( candidate->stake * 100UL / total_stake < 52UL ) ) return; /* not enough stake to compare */ + + if( FD_UNLIKELY( invalid ) ) { + char msg[ 4096UL ]; + FD_BASE58_ENCODE_32_BYTES( candidate->key.block_id.uc, _block_id ); + FD_TEST( fd_cstr_printf_check( msg, sizeof( msg ), NULL, + "HARD FORK DETECTED: our validator has marked slot %lu with block ID `%s` dead, but %lu validators with %.1f of stake have voted on it", + candidate->slot, + _block_id, + candidate->cnt, + 100.0*(double)candidate->stake/(double)total_stake ) ); + + if( FD_UNLIKELY( hfork->fatal ) ) FD_LOG_ERR (( "%s", msg )); + else FD_LOG_WARNING(( "%s", msg )); + } else if( FD_UNLIKELY( 0!=memcmp( our_bank_hash, &candidate->key.bank_hash, 32UL ) ) ) { + char msg[ 4096UL ]; + FD_BASE58_ENCODE_32_BYTES( our_bank_hash->uc, _our_bank_hash ); + FD_BASE58_ENCODE_32_BYTES( candidate->key.block_id.uc, _block_id ); + FD_BASE58_ENCODE_32_BYTES( candidate->key.bank_hash.uc, _bank_hash ); + FD_TEST( fd_cstr_printf_check( msg, sizeof( msg ), NULL, + "HARD FORK DETECTED: our validator has produced bank hash `%s` for slot %lu with block ID `%s`, but %lu validators with %.1f of stake have voted on a different bank hash `%s` for the same slot", + _our_bank_hash, + candidate->slot, 
+ _block_id, + candidate->cnt, + 100.0*(double)candidate->stake/(double)total_stake, + _bank_hash ) ); + + if( FD_UNLIKELY( hfork->fatal ) ) FD_LOG_ERR (( "%s", msg )); + else FD_LOG_WARNING(( "%s", msg )); + } + candidate->checked = 1; +} + +ulong +fd_hfork_align( void ) { + return 128UL; +} + +ulong +fd_hfork_footprint( ulong max_live_slots, + ulong max_vote_accounts ) { + ulong fork_max = max_live_slots * max_vote_accounts; + int lg_blk_max = fd_ulong_find_msb( fd_ulong_pow2_up( fork_max ) ) + 1; + int lg_vtr_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_vote_accounts ) ) + 1; + + ulong l = FD_LAYOUT_INIT; + l = FD_LAYOUT_APPEND( l, alignof(fd_hfork_t), sizeof(fd_hfork_t) ); + l = FD_LAYOUT_APPEND( l, blk_map_align(), blk_map_footprint( lg_blk_max ) ); + l = FD_LAYOUT_APPEND( l, vtr_map_align(), vtr_map_footprint( lg_vtr_max ) ); + l = FD_LAYOUT_APPEND( l, candidate_map_align(), candidate_map_footprint( lg_blk_max ) ); + l = FD_LAYOUT_APPEND( l, bank_hash_pool_align(), bank_hash_pool_footprint( fork_max ) ); + for( ulong i = 0UL; i < fd_ulong_pow2( lg_vtr_max ); i++ ) { + l = FD_LAYOUT_APPEND( l, votes_align(), votes_footprint( max_live_slots ) ); + } + return FD_LAYOUT_FINI( l, fd_hfork_align() ); +} + +void * +fd_hfork_new( void * shmem, + ulong max_live_slots, + ulong max_vote_accounts, + ulong seed, + int fatal ) { + (void)seed; /* TODO map seed */ + + if( FD_UNLIKELY( !shmem ) ) { + FD_LOG_WARNING(( "NULL mem" )); + return NULL; + } + + if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_hfork_align() ) ) ) { + FD_LOG_WARNING(( "misaligned mem" )); + return NULL; + } + + ulong footprint = fd_hfork_footprint( max_live_slots, max_vote_accounts ); + if( FD_UNLIKELY( !footprint ) ) { + FD_LOG_WARNING(( "bad max_live_slots (%lu) or max_vote_accounts (%lu)", max_live_slots, max_vote_accounts )); + return NULL; + } + + fd_memset( shmem, 0, footprint ); + + ulong fork_max = max_live_slots * max_vote_accounts; + int lg_blk_max = fd_ulong_find_msb( 
fd_ulong_pow2_up( fork_max ) ) + 1; + int lg_vtr_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_vote_accounts ) ) + 1; + + FD_SCRATCH_ALLOC_INIT( l, shmem ); + fd_hfork_t * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), sizeof( fd_hfork_t ) ); + void * blk_map = FD_SCRATCH_ALLOC_APPEND( l, blk_map_align(), blk_map_footprint( lg_blk_max ) ); + void * vtr_map = FD_SCRATCH_ALLOC_APPEND( l, vtr_map_align(), vtr_map_footprint( lg_vtr_max ) ); + void * candidate_map = FD_SCRATCH_ALLOC_APPEND( l, candidate_map_align(), candidate_map_footprint( lg_blk_max ) ); + void * bank_hash_pool = FD_SCRATCH_ALLOC_APPEND( l, bank_hash_pool_align(), bank_hash_pool_footprint( fork_max ) ); + + hfork->blk_map = blk_map_new( blk_map, lg_blk_max ); + hfork->vtr_map = vtr_map_new( vtr_map, lg_vtr_max ); + hfork->candidate_map = candidate_map_new( candidate_map, lg_blk_max ); + hfork->bank_hash_pool = bank_hash_pool_new( bank_hash_pool, fork_max ); + for( ulong i = 0UL; i < fd_ulong_pow2( lg_vtr_max ); i++ ) { + void * votes = FD_SCRATCH_ALLOC_APPEND( l, votes_align(), votes_footprint( max_live_slots ) ); + vtr_t * join = vtr_map_join( hfork->vtr_map ); + join[i].votes = votes_new( votes, max_live_slots ); + } + FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_hfork_align() ) == (ulong)shmem + footprint ); + hfork->fatal = fatal; + return shmem; +} + +fd_hfork_t * +fd_hfork_join( void * shhfork ) { + fd_hfork_t * hfork = (fd_hfork_t *)shhfork; + + if( FD_UNLIKELY( !hfork ) ) { + FD_LOG_WARNING(( "NULL hfork" )); + return NULL; + } + + if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)hfork, fd_hfork_align() ) ) ) { + FD_LOG_WARNING(( "misaligned hfork" )); + return NULL; + } + + hfork->blk_map = blk_map_join( hfork->blk_map ); + hfork->vtr_map = vtr_map_join( hfork->vtr_map ); + hfork->candidate_map = candidate_map_join( hfork->candidate_map ); + hfork->bank_hash_pool = bank_hash_pool_join( hfork->bank_hash_pool ); + for( ulong i = 0UL; i < vtr_map_slot_cnt( hfork->vtr_map ); i++ ) { + 
hfork->vtr_map[i].votes = votes_join( hfork->vtr_map[i].votes ); + } + + return hfork; +} + +void * +fd_hfork_leave( fd_hfork_t const * hfork ) { + + if( FD_UNLIKELY( !hfork ) ) { + FD_LOG_WARNING(( "NULL hfork" )); + return NULL; + } + + return (void *)hfork; +} + +void * +fd_hfork_delete( void * hfork ) { + + if( FD_UNLIKELY( !hfork ) ) { + FD_LOG_WARNING(( "NULL hfork" )); + return NULL; + } + + if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)hfork, fd_hfork_align() ) ) ) { + FD_LOG_WARNING(( "misaligned hfork" )); + return NULL; + } + + return hfork; +} + +void +fd_hfork_count_vote( fd_hfork_t * hfork, + fd_hash_t const * vote_acc, + fd_hash_t const * block_id, + fd_hash_t const * bank_hash, + ulong slot, + ulong stake, + ulong total_stake, + fd_hfork_metrics_t * metrics ) { + + /* Get the vtr. */ + + vtr_t * vtr = vtr_map_query( hfork->vtr_map, *vote_acc, NULL ); + if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( hfork->vtr_map, *vote_acc ); + + /* Ignore out of order or duplicate votes. */ + + if( FD_UNLIKELY( !votes_empty( vtr->votes ) ) ) { + vote_t const * tail = votes_peek_tail_const( vtr->votes ); + if( FD_UNLIKELY( tail && tail->slot >= slot ) ) return; + } + + /* Evict the candidate's oldest vote (by vote slot). 
*/ + + if( FD_UNLIKELY( votes_full( vtr->votes ) ) ) { + vote_t vote = votes_pop_head( vtr->votes ); + candidate_key_t key = { .block_id = vote.block_id, .bank_hash = vote.bank_hash }; + candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL ); + candidate->stake -= vote.stake; + candidate->cnt--; + if( FD_UNLIKELY( candidate->cnt==0 ) ) { + candidate_map_remove( hfork->candidate_map, candidate ); + blk_t * blk = blk_map_query( hfork->blk_map, vote.block_id, NULL ); + bank_hash_t * remove = blk->bank_hashes; + blk->bank_hashes = bank_hash_pool_ele( hfork->bank_hash_pool, remove->next ); + bank_hash_pool_ele_release( hfork->bank_hash_pool, remove ); + + if( FD_UNLIKELY( !blk->bank_hashes ) ) { + blk_map_remove( hfork->blk_map, blk ); + if( FD_UNLIKELY( blk->forked ) ) { + metrics->active--; + metrics->pruned++; + } + } + } + } + + /* Push the vote onto the vtr. */ + + vote_t vote = { .block_id = *block_id, .bank_hash = *bank_hash, .slot = slot, .stake = stake }; + vtr->votes = votes_push_tail( vtr->votes, vote ); + + /* Update the hard fork candidate for this block id. */ + + candidate_key_t key = { .block_id = *block_id, .bank_hash = *bank_hash }; + candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL ); + if( FD_UNLIKELY( !candidate ) ) { + candidate = candidate_map_insert( hfork->candidate_map, key ); + candidate->slot = slot; + candidate->stake = 0UL; + candidate->cnt = 0UL; + } + candidate->cnt++; + candidate->stake += stake; + + /* Update the list of bank hashes for this block_id. 
*/ + + blk_t * blk = blk_map_query( hfork->blk_map, *block_id, NULL ); + if( FD_UNLIKELY( !blk ) ) { + FD_TEST( blk_map_key_cnt( hfork->blk_map ) < blk_map_key_max( hfork->blk_map ) ); /* invariant violation: blk_map full */ + blk = blk_map_insert( hfork->blk_map, *block_id ); + FD_TEST( blk ); + blk->bank_hashes = NULL; + blk->replayed = 0; + blk->invalid = 0; + } + int found = 0; + ulong cnt = 0; + bank_hash_t * prev = NULL; + bank_hash_t * curr = blk->bank_hashes; + while( FD_LIKELY( curr ) ) { + if( FD_LIKELY( 0==memcmp( curr, bank_hash, 32UL ) ) ) found = 1; + prev = curr; + curr = bank_hash_pool_ele( hfork->bank_hash_pool, curr->next ); + cnt++; + } + + if( FD_UNLIKELY( !found ) ) { + bank_hash_t * ele = bank_hash_pool_ele_acquire( hfork->bank_hash_pool ); + ele->bank_hash = *bank_hash; + ele->next = bank_hash_pool_idx_null( hfork->bank_hash_pool ); + if( FD_LIKELY( !prev ) ) blk->bank_hashes = ele; + else { + prev->next = bank_hash_pool_idx( hfork->bank_hash_pool, ele ); + blk->forked = 1; + metrics->seen++; + metrics->active++; + } + cnt++; + } + metrics->max_width = fd_ulong_max( metrics->max_width, cnt ); + + /* Check for hard forks. 
*/ + + if( FD_LIKELY( blk->replayed ) ) check( hfork, total_stake, candidate, blk->invalid, &blk->our_bank_hash ); +} + +void +fd_hfork_record_our_bank_hash( fd_hfork_t * hfork, + fd_hash_t * block_id, + fd_hash_t * bank_hash, + ulong total_stake ) { + blk_t * blk = blk_map_query( hfork->blk_map, *block_id, NULL ); + if( FD_UNLIKELY( !blk ) ) { + blk = blk_map_insert( hfork->blk_map, *block_id ); + blk->replayed = 1; + } + if( FD_LIKELY( bank_hash ) ) { blk->invalid = 0; blk->our_bank_hash = *bank_hash; } + else blk->invalid = 1; + + bank_hash_t * curr = blk->bank_hashes; + while( FD_LIKELY( curr ) ) { + candidate_key_t key = { .block_id = *block_id, .bank_hash = curr->bank_hash }; + candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL ); + if( FD_LIKELY( candidate ) ) check( hfork, total_stake, candidate, blk->invalid, &blk->our_bank_hash ); + curr = bank_hash_pool_ele( hfork->bank_hash_pool, curr->next ); + } +} diff --git a/src/choreo/hfork/fd_hfork.h b/src/choreo/hfork/fd_hfork.h new file mode 100644 index 00000000000..14e5e6b5ac3 --- /dev/null +++ b/src/choreo/hfork/fd_hfork.h @@ -0,0 +1,172 @@ +#ifndef HEADER_fd_src_discof_tower_fd_hfork_h +#define HEADER_fd_src_discof_tower_fd_hfork_h + +/* The Solana chain occasionally forks for what we will call soft and + hard reasons. + + - Soft forks occur due to network latency and the distributed + nature of the network. Different validators may see different + blocks at different times, and so may disagree on ledger state + temporarily until the network converges. This is expected and + normal. + + - Hard forks occur when validators have a disagreement about the + rules of the protocol and come to different conclusions about the + permanent state of the ledger. This is not expected or normal. + + For Firedancer, it is useful to be able to detect when any hard fork + has occurred, because it means there was likely some consensus bug + that caused us to diverge from Agave. 
What we check is actually + broader: it is possible that we disagreed about the result of some + block, but did not diverge from Agave as the block we disagreed on + did not become canonical. We still want to detect this case, as it + indicates a consensus bug even if we did not ultimately diverge. + + We detect hard forked blocks by monitoring votes from validators. The + specific criteria are: + + - 52% of stake has voted on a block_id and agreed on a bank_hash + which is different than the bank_hash we have for that block_id. + + - 52% of stake has voted on a block_id and agreed on a bank_hash + when we marked the block dead because it failed to execute. + + Interestingly we do not need to handle the case where we thought a + block succeeded, but the rest of the cluster failed it, because this + does not cause a hard fork. We simply switch to the canonical fork + the rest of the cluster will converge on. It is not really possible + to detect this case, because other nodes do not report that the block + failed, they simply vote somewhere else. + + We are essentially receiving a + + Stream<Either<Vote(vote_acc, block_id, bank_hash), OurBankHash(block_id, bank_hash)>> + + The first variant is a vote from some validator for a certain + block_id and bank_hash. This could come from either a replayed + block or gossip. It does not matter where it comes from, or even if + the source is valid (e.g. it could come from a block which gets + marked dead because it did not validate), all that matters is that the + vote is validly signed by the vote account. + + Internally, we maintain a Map<block_id, Map<bank_hash, stake>>. + Any time we receive a vote, if it causes an entry in this map to + exceed 52% of the total stake weight, or it is from our own validator + identity, we check if our bank hash is different, and potentially raise a + warning or error. + + The last max_live_slots votes for each vote account are kept in a + ring buffer and replaced as new votes come in, to prevent unbounded + memory usage.
This makes the structure somewhat heuristic: it might + be that if some nodes are very far ahead, and some behind, we might + evict old votes for those ahead and never see a fork exceed 52% in + our window. This is unlikely to happen in practice, and even if it + does, it only means we might miss detecting a hard fork, which is not + catastrophic. The default behavior anyway should be to continue + running on the forked chain. */ + +#include "../fd_choreo_base.h" + +struct fd_hfork; +typedef struct fd_hfork fd_hfork_t; + +struct fd_hfork_metrics { + ulong seen; /* block ids seen with more than one candidate bank hash */ + ulong pruned; /* forked block ids dropped from tracking */ + ulong active; /* forked block ids currently tracked */ + ulong max_width; /* most candidate bank hashes seen for one block id */ +}; +typedef struct fd_hfork_metrics fd_hfork_metrics_t; + +FD_PROTOTYPES_BEGIN + +/* fd_hfork_{align,footprint} return the required alignment and + footprint of a memory region suitable for use as a hfork. align + returns 128. footprint grows with max_live_slots*max_vote_accounts. */ + +FD_FN_CONST ulong +fd_hfork_align( void ); + +FD_FN_CONST ulong +fd_hfork_footprint( ulong max_live_slots, + ulong max_vote_accounts ); + +/* fd_hfork_new formats an unused memory region for use as a hfork. mem + is a non-NULL pointer to this region in the local address space with + the required footprint and alignment. seed is currently unused. */ + +void * +fd_hfork_new( void * mem, + ulong max_live_slots, + ulong max_vote_accounts, + ulong seed, + int fatal ); + +/* fd_hfork_join joins the caller to the hfork. hfork points to the + first byte of the memory region backing the hfork in the caller's + address space. + + Returns a pointer in the local address space to hfork on success. */ + +fd_hfork_t * +fd_hfork_join( void * hfork ); + +/* fd_hfork_leave leaves a current local join. Returns a pointer to the + underlying shared memory region on success and NULL on failure (logs + details). Reasons for failure include hfork is NULL. */ + +void * +fd_hfork_leave( fd_hfork_t const * hfork ); + +/* fd_hfork_delete unformats a memory region used as a hfork.
Assumes + only the local process is joined to the region. Returns a pointer to + the underlying shared memory region or NULL if used obviously in + error (e.g. hfork is obviously not a hfork ... logs details). The + ownership of the memory region is transferred to the caller. */ + +void * +fd_hfork_delete( void * hfork ); + +/* Update the block detector with a newly observed vote. A vote is for + a bank hash, with a block ID, from a certain vote account. Updates + are a time ordered stream, containing votes from both gossip and + replayed blocks. Any vote which has a valid signature is valid, and + should be provided to the update time series, even if, for example, + it's from a block which was not valid or got skipped, or the vote + otherwise looks malformed. + + If incorporating the vote would cause a hard fork to be detected, and + if the hard fork detector was created with fatal=0, this simply logs + a warning and returns, and the validator will continue running now on + the forked chain. This is the preferred mode for production, as it + makes sense from a consensus perspective to continue running in case + the other nodes switch to our fork. + + Otherwise, if the hard fork detector was created with fatal!=0, this + will log a critical error and abort the process. This mode is useful + for development, testing, and debugging purposes to make sure we can + isolate the failure to the specific time it occurs. */ + +void +fd_hfork_count_vote( fd_hfork_t * hfork, + fd_hash_t const * vote_acc, + fd_hash_t const * block_id, + fd_hash_t const * bank_hash, + ulong slot, + ulong stake, + ulong total_stake, + fd_hfork_metrics_t * metrics ); + +/* Update the hard fork detector with our bank hash (computed on replay) + for a given block ID. If bank_hash is NULL, this indicates the block + was marked dead during replay and we did not think it was valid. 
*/ + +void +fd_hfork_record_our_bank_hash( fd_hfork_t * hfork, + fd_hash_t * block_id, + fd_hash_t * bank_hash, + ulong total_stake ); + +FD_PROTOTYPES_END + +#endif /* HEADER_fd_src_discof_tower_fd_hfork_h */ diff --git a/src/choreo/hfork/fd_hfork_private.h b/src/choreo/hfork/fd_hfork_private.h new file mode 100644 index 00000000000..5a8781ef713 --- /dev/null +++ b/src/choreo/hfork/fd_hfork_private.h @@ -0,0 +1,102 @@ +#include "fd_hfork.h" + +struct bank_hash { + fd_hash_t bank_hash; + ulong next; +}; +typedef struct bank_hash bank_hash_t; + +#define POOL_NAME bank_hash_pool +#define POOL_T bank_hash_t +#include "../../util/tmpl/fd_pool.c" + +struct blk { + fd_hash_t block_id; + uint hash; + int forked; /* whether this block id has hard forked (multiple candidate bank hashes) */ + int replayed; /* whether we've replayed */ + int invalid; /* whether we marked the block as invalid during replay (must ignore our_bank_hash) */ + fd_hash_t our_bank_hash; /* our bank hash for this block_id after replay */ + bank_hash_t * bank_hashes; +}; +typedef struct blk blk_t; + +#define MAP_NAME blk_map +#define MAP_T blk_t +#define MAP_KEY block_id +#define MAP_KEY_T fd_hash_t +#define MAP_KEY_NULL hash_null +#define MAP_KEY_EQUAL_IS_SLOW 1 +#define MAP_KEY_INVAL(k) MAP_KEY_EQUAL((k),MAP_KEY_NULL) +#define MAP_KEY_EQUAL(k0,k1) (!memcmp( (k0).key, (k1).key, 32UL )) +#define MAP_KEY_HASH(key) ((MAP_HASH_T)( (key).ul[1] )) +#include "../../util/tmpl/fd_map_dynamic.c" + +struct vote { + fd_hash_t block_id; + fd_hash_t bank_hash; + ulong slot; + ulong stake; +}; +typedef struct vote vote_t; + +#define DEQUE_NAME votes +#define DEQUE_T vote_t +#include "../../util/tmpl/fd_deque_dynamic.c" + +struct vtr { + fd_pubkey_t vote_acc; + uint hash; + vote_t * votes; +}; +typedef struct vtr vtr_t; + +#define MAP_NAME vtr_map +#define MAP_T vtr_t +#define MAP_KEY vote_acc +#define MAP_KEY_T fd_pubkey_t +#define MAP_KEY_NULL pubkey_null +#define MAP_KEY_EQUAL_IS_SLOW 1 +#define MAP_KEY_INVAL(k) 
MAP_KEY_EQUAL((k),MAP_KEY_NULL) +#define MAP_KEY_EQUAL(k0,k1) (!memcmp( (k0).key, (k1).key, 32UL )) +#define MAP_KEY_HASH(key) ((MAP_HASH_T)( (key).ul[1] )) +#include "../../util/tmpl/fd_map_dynamic.c" + +struct candidate_key { + fd_pubkey_t block_id; + fd_pubkey_t bank_hash; +}; +typedef struct candidate_key candidate_key_t; + +struct candidate { + candidate_key_t key; + uint hash; + ulong slot; + ulong stake; + ulong cnt; + int checked; +}; +typedef struct candidate candidate_t; + +static const candidate_key_t candidate_key_null = { 0 }; + +#define MAP_NAME candidate_map +#define MAP_KEY key +#define MAP_T candidate_t +#define MAP_KEY_T candidate_key_t +#define MAP_KEY_NULL candidate_key_null +#define MAP_KEY_INVAL(k) MAP_KEY_EQUAL((k),MAP_KEY_NULL) +#define MAP_KEY_EQUAL(k0,k1) (fd_pubkey_eq( &((k0).block_id), &((k1).block_id ) ) &\ + fd_pubkey_eq( &((k0).bank_hash), &((k1).bank_hash) ) ) +#define MAP_KEY_EQUAL_IS_SLOW 1 +#define MAP_KEY_HASH(key) (fd_uint_load_4( (key).block_id.uc ) ^ fd_uint_load_4( (key).bank_hash.uc ) ) +#include "../../util/tmpl/fd_map_dynamic.c" + +struct __attribute__((aligned(128UL))) fd_hfork { + blk_t * blk_map; + vtr_t * vtr_map; + candidate_t * candidate_map; + bank_hash_t * bank_hash_pool; + int fatal; +}; +typedef struct fd_hfork fd_hfork_t; diff --git a/src/choreo/hfork/test_hfork.c b/src/choreo/hfork/test_hfork.c new file mode 100644 index 00000000000..2eaa68b2625 --- /dev/null +++ b/src/choreo/hfork/test_hfork.c @@ -0,0 +1,78 @@ +#include "fd_hfork.h" +#include "fd_hfork_private.h" + +void +test_hfork_simple( fd_wksp_t * wksp ) { + ulong max_live_slots = 2; + ulong max_vote_accounts = 2; + + void * mem = fd_wksp_alloc_laddr( wksp, fd_hfork_align(), fd_hfork_footprint( max_live_slots, max_vote_accounts ), 1UL ); + fd_hfork_t * hfork = fd_hfork_join( fd_hfork_new( mem, max_live_slots, max_vote_accounts, 42, 0 ) ); + FD_TEST( hfork ); + + ulong slot = 368778153; + fd_hash_t block_id = { .ul = { slot } }; + fd_hash_t bank_hash = { 
.ul = { slot } }; + + ulong slot1 = 368778154; + fd_hash_t block_id1 = { .ul = { slot1 } }; + fd_hash_t bank_hash1 = { .ul = { slot1 } }; + + ulong slot2 = 368778155; + fd_hash_t block_id2 = { .ul = { slot2 } }; + fd_hash_t bank_hash2 = { .ul = { slot2 } }; + + fd_hash_t voters[2] = { + (fd_hash_t){ .ul = { 1 } }, + (fd_hash_t){ .ul = { 2 } }, + }; + + fd_hfork_metrics_t metrics = { 0 }; + + FD_TEST( 0!=memcmp( voters->key, pubkey_null.key, 32UL ) ); + for( ulong i = 0; i < vtr_map_slot_cnt( hfork->vtr_map ); i++ ) { + FD_TEST( 0==memcmp( hfork->vtr_map[i].vote_acc.key, pubkey_null.key, 32UL ) ); + } + + fd_hfork_count_vote( hfork, &voters[0], &block_id, &bank_hash, slot, 1, 100, &metrics ); + candidate_key_t key = { .block_id = block_id, .bank_hash = bank_hash }; + candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL ); + FD_TEST( candidate->slot ==slot ); + FD_TEST( candidate->stake ==1 ); + FD_TEST( candidate->cnt ==1 ); + FD_TEST( candidate->checked==0 ); + + fd_hfork_count_vote( hfork, &voters[1], &block_id, &bank_hash, slot, 51, 100, &metrics ); + FD_TEST( candidate->stake ==52 ); + FD_TEST( candidate->cnt ==2 ); + FD_TEST( candidate->checked==0 ); + + fd_hfork_record_our_bank_hash( hfork, &block_id, &bank_hash, 100 ); + FD_TEST( candidate->checked==1 ); + + fd_hfork_count_vote( hfork, &voters[0], &block_id1, &bank_hash1, slot1, 1, 100, &metrics ); + fd_hfork_count_vote( hfork, &voters[0], &block_id2, &bank_hash2, slot2, 1, 100, &metrics ); + + /* evicted */ + + FD_TEST( candidate->stake==51 ); + FD_TEST( candidate->cnt ==1 ); + + fd_wksp_free_laddr( fd_hfork_delete( fd_hfork_leave( hfork ) ) ); +} + +int +main( int argc, char ** argv ) { + fd_boot( &argc, &argv ); + + ulong page_cnt = 1; + char * _page_sz = "gigantic"; + ulong numa_idx = fd_shmem_numa_idx( 0 ); + fd_wksp_t * wksp = fd_wksp_new_anonymous( fd_cstr_to_shmem_page_sz( _page_sz ), page_cnt, fd_shmem_cpu_idx( numa_idx ), "wksp", 0UL ); + FD_TEST( wksp ); + + 
test_hfork_simple( wksp ); + + fd_halt(); + return 0; +} diff --git a/src/choreo/notar/fd_notar.c b/src/choreo/notar/fd_notar.c index caf2dbd28a2..32583606a36 100644 --- a/src/choreo/notar/fd_notar.c +++ b/src/choreo/notar/fd_notar.c @@ -159,7 +159,7 @@ fd_notar_advance_epoch( fd_notar_t * notar, notar->epoch = epoch; for( ulong i = 0; i < fd_notar_vtr_key_max( notar->vtr_map ); i++ ) { fd_notar_vtr_t * vtr = &notar->vtr_map[i]; - if( fd_notar_vtr_key_inval( vtr->addr ) ) continue; + if( fd_notar_vtr_key_inval( vtr->vote_acc ) ) continue; vtr->prev_stake = vtr->stake; vtr->stake = 0; vtr->prev_bit = vtr->bit; diff --git a/src/choreo/notar/fd_notar.h b/src/choreo/notar/fd_notar.h index 47938f9036c..d8ab539a85b 100644 --- a/src/choreo/notar/fd_notar.h +++ b/src/choreo/notar/fd_notar.h @@ -121,8 +121,6 @@ struct fd_notar_blk { }; typedef struct fd_notar_blk fd_notar_blk_t; -static const fd_hash_t hash_null = {{ 0 }}; - #define MAP_NAME fd_notar_blk #define MAP_T fd_notar_blk_t #define MAP_KEY block_id @@ -143,7 +141,7 @@ static const fd_hash_t hash_null = {{ 0 }}; #include "../../util/tmpl/fd_map_dynamic.c" struct fd_notar_vtr { - fd_pubkey_t addr; /* map key, vote account address */ + fd_pubkey_t vote_acc; /* map key, vote account address */ uint hash; /* reserved for fd_map_dynamic */ ulong prev_stake; /* amount of stake this voter has in epoch - 1 */ ulong stake; /* amount of stake this voter has in epoch */ @@ -154,7 +152,7 @@ typedef struct fd_notar_vtr fd_notar_vtr_t; #define MAP_NAME fd_notar_vtr #define MAP_T fd_notar_vtr_t -#define MAP_KEY addr +#define MAP_KEY vote_acc #define MAP_KEY_T fd_pubkey_t #define MAP_KEY_NULL pubkey_null #define MAP_KEY_EQUAL_IS_SLOW 1 @@ -207,7 +205,7 @@ fd_notar_footprint( ulong slot_max ) { the required footprint and alignment. */ void * -fd_notar_new( void * mem, +fd_notar_new( void * shmem, ulong slot_max ); /* fd_notar_join joins the caller to the notar.
notar points to the diff --git a/src/choreo/tower/fd_epoch_stakes.c b/src/choreo/tower/fd_epoch_stakes.c index 296d7d4ba47..5bedf20af65 100644 --- a/src/choreo/tower/fd_epoch_stakes.c +++ b/src/choreo/tower/fd_epoch_stakes.c @@ -1,7 +1,8 @@ #include "fd_epoch_stakes.h" void * -fd_epoch_stakes_new( void * shmem, ulong slot_max ) { +fd_epoch_stakes_new( void * shmem, + ulong slot_max ) { if( FD_UNLIKELY( !shmem ) ) { FD_LOG_WARNING(( "NULL mem" )); return NULL; @@ -49,7 +50,7 @@ fd_epoch_stakes_slot_stakes_add( fd_epoch_stakes_t * epoch_stakes, ulong slot, f fd_voter_stake_t * pool = epoch_stakes->voter_stake_pool; if( FD_UNLIKELY( !fd_voter_stake_pool_free( pool ) ) ) FD_LOG_CRIT(( "no free voter stakes in pool" )); fd_voter_stake_t * new_voter_stake = fd_voter_stake_pool_ele_acquire( pool ); - new_voter_stake->key = (fd_voter_stake_key_t){ .vote_account = *vote_account, .slot = slot }; + new_voter_stake->key = (fd_voter_stake_key_t){ .vote_account = *vote_account, .slot = slot }; new_voter_stake->stake = stake; new_voter_stake->prev = prev_voter_idx; fd_voter_stake_map_ele_insert( epoch_stakes->voter_stake_map, new_voter_stake, pool ); @@ -76,5 +77,3 @@ fd_epoch_stakes_slot_stakes_remove( fd_epoch_stakes_t * epoch_stakes, fd_epoch_s } fd_epoch_stakes_slot_map_remove( epoch_stakes->slot_stakes_map, slot ); } - - diff --git a/src/choreo/tower/fd_epoch_stakes.h b/src/choreo/tower/fd_epoch_stakes.h index 33a7c7dfb1a..7ec7b2e9bbb 100644 --- a/src/choreo/tower/fd_epoch_stakes.h +++ b/src/choreo/tower/fd_epoch_stakes.h @@ -117,7 +117,8 @@ fd_epoch_stakes_footprint( ulong slot_max ) { } void * -fd_epoch_stakes_new( void * shmem, ulong slot_max ); +fd_epoch_stakes_new( void * shmem, + ulong slot_max ); fd_epoch_stakes_t * fd_epoch_stakes_join( void * shepoch_stakes ); diff --git a/src/choreo/tower/fd_tower_accts.h b/src/choreo/tower/fd_tower_accts.h index b6b1e53e03b..7b3943ffa28 100644 --- a/src/choreo/tower/fd_tower_accts.h +++ b/src/choreo/tower/fd_tower_accts.h @@ -3,6 
+3,8 @@ #include "../fd_choreo_base.h" +#define FD_VOTE_STATE_DATA_MAX 3762UL + /* fd_tower_accts describes the set of vote accounts that feed into TowerBFT rules. This is fixed for each epoch, and each acct is associated with a 3-tuple of (vote account address, vote account @@ -10,9 +12,9 @@ intended to be as of the same slot. */ struct fd_tower_accts { - fd_pubkey_t addr; /* vote account address */ - ulong stake; /* vote account stake */ - uchar data[3762]; /* vote account data (max 3762 bytes) */ + fd_pubkey_t addr; /* vote account address */ + ulong stake; /* vote account stake */ + uchar data[FD_VOTE_STATE_DATA_MAX]; /* vote account data (max 3762 bytes) */ }; typedef struct fd_tower_accts fd_tower_accts_t; diff --git a/src/choreo/tower/fd_tower_forks.c b/src/choreo/tower/fd_tower_forks.c index 4b1d4ea3324..fe44a0506ad 100644 --- a/src/choreo/tower/fd_tower_forks.c +++ b/src/choreo/tower/fd_tower_forks.c @@ -99,6 +99,10 @@ fd_forks_lowest_common_ancestor( fd_forks_t * forks, fd_tower_forks_t * fork1 = fd_tower_forks_query( forks->tower_forks, slot1, NULL ); fd_tower_forks_t * fork2 = fd_tower_forks_query( forks->tower_forks, slot2, NULL ); + if( FD_UNLIKELY( !fork1 )) FD_LOG_CRIT(( "slot1 %lu not found", slot1 )); + if( FD_UNLIKELY( !fork2 )) FD_LOG_CRIT(( "slot2 %lu not found", slot2 )); + + while( FD_LIKELY( fork1 && fork2 ) ) { if( FD_UNLIKELY( fork1->slot == fork2->slot ) ) return fork1->slot; if( fork1->slot > fork2->slot ) fork1 = fd_tower_forks_query( forks->tower_forks, fork1->parent_slot, NULL ); diff --git a/src/choreo/tower/fd_tower_forks.h b/src/choreo/tower/fd_tower_forks.h index 834ea84dba1..3bff1e54129 100644 --- a/src/choreo/tower/fd_tower_forks.h +++ b/src/choreo/tower/fd_tower_forks.h @@ -166,7 +166,6 @@ fd_forks_align( void ) { FD_FN_CONST static inline ulong fd_forks_footprint( ulong slot_max, ulong voter_max ) { ulong interval_max = fd_ulong_pow2_up( FD_LOCKOUT_ENTRY_MAX*slot_max*voter_max ); - FD_LOG_NOTICE(( "interval_max: %lu", 
interval_max )); int lg_slot_max = fd_ulong_find_msb( fd_ulong_pow2_up( slot_max ) ) + 1; return FD_LAYOUT_FINI( FD_LAYOUT_APPEND( diff --git a/src/disco/metrics/generated/fd_metrics_tower.c b/src/disco/metrics/generated/fd_metrics_tower.c index b0fbbd7aa1e..89fd394d404 100644 --- a/src/disco/metrics/generated/fd_metrics_tower.c +++ b/src/disco/metrics/generated/fd_metrics_tower.c @@ -13,4 +13,8 @@ const fd_metrics_meta_t FD_METRICS_TOWER[FD_METRICS_TOWER_TOTAL] = { DECLARE_METRIC( TOWER_LOCKOUT_FAIL, COUNTER ), DECLARE_METRIC( TOWER_THRESHOLD_FAIL, COUNTER ), DECLARE_METRIC( TOWER_PROPAGATED_FAIL, COUNTER ), + DECLARE_METRIC( TOWER_HARD_FORKS_SEEN, COUNTER ), + DECLARE_METRIC( TOWER_HARD_FORKS_PRUNED, COUNTER ), + DECLARE_METRIC( TOWER_HARD_FORKS_ACTIVE, GAUGE ), + DECLARE_METRIC( TOWER_HARD_FORKS_MAX_WIDTH, GAUGE ), }; diff --git a/src/disco/metrics/generated/fd_metrics_tower.h b/src/disco/metrics/generated/fd_metrics_tower.h index 47b8a4fb891..332b04fa022 100644 --- a/src/disco/metrics/generated/fd_metrics_tower.h +++ b/src/disco/metrics/generated/fd_metrics_tower.h @@ -72,7 +72,31 @@ #define FD_METRICS_COUNTER_TOWER_PROPAGATED_FAIL_DESC "Prev leader block did not propagate (can't vote)" #define FD_METRICS_COUNTER_TOWER_PROPAGATED_FAIL_CVT (FD_METRICS_CONVERTER_NONE) -#define FD_METRICS_TOWER_TOTAL (11UL) +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_SEEN_OFF (27UL) +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_SEEN_NAME "tower_hard_forks_seen" +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_SEEN_TYPE (FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_SEEN_DESC "Number of hard forks we've seen (block ids with multiple candidate bank hashes)" +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_SEEN_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_PRUNED_OFF (28UL) +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_PRUNED_NAME "tower_hard_forks_pruned" +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_PRUNED_TYPE 
(FD_METRICS_TYPE_COUNTER) +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_PRUNED_DESC "Number of hard forks (candidate bank hashes) we've pruned" +#define FD_METRICS_COUNTER_TOWER_HARD_FORKS_PRUNED_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_OFF (29UL) +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_NAME "tower_hard_forks_active" +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_TYPE (FD_METRICS_TYPE_GAUGE) +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_DESC "Currently active hard forks" +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_MAX_WIDTH_OFF (30UL) +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_MAX_WIDTH_NAME "tower_hard_forks_max_width" +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_MAX_WIDTH_TYPE (FD_METRICS_TYPE_GAUGE) +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_MAX_WIDTH_DESC "The max width of hard forks (block id with most candidate bank hashes) we've ever seen" +#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_MAX_WIDTH_CVT (FD_METRICS_CONVERTER_NONE) + +#define FD_METRICS_TOWER_TOTAL (15UL) extern const fd_metrics_meta_t FD_METRICS_TOWER[FD_METRICS_TOWER_TOTAL]; #endif /* HEADER_fd_src_disco_metrics_generated_fd_metrics_tower_h */ diff --git a/src/disco/metrics/metrics.xml b/src/disco/metrics/metrics.xml index 3d722bb581e..19c141862b5 100644 --- a/src/disco/metrics/metrics.xml +++ b/src/disco/metrics/metrics.xml @@ -1115,9 +1115,15 @@ metric introduced. + + + + + + @@ -1126,6 +1132,12 @@ metric introduced. 
+ + + + + + diff --git a/src/disco/topo/fd_topo.h b/src/disco/topo/fd_topo.h index 17d8b2855d7..91e3c911baf 100644 --- a/src/disco/topo/fd_topo.h +++ b/src/disco/topo/fd_topo.h @@ -384,8 +384,6 @@ struct fd_topo_tile { ulong enable_features_cnt; char enable_features[ 16 ][ FD_BASE58_ENCODED_32_SZ ]; - ulong enable_bank_hash_cmp; - int larger_max_cost_per_block; ulong capture_start_slot; @@ -491,8 +489,9 @@ struct fd_topo_tile { } archiver; struct { + int hard_fork_fatal; ulong max_live_slots; - ulong max_lookahead_conf; + ulong max_vote_lookahead; char identity_key[ PATH_MAX ]; char vote_account[ PATH_MAX ]; char base_path[PATH_MAX]; diff --git a/src/discof/bank/fd_bank_tile.c b/src/discof/bank/fd_bank_tile.c index 908d853eb91..5d282debc35 100644 --- a/src/discof/bank/fd_bank_tile.c +++ b/src/discof/bank/fd_bank_tile.c @@ -587,7 +587,6 @@ unprivileged_init( fd_topo_t * topo, ctx->txn_ctx[ i ].bundle.prev_txn_ctxs[ j ] = &ctx->txn_ctx[ j ]; } - ctx->txn_ctx[ i ].bank_hash_cmp = NULL; /* TODO - do we need this? */ ctx->txn_ctx[ i ].progcache = ctx->txn_ctx[ i ]._progcache; ctx->txn_ctx[ i ].status_cache = txncache; *(ctx->txn_ctx[ i ].funk) = *funk; diff --git a/src/discof/exec/fd_exec_tile.c b/src/discof/exec/fd_exec_tile.c index c21974a6950..167e9f0602d 100644 --- a/src/discof/exec/fd_exec_tile.c +++ b/src/discof/exec/fd_exec_tile.c @@ -28,7 +28,6 @@ typedef struct link_ctx { } link_ctx_t; typedef struct fd_exec_tile_ctx { - ulong tile_idx; /* link-related data structures. */ @@ -39,8 +38,6 @@ typedef struct fd_exec_tile_ctx { fd_sha512_t sha_mem[ FD_TXN_ACTUAL_SIG_MAX ]; fd_sha512_t * sha_lj[ FD_TXN_ACTUAL_SIG_MAX ]; - fd_bank_hash_cmp_t * bank_hash_cmp; - /* Data structures related to managing and executing the transaction. The fd_txn_p_t is refreshed with every transaction and is sent from the dispatch/replay tile. 
The fd_exec_txn_ctx_t * is a valid @@ -261,19 +258,6 @@ unprivileged_init( fd_topo_t * topo, FD_LOG_ERR(( "Failed to join banks" )); } - /********************************************************************/ - /* bank hash cmp */ - /********************************************************************/ - - ulong bank_hash_cmp_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "bh_cmp" ); - if( FD_UNLIKELY( bank_hash_cmp_obj_id==ULONG_MAX ) ) { - FD_LOG_ERR(( "Could not find topology object for bank hash cmp" )); - } - ctx->bank_hash_cmp = fd_bank_hash_cmp_join( fd_topo_obj_laddr( topo, bank_hash_cmp_obj_id ) ); - if( FD_UNLIKELY( !ctx->bank_hash_cmp ) ) { - FD_LOG_ERR(( "Failed to join bank hash cmp" )); - } - void * shfunk = fd_topo_obj_laddr( topo, tile->exec.funk_obj_id ); if( FD_UNLIKELY( !fd_funk_join( ctx->funk, shfunk ) ) ) { FD_LOG_CRIT(( "fd_funk_join(accdb) failed" )); @@ -304,7 +288,6 @@ unprivileged_init( fd_topo_t * topo, FD_LOG_CRIT(( "fd_progcache_join() failed" )); } ctx->txn_ctx->status_cache = ctx->txncache; - ctx->txn_ctx->bank_hash_cmp = ctx->bank_hash_cmp; ctx->txn_ctx->bundle.is_bundle = 0; /********************************************************************/ diff --git a/src/discof/replay/fd_replay_tile.c b/src/discof/replay/fd_replay_tile.c index f2db5fe5688..ba9bb78c721 100644 --- a/src/discof/replay/fd_replay_tile.c +++ b/src/discof/replay/fd_replay_tile.c @@ -159,8 +159,6 @@ struct fd_replay_tile { /* Replay state machine. 
*/ fd_sched_t * sched; - uint enable_bank_hash_cmp:1; - fd_bank_hash_cmp_t * bank_hash_cmp; ulong exec_cnt; fd_replay_out_link_t exec_out[ 1 ]; /* Sending work down to exec tiles */ @@ -764,35 +762,6 @@ replay_block_finalize( fd_replay_tile_t * ctx, fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank ); FD_TEST( bank_hash ); - fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp; - fd_bank_hash_cmp_lock( bank_hash_cmp ); - fd_bank_hash_cmp_insert( bank_hash_cmp, fd_bank_slot_get( bank ), bank_hash, 1, 0 ); - - /* Try to move the bank hash comparison watermark forward */ - for( ulong cmp_slot = bank_hash_cmp->watermark + 1; cmp_slot < fd_bank_slot_get( bank ); cmp_slot++ ) { - if( FD_UNLIKELY( !ctx->enable_bank_hash_cmp ) ) { - bank_hash_cmp->watermark = cmp_slot; - break; - } - int rc = fd_bank_hash_cmp_check( bank_hash_cmp, cmp_slot ); - switch ( rc ) { - case -1: - /* Mismatch */ - FD_LOG_CRIT(( "Bank hash mismatch on slot: %lu. Halting.", cmp_slot )); - break; - case 0: - /* Not ready */ - break; - case 1: - /* Match*/ - bank_hash_cmp->watermark = cmp_slot; - break; - default:; - } - } - - fd_bank_hash_cmp_unlock( bank_hash_cmp ); - /* Must be last so we can measure completion time correctly, even though we could technically do this before the hash cmp and vote tower stuff. 
*/ @@ -923,35 +892,6 @@ fini_leader_bank( fd_replay_tile_t * ctx, fd_hash_t const * bank_hash = fd_bank_bank_hash_query( ctx->leader_bank ); FD_TEST( bank_hash ); - fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp; - fd_bank_hash_cmp_lock( bank_hash_cmp ); - fd_bank_hash_cmp_insert( bank_hash_cmp, curr_slot, bank_hash, 1, 0 ); - - /* Try to move the bank hash comparison watermark forward */ - for( ulong cmp_slot = bank_hash_cmp->watermark + 1; cmp_slot < curr_slot; cmp_slot++ ) { - if( FD_UNLIKELY( !ctx->enable_bank_hash_cmp ) ) { - bank_hash_cmp->watermark = cmp_slot; - break; - } - int rc = fd_bank_hash_cmp_check( bank_hash_cmp, cmp_slot ); - switch ( rc ) { - case -1: - /* Mismatch */ - FD_LOG_WARNING(( "Bank hash mismatch on slot: %lu. Halting.", cmp_slot )); - break; - case 0: - /* Not ready */ - break; - case 1: - /* Match*/ - bank_hash_cmp->watermark = cmp_slot; - break; - default:; - } - } - - fd_bank_hash_cmp_unlock( bank_hash_cmp ); - publish_slot_completed( ctx, stem, ctx->leader_bank, 0, 1 /* is_leader */ ); /* The reference on the bank is finally no longer needed. 
*/ @@ -1086,21 +1026,6 @@ init_after_snapshot( fd_replay_tile_t * ctx ) { snapshot_slot = 0UL; } - /* Initialize consensus structures post-snapshot */ - - fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank ); - - fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp; - - fd_vote_states_iter_t iter_[1]; - for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states ); !fd_vote_states_iter_done( iter ); fd_vote_states_iter_next( iter ) ) { - fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter ); - bank_hash_cmp->total_stake += vote_state->stake; - } - bank_hash_cmp->watermark = snapshot_slot; - - fd_bank_vote_states_end_locking_query( bank ); - if( FD_UNLIKELY( ctx->capture_ctx ) ) fd_solcap_writer_flush( ctx->capture_ctx->capture ); } @@ -2208,7 +2133,7 @@ maybe_verify_shred_version( fd_replay_tile_t * ctx ) { xor = fd_ushort_if( xorgenesis_hash ) )); + FD_LOG_ERR(( "shred version mismatch: expected %u but got %u from genesis hash %s and hard forks", expected_shred_version, xor, FD_BASE58_ENC_32_ALLOCA( &ctx->genesis_hash ) )); } } } @@ -2478,13 +2403,6 @@ unprivileged_init( fd_topo_t * topo, ctx->sched = fd_sched_join( fd_sched_new( sched_mem, tile->replay.max_live_slots, ctx->exec_cnt ), tile->replay.max_live_slots ); FD_TEST( ctx->sched ); - ctx->enable_bank_hash_cmp = !!tile->replay.enable_bank_hash_cmp; - - ulong bank_hash_cmp_obj_id = fd_pod_query_ulong( topo->props, "bh_cmp", ULONG_MAX ); - FD_TEST( bank_hash_cmp_obj_id!=ULONG_MAX ); - ctx->bank_hash_cmp = fd_bank_hash_cmp_join( fd_bank_hash_cmp_new( fd_topo_obj_laddr( topo, bank_hash_cmp_obj_id ) ) ); - FD_TEST( ctx->bank_hash_cmp ); - ctx->vote_tracker = fd_vote_tracker_join( fd_vote_tracker_new( vote_tracker_mem, 0UL ) ); FD_TEST( ctx->vote_tracker ); diff --git a/src/discof/tower/fd_tower_tile.c b/src/discof/tower/fd_tower_tile.c index e827279aa9d..d2b73ec1fa2 100644 --- a/src/discof/tower/fd_tower_tile.c +++ 
b/src/discof/tower/fd_tower_tile.c @@ -1,8 +1,8 @@ #include "fd_tower_tile.h" #include "generated/fd_tower_tile_seccomp.h" -#include "../genesis/fd_genesi_tile.h" #include "../../choreo/ghost/fd_ghost.h" +#include "../../choreo/hfork/fd_hfork.h" #include "../../choreo/notar/fd_notar.h" #include "../../choreo/tower/fd_tower.h" #include "../../choreo/tower/fd_tower_accts.h" @@ -18,8 +18,8 @@ #include "../../discof/replay/fd_exec.h" #include "../../discof/replay/fd_replay_tile.h" #include "../../flamenco/fd_flamenco_base.h" -#include "../../util/pod/fd_pod.h" #include "../../flamenco/runtime/fd_bank.h" +#include "../../util/pod/fd_pod.h" #include #include @@ -57,14 +57,14 @@ #define VOTE_TXN_SIG_MAX (2UL) /* validator identity and vote authority */ -struct conf { +struct notif { ulong slot; int kind; }; -typedef struct conf conf_t; +typedef struct notif notif_t; -#define DEQUE_NAME confs -#define DEQUE_T conf_t +#define DEQUE_NAME notif +#define DEQUE_T notif_t #include "../../util/tmpl/fd_deque_dynamic.c" static const fd_hash_t manifest_block_id = { .ul = { 0xf17eda2ce7b1d } }; /* FIXME manifest_block_id */ @@ -77,25 +77,30 @@ typedef struct { } in_ctx_t; typedef struct { - ulong seed; /* map seed */ - fd_pubkey_t identity_key[1]; - fd_pubkey_t vote_account[1]; + ulong seed; /* map seed */ int checkpt_fd; int restore_fd; + fd_pubkey_t identity_key[1]; + fd_pubkey_t vote_account[1]; + uchar our_vote_acct[FD_VOTE_STATE_DATA_MAX]; /* buffer for reading back our own vote acct data */ /* structures owned by tower tile */ - fd_ghost_t * ghost; - fd_notar_t * notar; - fd_tower_t * tower; - fd_forks_t * forks; - fd_tower_t * tower_spare; /* spare tower used during processing */ - conf_t * confs; /* deque of confirmations queued for publishing */ + fd_forks_t * forks; + fd_ghost_t * ghost; + fd_hfork_t * hfork; + fd_notar_t * notar; + fd_tower_t * tower; + fd_tower_t * tower_spare; /* spare tower used during processing */ + notif_t * notif; /* deque of confirmation 
notifications queued for publishing */ + fd_tower_accts_t * tower_accts; /* deque of accts, stake, and pubkey for the currently replayed slot */ + fd_epoch_stakes_t * slot_stakes; /* tracks the stakes for each voter in the epoch per fork */ + + /* external joins owned by replay tile */ + fd_banks_t * banks; fd_accdb_user_t accdb[1]; - fd_tower_accts_t * tower_accts; /* deque of accts, stake, and pubkey for the currently replayed slot */ - fd_epoch_stakes_t * epoch_stakes; /* tracks the stakes for each voter in the epoch per fork */ /* frag-related structures (consume and publish) */ uchar vote_txn[FD_TPU_PARSED_MTU]; @@ -122,10 +127,11 @@ typedef struct { /* metrics */ - struct { + struct ctx_metrics_t { ulong vote_txn_invalid; ulong vote_txn_ignored; ulong vote_txn_mismatch; + ulong ancestor_rollback; ulong sibling_confirmed; ulong same_fork; @@ -134,6 +140,8 @@ typedef struct { ulong lockout_fail; ulong threshold_fail; ulong propagated_fail; + + fd_hfork_metrics_t hard_forks; } metrics; } ctx_t; @@ -144,17 +152,22 @@ scratch_align( void ) { FD_FN_PURE static inline ulong scratch_footprint( FD_PARAM_UNUSED fd_topo_tile_t const * tile ) { - ulong slot_max = tile->tower.max_live_slots; + ulong slot_max = tile->tower.max_live_slots; + int lg_slot_max = fd_ulong_find_msb( fd_ulong_pow2_up( slot_max ) ) + 1; + FD_LOG_NOTICE(( "hfork footprint %lu", fd_hfork_footprint( slot_max, FD_VOTER_MAX ) )); ulong l = FD_LAYOUT_INIT; l = FD_LAYOUT_APPEND( l, alignof(ctx_t), sizeof(ctx_t) ); l = FD_LAYOUT_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) ); - l = FD_LAYOUT_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_lookahead_conf ) ); + l = FD_LAYOUT_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) ); + l = FD_LAYOUT_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) ); l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); l = FD_LAYOUT_APPEND( l, 
fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) ); l = FD_LAYOUT_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) ); l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); - l = FD_LAYOUT_APPEND( l, confs_align(), confs_footprint( slot_max ) ); l = FD_LAYOUT_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) ); + l = FD_LAYOUT_APPEND( l, fd_tower_forks_align(), fd_tower_forks_footprint( lg_slot_max ) ); + l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); /* ctx->tower_spare */ + l = FD_LAYOUT_APPEND( l, notif_align(), notif_footprint( slot_max ) ); return FD_LAYOUT_FINI( l, scratch_align() ); } @@ -163,6 +176,7 @@ metrics_write( ctx_t * ctx ) { FD_MCNT_SET( TOWER, VOTE_TXN_INVALID, ctx->metrics.vote_txn_invalid ); FD_MCNT_SET( TOWER, VOTE_TXN_IGNORED, ctx->metrics.vote_txn_ignored ); FD_MCNT_SET( TOWER, VOTE_TXN_MISMATCH, ctx->metrics.vote_txn_mismatch ); + FD_MCNT_SET( TOWER, ANCESTOR_ROLLBACK, ctx->metrics.ancestor_rollback ); FD_MCNT_SET( TOWER, SIBLING_CONFIRMED, ctx->metrics.sibling_confirmed ); FD_MCNT_SET( TOWER, SAME_FORK, ctx->metrics.same_fork ); @@ -171,6 +185,12 @@ metrics_write( ctx_t * ctx ) { FD_MCNT_SET( TOWER, LOCKOUT_FAIL, ctx->metrics.lockout_fail ); FD_MCNT_SET( TOWER, THRESHOLD_FAIL, ctx->metrics.threshold_fail ); FD_MCNT_SET( TOWER, PROPAGATED_FAIL, ctx->metrics.propagated_fail ); + + FD_MCNT_SET( TOWER, HARD_FORKS_SEEN, ctx->metrics.hard_forks.seen ); + FD_MCNT_SET( TOWER, HARD_FORKS_PRUNED, ctx->metrics.hard_forks.pruned ); + + FD_MGAUGE_SET( TOWER, HARD_FORKS_ACTIVE, ctx->metrics.hard_forks.active ); + FD_MGAUGE_SET( TOWER, HARD_FORKS_MAX_WIDTH, ctx->metrics.hard_forks.max_width ); } static void @@ -201,12 +221,15 @@ contiguous_confirm( ctx_t * ctx, ancestors (confirmations can be out-of-order and roots can be skipped due to lockout). 
*/ + ulong cnt = 0; ulong ancestor = slot; while( FD_UNLIKELY( ancestor > wmark ) ) { fd_tower_forks_t * fork = fd_forks_query( ctx->forks, ancestor ); if( FD_UNLIKELY( !fork ) ) break; /* rooted past this ancestor */ - confs_push_tail( ctx->confs, (conf_t){ .slot = ancestor, .kind = kind } ); - ancestor = fork->parent_slot; + if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm %lu slots more than slot max %lu", cnt, notif_max( ctx->notif ) )); + notif_push_tail( ctx->notif, (notif_t){ .slot = ancestor, .kind = kind } ); + cnt++; + ancestor = fork->parent_slot; } } @@ -307,12 +330,25 @@ count_vote_txn( ctx_t * ctx, fd_tower_vote_t const * their_last_vote = fd_tower_peek_tail_const( ctx->tower_spare ); fd_hash_t const * their_block_id = &ctx->compact_tower_sync_serde.block_id; + fd_hash_t const * their_bank_hash = &ctx->compact_tower_sync_serde.hash; + + /* Similar to what Agave does in cluster_info_vote_listener, we use + the stake associated with a vote account as of our current root + (which could potentially be a different epoch than the vote we are + counting or when we observe the vote). They default stake to 0 for + voters who are not found. */ + + ulong total_stake = fd_ghost_root( ctx->ghost )->total_stake; + + fd_voter_stake_key_t stake_key = { .vote_account = *vote_acc, .slot = ctx->root_slot }; + fd_voter_stake_t * stake = fd_voter_stake_map_ele_query( ctx->slot_stakes->voter_stake_map, &stake_key, NULL, ctx->slot_stakes->voter_stake_pool ); + + fd_hfork_count_vote( ctx->hfork, vote_acc, their_block_id, their_bank_hash, their_last_vote->slot, stake ? 
stake->stake : 0, total_stake, &ctx->metrics.hard_forks ); - ulong total_stake = fd_ghost_root( ctx->ghost )->total_stake; fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_last_vote->slot, their_block_id ); if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, stem, tsorig, notar_blk ); - fd_tower_forks_t * fork = fd_forks_query( ctx->forks, their_last_vote->slot ); + fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, their_last_vote->slot, NULL ); if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; return; /* we haven't replayed this slot yet */ }; fd_hash_t const * our_block_id = fd_forks_canonical_block_id( ctx->forks, their_last_vote->slot ); @@ -371,15 +407,11 @@ count_vote_txn( ctx_t * ctx, } ulong -get_voters( fd_tower_accts_t * tower_accts_deque, - fd_epoch_stakes_t * epoch_stakes, - fd_banks_t * banks, - ulong bank_idx, - ulong slot ) { - ulong voters_cnt = 0UL; - fd_bank_t * bank = fd_banks_bank_query( banks, bank_idx ); - if( FD_UNLIKELY( !bank ) ) FD_LOG_ERR(( "invariant violation: bank %lu is missing", bank_idx )); - +query_acct_stake_from_bank( fd_tower_accts_t * tower_accts_deque, + fd_epoch_stakes_t * epoch_stakes, + fd_bank_t * bank, + ulong slot ) { + ulong total_stake = 0; fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank ); fd_vote_states_iter_t iter_[1]; ulong prev_voter_idx = ULONG_MAX; @@ -388,43 +420,38 @@ get_voters( fd_tower_accts_t * tower_accts_deque, fd_vote_states_iter_next( iter ) ) { fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter ); if( FD_UNLIKELY( vote_state->stake_t_2 == 0 ) ) continue; /* skip unstaked vote accounts */ - if( FD_UNLIKELY( voters_cnt >= FD_VOTER_MAX ) ) break; /* voters deque is full */ fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account; fd_tower_accts_push_tail( tower_accts_deque, (fd_tower_accts_t){ .addr = *vote_account_pubkey, .stake = vote_state->stake_t_2 } ); 
prev_voter_idx = fd_epoch_stakes_slot_stakes_add( epoch_stakes, slot, vote_account_pubkey, vote_state->stake_t_2, prev_voter_idx ); - voters_cnt++; + total_stake += vote_state->stake_t_2; } fd_bank_vote_states_end_locking_query( bank ); - return voters_cnt; + return total_stake; } -static int -get_vote_account_from_accdb( ctx_t * ctx, - fd_pubkey_t const * vote_account_pubkey, - fd_tower_accts_t * vote_tower_out, - fd_funk_txn_xid_t const * xid ) { +/* query accdb for the vote state (vote account data) of the given vote + account address as of xid. Returns 1 if found, 0 otherwise. */ +static int +query_vote_state_from_accdb( fd_accdb_user_t * accdb, + fd_funk_txn_xid_t const * xid, + fd_pubkey_t const * vote_acc, + uchar buf[static FD_VOTE_STATE_DATA_MAX] ) { for(;;) { - fd_memset( vote_tower_out->data, 0, sizeof(vote_tower_out->data) ); - fd_accdb_peek_t peek[1]; - if( FD_UNLIKELY( !fd_accdb_peek( ctx->accdb, peek, xid, vote_account_pubkey->uc ) ) ) { - /* Vote account should be guaranteed to be in the accdb */ - FD_BASE58_ENCODE_32_BYTES( vote_account_pubkey->uc, acc_cstr ); - FD_LOG_CRIT(( "vote account not found. address: %s", acc_cstr )); - } + if( FD_UNLIKELY( !fd_accdb_peek( accdb, peek, xid, vote_acc->uc ) ) ) return 0; ulong data_sz = fd_accdb_ref_data_sz( peek->acc ); - if( FD_UNLIKELY( data_sz > sizeof(vote_tower_out->data) ) ) { - FD_BASE58_ENCODE_32_BYTES( vote_account_pubkey->uc, acc_cstr ); - FD_LOG_CRIT(( "vote account %s has too large data. dlen %lu > %lu", acc_cstr, data_sz, sizeof(vote_tower_out->data) )); + if( FD_UNLIKELY( data_sz > FD_VOTE_STATE_DATA_MAX ) ) { + FD_BASE58_ENCODE_32_BYTES( vote_acc->uc, acc_cstr ); + FD_LOG_CRIT(( "vote account %s exceeds FD_VOTE_STATE_DATA_MAX. 
dlen %lu > %lu", acc_cstr, data_sz, FD_VOTE_STATE_DATA_MAX )); } - fd_memcpy( vote_tower_out->data, fd_accdb_ref_data_const( peek->acc ), data_sz ); + fd_memcpy( buf, fd_accdb_ref_data_const( peek->acc ), data_sz ); if( FD_LIKELY( fd_accdb_peek_test( peek ) ) ) break; FD_SPIN_PAUSE(); } - return 0; + return 1; } static void @@ -432,84 +459,69 @@ replay_slot_completed( ctx_t * ctx, fd_replay_slot_completed_t * slot_info, ulong tsorig, fd_stem_context_t * stem ) { - /* Populate the tower_accts deque with the vote pubkeys and stakes from the bank. */ - - fd_tower_accts_remove_all( ctx->tower_accts ); - get_voters( ctx->tower_accts, ctx->epoch_stakes, ctx->banks, slot_info->bank_idx, slot_info->slot ); - /* fd_notar requires some bookkeeping when there is a new epoch. */ - - if( FD_UNLIKELY( ctx->notar->epoch==ULONG_MAX || slot_info->epoch > ctx->notar->epoch ) ) { - fd_notar_advance_epoch( ctx->notar, ctx->tower_accts, slot_info->epoch ); - } - - fd_tower_forks_t * fork = fd_forks_query( ctx->forks, slot_info->slot ); /* ensure fork exists */ - if( FD_UNLIKELY( !fork ) ) fork = fd_forks_insert( ctx->forks, slot_info->slot, slot_info->parent_slot ); + /* Initialize slot watermarks on the first replay_slot_completed. */ if( FD_UNLIKELY( ctx->init_slot == ULONG_MAX ) ) { ctx->init_slot = slot_info->slot; - fork->confirmed = 1; - fork->confirmed_block_id = manifest_block_id; - ctx->root_slot = slot_info->slot; ctx->conf_slot = slot_info->slot; } - /* Insert the just replayed block into ghost. */ - - fd_hash_t const * parent_block_id = &slot_info->parent_block_id; - if( FD_UNLIKELY( slot_info->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id; - if( FD_UNLIKELY( slot_info->slot ==ctx->init_slot ) ) parent_block_id = NULL; - fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_info->block_id, parent_block_id, slot_info->slot ); + /* Initialize the xid. */ - /* Record metadata about the replayed block. 
*/ + fd_funk_txn_xid_t xid = { .ul = { slot_info->slot, slot_info->bank_idx } }; - fd_forks_replayed( ctx->forks, fork, slot_info->bank_idx, &slot_info->block_id ); - fd_forks_lockouts_clear( ctx->forks, slot_info->parent_slot ); + /* Query our on-chain vote acct and reconcile with our local tower. */ - /* Check if gossip votes already confirmed a block id (via notar). */ + int found = query_vote_state_from_accdb( ctx->accdb, &xid, ctx->vote_account, ctx->our_vote_acct ); + if( FD_LIKELY( found ) ) fd_tower_reconcile( ctx->tower, ctx->root_slot, ctx->our_vote_acct ); - fd_notar_slot_t * notar_slot = fd_notar_slot_query( ctx->notar->slot_map, slot_info->slot, NULL ); - if( FD_UNLIKELY( notar_slot )) { /* optimize for replay keeping up (being ahead of gossip votes) */ - for( ulong i = 0; i < notar_slot->block_ids_cnt; i++ ) { - fd_notar_blk_t * notar_blk = fd_notar_blk_query( ctx->notar->blk_map, notar_slot->block_ids[i], NULL ); - FD_TEST( notar_blk ); /* block_ids_cnt corrupt */ - if( FD_LIKELY( notar_blk->dup_conf ) ) { - fd_forks_confirmed( fork, &notar_blk->block_id ); - break; - } - } - } + /* Insert the vote acct addrs and stakes from the bank into accts. */ - /* We replayed an unconfirmed duplicate, warn for now. Follow-up PR - will implement eviction and repair of the correct one. */ + fd_tower_accts_remove_all( ctx->tower_accts ); + fd_bank_t * bank = fd_banks_bank_query( ctx->banks, slot_info->bank_idx ); + if( FD_UNLIKELY( !bank ) ) FD_LOG_CRIT(( "invariant violation: bank %lu is missing", slot_info->bank_idx )); + ulong total_stake = query_acct_stake_from_bank( ctx->tower_accts, ctx->slot_stakes, bank, slot_info->slot ); + + /* Insert the just replayed block into forks.
*/ + + FD_TEST( !fd_forks_query( ctx->forks, slot_info->slot ) ); + fd_tower_forks_t * fork = fd_forks_insert( ctx->forks, slot_info->slot, slot_info->parent_slot ); + fork->parent_slot = slot_info->parent_slot; + fork->confirmed = 0; + fork->voted = 0; + fork->replayed_block_id = slot_info->block_id; + fork->bank_idx = slot_info->bank_idx; + fd_forks_replayed( ctx->forks, fork, slot_info->bank_idx, &slot_info->block_id ); + fd_forks_lockouts_clear( ctx->forks, slot_info->parent_slot ); - if( FD_UNLIKELY( fork->confirmed && 0!=memcmp( &fork->confirmed_block_id, &fork->replayed_block_id, sizeof(fd_hash_t) ) ) ) { - FD_LOG_WARNING(( "replayed an unconfirmed duplicate %lu. ours %s. confirmed %s.", slot_info->slot, FD_BASE58_ENC_32_ALLOCA( &slot_info->block_id ), FD_BASE58_ENC_32_ALLOCA( &fork->confirmed_block_id ) )); - } + /* Insert the just replayed block into ghost. */ - /* Iterate all the vote accounts to count votes towards fork choice - (fd_ghost) and confirmation (fd_notar) and also reconcile our local - tower with our on-chain one (fd_tower_reconcile). */ + fd_hash_t const * parent_block_id = &slot_info->parent_block_id; + if( FD_UNLIKELY( slot_info->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id; + if( FD_UNLIKELY( slot_info->slot ==ctx->init_slot ) ) parent_block_id = NULL; + fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_info->block_id, parent_block_id, slot_info->slot ); + ghost_blk->total_stake = total_stake; - fd_funk_txn_xid_t xid = { .ul = { slot_info->slot, slot_info->bank_idx } }; - ulong total_stake = 0; + /* Iterate vote accounts. */ - /* For each vote account, populate the vote account data from funk and count the vote. 
*/ for( fd_tower_accts_iter_t iter = fd_tower_accts_iter_init( ctx->tower_accts ); !fd_tower_accts_iter_done( ctx->tower_accts, iter ); iter = fd_tower_accts_iter_next( ctx->tower_accts, iter ) ) { - fd_tower_accts_t * acct = fd_tower_accts_iter_ele( ctx->tower_accts, iter ); - fd_pubkey_t const * vote_account_pubkey = &acct->addr; - FD_TEST( 0==get_vote_account_from_accdb( ctx, vote_account_pubkey, acct, &xid ) ); + fd_tower_accts_t * acct = fd_tower_accts_iter_ele( ctx->tower_accts, iter ); + fd_pubkey_t const * vote_acc = &acct->addr; - total_stake += acct->stake; + if( FD_UNLIKELY( !query_vote_state_from_accdb( ctx->accdb, &xid, vote_acc, acct->data ) ) ) { + FD_BASE58_ENCODE_32_BYTES( vote_acc->uc, acc_cstr ); + FD_LOG_CRIT(( "vote account in bank->vote_states not found. slot %lu address: %s", slot_info->slot, acc_cstr )); + }; - /* If this is our vote acc, reconcile with our local tower. */ + /* 1. Update forks with lockouts. */ - if( FD_UNLIKELY( 0==memcmp( &acct->addr, ctx->vote_account, sizeof(fd_pubkey_t) ) ) ) fd_tower_reconcile( ctx->tower, ctx->root_slot, acct->data ); + fd_forks_lockouts_add( ctx->forks, slot_info->slot, &acct->addr, acct ); - /* Deserialize the last vote slot from this vote account's tower. */ + /* 2. Count the last vote slot in the vote state towards ghost. */ ulong vote_slot = fd_voter_vote_slot( acct->data ); if( FD_UNLIKELY( vote_slot==ULONG_MAX ) ) continue; /* hasn't voted */ @@ -531,31 +543,57 @@ replay_slot_completed( ctx_t * ctx, if( FD_UNLIKELY( !ancestor_blk ) ) FD_LOG_CRIT(( "missing ancestor. replay slot %lu vote slot %lu voter %s", slot_info->slot, vote_slot, FD_BASE58_ENC_32_ALLOCA( &acct->addr ) )); - /* Count the vote toward ghost, notar and total_stake. 
*/ - fd_ghost_count_vote( ctx->ghost, ancestor_blk, &acct->addr, acct->stake, vote_slot ); + } - fd_forks_lockouts_add( ctx->forks, slot_info->slot, &acct->addr, acct ); - /* TODO count TPU vote txns towards notar */ + /* Insert the just replayed block into hard fork detector. */ + + fd_hfork_record_our_bank_hash( ctx->hfork, &slot_info->block_id, &slot_info->bank_hash, fd_ghost_root( ctx->ghost )->total_stake ); + + /* fd_notar requires some bookkeeping when there is a new epoch. */ + + if( FD_UNLIKELY( ctx->notar->epoch==ULONG_MAX || slot_info->epoch > ctx->notar->epoch ) ) { + fd_notar_advance_epoch( ctx->notar, ctx->tower_accts, slot_info->epoch ); + } + + /* Check if gossip votes already confirmed the fork's block_id (gossip + can be ahead of replay - this is tracked by fd_notar). */ + + fd_notar_slot_t * notar_slot = fd_notar_slot_query( ctx->notar->slot_map, slot_info->slot, NULL ); + if( FD_UNLIKELY( notar_slot )) { /* optimize for replay keeping up (being ahead of gossip votes) */ + for( ulong i = 0; i < notar_slot->block_ids_cnt; i++ ) { + fd_notar_blk_t * notar_blk = fd_notar_blk_query( ctx->notar->blk_map, notar_slot->block_ids[i], NULL ); + FD_TEST( notar_blk ); /* block_ids_cnt corrupt */ + if( FD_LIKELY( notar_blk->dup_conf ) ) { + fork->confirmed = 1; + fork->confirmed_block_id = notar_blk->block_id; + break; + } + } + } + + /* We replayed an unconfirmed duplicate, warn for now. Follow-up PR + will implement eviction and repair of the correct one. */ + + if( FD_UNLIKELY( fork->confirmed && 0!=memcmp( &fork->confirmed_block_id, &fork->replayed_block_id, sizeof(fd_hash_t) ) ) ) { + FD_LOG_WARNING(( "replayed an unconfirmed duplicate %lu. ours %s. 
confirmed %s.", slot_info->slot, FD_BASE58_ENC_32_ALLOCA( &slot_info->block_id ), FD_BASE58_ENC_32_ALLOCA( &fork->confirmed_block_id ) )); } - if( FD_UNLIKELY( fd_ghost_root( ctx->ghost )->total_stake==0 ) ) fd_ghost_root( ctx->ghost )->total_stake = total_stake; - ghost_blk->total_stake = total_stake; /* Determine reset, vote, and root slots. There may not be a vote or root slot but there is always a reset slot. */ - fd_tower_out_t out = fd_tower_vote_and_reset( ctx->tower, ctx->tower_accts, ctx->epoch_stakes, ctx->forks, ctx->ghost, ctx->notar ); + fd_tower_out_t out = fd_tower_vote_and_reset( ctx->tower, ctx->tower_accts, ctx->slot_stakes, ctx->forks, ctx->ghost, ctx->notar ); /* Write out metrics for vote / reset reasons. */ ctx->metrics.ancestor_rollback += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK ); ctx->metrics.sibling_confirmed += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SIBLING_CONFIRMED ); - ctx->metrics.same_fork += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SAME_FORK ); - ctx->metrics.switch_pass += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_PASS ); - ctx->metrics.switch_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_FAIL ); - ctx->metrics.lockout_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_LOCKOUT_FAIL ); - ctx->metrics.threshold_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_THRESHOLD_FAIL ); - ctx->metrics.propagated_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_PROPAGATED_FAIL ); + ctx->metrics.same_fork += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SAME_FORK ); + ctx->metrics.switch_pass += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_PASS ); + ctx->metrics.switch_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_FAIL ); + ctx->metrics.lockout_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_LOCKOUT_FAIL ); + ctx->metrics.threshold_fail += 
(ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_THRESHOLD_FAIL ); + ctx->metrics.propagated_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_PROPAGATED_FAIL ); /* Update forks if there is a vote slot. */ @@ -568,20 +606,28 @@ replay_slot_completed( ctx_t * ctx, /* Publish according structures if there is a root */ if( FD_UNLIKELY( out.root_slot!=ULONG_MAX ) ) { + + /* forks */ + + for(ulong slot = ctx->root_slot; slot < out.root_slot; slot++ ) { + fd_tower_forks_t * fork = fd_forks_query ( ctx->forks, slot ); + if( FD_LIKELY( fork ) ) fd_forks_remove( ctx->forks, slot ); + fd_epoch_stakes_slot_t * slot_stakes = fd_epoch_stakes_slot_map_query ( ctx->slot_stakes->slot_stakes_map, slot, NULL ); + if( FD_LIKELY( slot_stakes ) ) fd_epoch_stakes_slot_stakes_remove( ctx->slot_stakes, slot_stakes ); + } + + /* ghost */ + fd_ghost_blk_t * newr = fd_ghost_query( ctx->ghost, &out.root_block_id ); - if( FD_UNLIKELY( !newr ) ) { + if( FD_UNLIKELY( !newr ) ) { /* a block id we rooted is missing from ghost */ FD_BASE58_ENCODE_32_BYTES( out.root_block_id.uc, block_id_cstr ); FD_LOG_CRIT(( "missing root block id %s at slot %lu", block_id_cstr, out.root_slot )); } fd_ghost_publish( ctx->ghost, newr ); + + /* notar */ + fd_notar_advance_wmark( ctx->notar, out.root_slot ); - /* TODO: move expensive puvlishing operations to after the contiguous confirmations are published? 
*/ - for(ulong slot = ctx->root_slot; slot < out.root_slot; slot++ ) { - fd_tower_forks_t * fork = fd_forks_query ( ctx->forks, slot ); - if( FD_LIKELY( fork ) ) fd_forks_remove( ctx->forks, slot ); - fd_epoch_stakes_slot_t * slot_stakes = fd_epoch_stakes_slot_map_query ( ctx->epoch_stakes->slot_stakes_map, slot, NULL ); - if( FD_LIKELY( slot_stakes ) ) fd_epoch_stakes_slot_stakes_remove( ctx->epoch_stakes, slot_stakes ); - } /* Rooting implies optimistic confirmation in the Firedancer API, so we need to make sure to publish the optimistic frags before the @@ -592,6 +638,9 @@ replay_slot_completed( ctx_t * ctx, contiguous_confirm( ctx, out.root_slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC ); contiguous_confirm( ctx, out.root_slot, ctx->root_slot, FD_TOWER_SLOT_CONFIRMED_ROOTED ); + + /* Update slot watermarks. */ + ctx->conf_slot = out.root_slot; ctx->root_slot = out.root_slot; } @@ -611,8 +660,8 @@ replay_slot_completed( ctx_t * ctx, TODO only do this on refresh_last_vote? */ - fd_lockout_offset_t lockouts[ FD_TOWER_VOTE_MAX ]; - fd_txn_p_t txn[1]; + fd_lockout_offset_t lockouts[FD_TOWER_VOTE_MAX]; + fd_txn_p_t txn[1]; fd_tower_to_vote_txn( ctx->tower, out.root_slot, lockouts, &slot_info->bank_hash, &slot_info->block_hash, ctx->identity_key, ctx->identity_key, ctx->vote_account, txn ); FD_TEST( !fd_tower_empty( ctx->tower ) ); FD_TEST( txn->payload_sz && txn->payload_sz<=FD_TPU_MTU ); @@ -633,9 +682,9 @@ after_credit( ctx_t * ctx, fd_stem_context_t * stem, int * opt_poll_in, int * charge_busy ) { - while( FD_LIKELY( !confs_empty( ctx->confs ) ) ) { - conf_t ancestor = confs_pop_tail( ctx->confs ); - fd_tower_forks_t * fork = fd_forks_query( ctx->forks, ancestor.slot ); + while( FD_LIKELY( !notif_empty( ctx->notif ) ) ) { + notif_t ancestor = notif_pop_tail( ctx->notif ); + fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, ancestor.slot, NULL ); if( FD_UNLIKELY( !fork ) ) FD_LOG_CRIT(( "missing fork for ancestor %lu", 
ancestor.slot )); publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, fd_forks_canonical_block_id( ctx->forks, ancestor.slot ), fork->bank_idx, ancestor.kind ); *opt_poll_in = 0; /* drain the confirmations */ @@ -655,10 +704,11 @@ returnable_frag( ctx_t * ctx, ulong tspub FD_PARAM_UNUSED, fd_stem_context_t * stem ) { + if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) + FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark )); + switch( ctx->in_kind[ in_idx ] ) { case IN_KIND_DEDUP: { - if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) - FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark )); if( FD_UNLIKELY( ctx->root_slot==ULONG_MAX ) ) return 1; fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk ); FD_TEST( txnm->payload_sz<=FD_TPU_MTU ); @@ -674,8 +724,6 @@ returnable_frag( ctx_t * ctx, return 0; } case IN_KIND_REPLAY: { - if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) - FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark )); if( FD_LIKELY( sig==REPLAY_SIG_SLOT_COMPLETED ) ) { fd_memcpy( &ctx->replay_slot_completed, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sizeof(fd_replay_slot_completed_t) ); replay_slot_completed( ctx, &ctx->replay_slot_completed, tsorig, stem ); @@ -696,7 +744,7 @@ privileged_init( fd_topo_t * topo, ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) ); FD_SCRATCH_ALLOC_FINI( l, scratch_align() ); - FD_TEST( fd_rng_secure( &ctx->seed, 8 ) ); + FD_TEST(
fd_rng_secure( &ctx->seed, sizeof(ctx->seed) ) ); if( FD_UNLIKELY( !strcmp( tile->tower.identity_key, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" )); ctx->identity_key[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.identity_key, /* pubkey only: */ 1 ) ); @@ -723,36 +771,39 @@ privileged_init( fd_topo_t * topo, static void unprivileged_init( fd_topo_t * topo, fd_topo_tile_t * tile ) { - void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id ); - ulong slot_max = tile->tower.max_live_slots; + ulong slot_max = tile->tower.max_live_slots; + void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id ); FD_SCRATCH_ALLOC_INIT( l, scratch ); - ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) ); - void * ghost = FD_SCRATCH_ALLOC_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) ); - void * notar = FD_SCRATCH_ALLOC_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_lookahead_conf ) ); - void * tower = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() ); - void * accts = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) ); - void * forks = FD_SCRATCH_ALLOC_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) ); - void * spare = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() ); - void * slots = FD_SCRATCH_ALLOC_APPEND( l, confs_align(), confs_footprint( slot_max ) ); - void * stakes = FD_SCRATCH_ALLOC_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) ); + ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) ); + void * ghost = FD_SCRATCH_ALLOC_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) ); + void * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) ); + void * notar = FD_SCRATCH_ALLOC_APPEND( l, fd_notar_align(), fd_notar_footprint( 
tile->tower.max_vote_lookahead ) ); + void * tower = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() ); + void * accts = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) ); + void * forks = FD_SCRATCH_ALLOC_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) ); + void * spare = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() ); + void * stake = FD_SCRATCH_ALLOC_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) ); + void * notif = FD_SCRATCH_ALLOC_APPEND( l, notif_align(), notif_footprint( slot_max ) ); FD_SCRATCH_ALLOC_FINI( l, scratch_align() ); ctx->ghost = fd_ghost_join ( fd_ghost_new ( ghost, 2*slot_max, FD_VOTER_MAX, 42UL ) ); /* FIXME seed */ - ctx->notar = fd_notar_join ( fd_notar_new ( notar, tile->tower.max_lookahead_conf ) ); + ctx->hfork = fd_hfork_join ( fd_hfork_new ( hfork, slot_max, FD_VOTER_MAX, ctx->seed, tile->tower.hard_fork_fatal ) ); + ctx->notar = fd_notar_join ( fd_notar_new ( notar, tile->tower.max_vote_lookahead ) ); ctx->tower = fd_tower_join ( fd_tower_new ( tower ) ); ctx->tower_accts = fd_tower_accts_join( fd_tower_accts_new( accts, FD_VOTER_MAX ) ); ctx->forks = fd_forks_join ( fd_forks_new ( forks, slot_max, FD_VOTER_MAX ) ); ctx->tower_spare = fd_tower_join ( fd_tower_new ( spare ) ); - ctx->confs = confs_join ( confs_new ( slots, slot_max ) ); - ctx->epoch_stakes = fd_epoch_stakes_join( fd_epoch_stakes_new( stakes, slot_max ) ); + ctx->slot_stakes = fd_epoch_stakes_join( fd_epoch_stakes_new( stake, slot_max ) ); + ctx->notif = notif_join ( notif_new ( notif, slot_max ) ); FD_TEST( ctx->ghost ); + FD_TEST( ctx->hfork ); FD_TEST( ctx->notar ); FD_TEST( ctx->tower ); FD_TEST( ctx->forks ); FD_TEST( ctx->tower_spare ); - FD_TEST( ctx->confs ); FD_TEST( ctx->tower_accts ); - FD_TEST( ctx->epoch_stakes ); + FD_TEST( ctx->slot_stakes ); + FD_TEST( ctx->notif ); for( ulong i = 0; iroot_slot = ULONG_MAX; 
ctx->conf_slot = ULONG_MAX; + memset( &ctx->metrics, 0, sizeof( struct ctx_metrics_t ) ); + ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX ); FD_TEST( banks_obj_id!=ULONG_MAX ); ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) ); diff --git a/src/flamenco/runtime/Local.mk b/src/flamenco/runtime/Local.mk index ecb7b18ff46..facc56a2965 100644 --- a/src/flamenco/runtime/Local.mk +++ b/src/flamenco/runtime/Local.mk @@ -1,11 +1,6 @@ $(call add-hdrs,fd_acc_mgr.h) $(call add-objs,fd_acc_mgr,fd_flamenco) -ifdef FD_HAS_ALLOCA -$(call add-hdrs,fd_bank_hash_cmp.h) -$(call add-objs,fd_bank_hash_cmp,fd_flamenco) -endif - $(call add-hdrs,fd_blockhashes.h) $(call add-objs,fd_blockhashes,fd_flamenco) diff --git a/src/flamenco/runtime/context/fd_exec_txn_ctx.h b/src/flamenco/runtime/context/fd_exec_txn_ctx.h index 2cc0ad9066f..2f6589b88ed 100644 --- a/src/flamenco/runtime/context/fd_exec_txn_ctx.h +++ b/src/flamenco/runtime/context/fd_exec_txn_ctx.h @@ -6,7 +6,6 @@ #include "../../../ballet/txn/fd_txn.h" #include "../../features/fd_features.h" #include "../fd_txncache.h" -#include "../fd_bank_hash_cmp.h" #include "../../progcache/fd_progcache_user.h" #include "../fd_compute_budget_details.h" #include "../../../disco/pack/fd_microblock.h" @@ -50,7 +49,6 @@ struct fd_exec_txn_ctx { fd_txn_p_t txn; fd_exec_stack_t * exec_stack; fd_exec_accounts_t * exec_accounts; - fd_bank_hash_cmp_t * bank_hash_cmp; /* During sanitization, v0 transactions are allowed to have up to 256 accounts: https://github.com/anza-xyz/agave/blob/838c1952595809a31520ff1603a13f2c9123aa51/sdk/program/src/message/versions/v0/mod.rs#L139 diff --git a/src/flamenco/runtime/fd_bank_hash_cmp.c b/src/flamenco/runtime/fd_bank_hash_cmp.c deleted file mode 100644 index 88abc3667c3..00000000000 --- a/src/flamenco/runtime/fd_bank_hash_cmp.c +++ /dev/null @@ -1,239 +0,0 @@ -#include "fd_bank_hash_cmp.h" -#include - -void * -fd_bank_hash_cmp_new( void * mem ) { - - if( FD_UNLIKELY( 
!mem ) ) { - FD_LOG_WARNING( ( "NULL mem" ) ); - return NULL; - } - - if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_bank_hash_cmp_align() ) ) ) { - FD_LOG_WARNING( ( "misaligned mem" ) ); - return NULL; - } - - ulong footprint = fd_bank_hash_cmp_footprint(); - - fd_memset( mem, 0, footprint ); - - ulong laddr = (ulong)mem; - laddr += sizeof( fd_bank_hash_cmp_t ); - - laddr = fd_ulong_align_up( laddr, fd_bank_hash_cmp_map_align() ); - fd_bank_hash_cmp_map_new( (void *)laddr ); - fd_bank_hash_cmp_set_map_offset( (fd_bank_hash_cmp_t *)mem, (uchar *)laddr ); - laddr += fd_bank_hash_cmp_map_footprint(); - - laddr = fd_ulong_align_up( laddr, fd_bank_hash_cmp_align() ); - FD_TEST( laddr == (ulong)mem + fd_bank_hash_cmp_footprint() ); - - return mem; -} - -fd_bank_hash_cmp_t * -fd_bank_hash_cmp_join( void * bank_hash_cmp ) { - if( FD_UNLIKELY( !bank_hash_cmp ) ) { - FD_LOG_WARNING(( "NULL bank_hash_cmp" )); - return NULL; - } - - if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bank_hash_cmp, fd_bank_hash_cmp_align() ) ) ) { - FD_LOG_WARNING(( "misaligned bank_hash_cmp" )); - return NULL; - } - - ulong laddr = (ulong)bank_hash_cmp; - laddr += sizeof( fd_bank_hash_cmp_t ); - - fd_bank_hash_cmp_entry_t * map_entry = fd_bank_hash_cmp_map_join( (void *)laddr ); - if( FD_UNLIKELY( !map_entry ) ) { - FD_LOG_WARNING(( "invalid entry map" )); - return NULL; - } - - return bank_hash_cmp; -} - -void * -fd_bank_hash_cmp_leave( fd_bank_hash_cmp_t const * bank_hash_cmp ) { - - if( FD_UNLIKELY( !bank_hash_cmp ) ) { - FD_LOG_WARNING( ( "NULL bank_hash_cmp" ) ); - return NULL; - } - - return (void *)bank_hash_cmp; -} - -void * -fd_bank_hash_cmp_delete( void * bank_hash_cmp ) { - - if( FD_UNLIKELY( !bank_hash_cmp ) ) { - FD_LOG_WARNING( ( "NULL bank_hash_cmp" ) ); - return NULL; - } - - if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bank_hash_cmp, fd_bank_hash_cmp_align() ) ) ) { - FD_LOG_WARNING( ( "misaligned bank_hash_cmp" ) ); - return NULL; - } - - return bank_hash_cmp; -} - -void 
-fd_bank_hash_cmp_lock( fd_bank_hash_cmp_t * bank_hash_cmp ) { - volatile int * lock = &bank_hash_cmp->lock; -# if FD_HAS_THREADS - for( ;; ) { - if( FD_LIKELY( !FD_ATOMIC_CAS( lock, 0UL, 1UL ) ) ) break; - FD_SPIN_PAUSE(); - } -# else - *lock = 1; -# endif - FD_COMPILER_MFENCE(); -} - -void -fd_bank_hash_cmp_unlock( fd_bank_hash_cmp_t * bank_hash_cmp ) { - volatile int * lock = &bank_hash_cmp->lock; - FD_COMPILER_MFENCE(); - FD_VOLATILE( *lock ) = 0UL; -} - -void -fd_bank_hash_cmp_insert( fd_bank_hash_cmp_t * bank_hash_cmp, - ulong slot, - fd_hash_t const * hash, - int ours, - ulong stake ) { - fd_bank_hash_cmp_entry_t * map = fd_bank_hash_cmp_get_map( bank_hash_cmp ); - - if( FD_UNLIKELY( slot <= bank_hash_cmp->watermark ) ) { return; } - fd_bank_hash_cmp_entry_t * cmp = fd_bank_hash_cmp_map_query( map, slot, NULL ); - - if( !cmp ) { - - /* If full, make room for new bank hashes */ - - if( FD_UNLIKELY( bank_hash_cmp->cnt == fd_bank_hash_cmp_map_key_max() ) ) { - FD_LOG_WARNING( ( "Bank matches unexpectedly full. Clearing. " ) ); - for( ulong i = 0; i < fd_bank_hash_cmp_map_slot_cnt(); i++ ) { - fd_bank_hash_cmp_entry_t * entry = &map[i]; - if( FD_LIKELY( !fd_bank_hash_cmp_map_key_inval( entry->slot ) && - entry->slot < bank_hash_cmp->watermark ) ) { - fd_bank_hash_cmp_map_remove( map, entry ); - bank_hash_cmp->cnt--; - } - } - } - - cmp = fd_bank_hash_cmp_map_insert( map, slot ); - cmp->cnt = 0; - bank_hash_cmp->cnt++; - } - - if( FD_UNLIKELY( ours ) ) { - cmp->ours = *hash; - return; - } - - for( ulong i = 0; i < cmp->cnt; i++ ) { - if( FD_LIKELY( 0 == memcmp( &cmp->theirs[i], hash, sizeof( fd_hash_t ) ) ) ) { - cmp->stakes[i] += stake; - return; - } - } - - ulong max = sizeof( cmp->stakes ) / sizeof( ulong ); - if( FD_UNLIKELY( cmp->cnt == max ) ) { - if( !cmp->overflow ) { - FD_LOG_WARNING(( "[Bank Hash Comparison] more than %lu equivocating hashes for slot %lu. " - "new hash: %s. 
ignoring.", - max, - slot, - FD_BASE58_ENC_32_ALLOCA( hash ) )); - cmp->overflow = 1; - } - return; - } - cmp->cnt++; - cmp->theirs[cmp->cnt - 1] = *hash; - cmp->stakes[cmp->cnt - 1] = stake; - if( FD_UNLIKELY( cmp->cnt > 1 ) ) { - for( ulong i = 0; i < cmp->cnt; i++ ) { - FD_LOG_WARNING(( "slot: %lu. equivocating hash (#%lu): %s. stake: %lu", - cmp->slot, - i, - FD_BASE58_ENC_32_ALLOCA( cmp->theirs[i].hash ), - cmp->stakes[i] )); - } - } -} - -int -fd_bank_hash_cmp_check( fd_bank_hash_cmp_t * bank_hash_cmp, ulong slot ) { - fd_bank_hash_cmp_entry_t * map = fd_bank_hash_cmp_get_map( bank_hash_cmp ); - - fd_bank_hash_cmp_entry_t * cmp = fd_bank_hash_cmp_map_query( map, slot, NULL ); - - if( FD_UNLIKELY( !cmp ) ) return 0; - - fd_hash_t null_hash = { 0 }; - if( FD_LIKELY( 0 == memcmp( &cmp->ours, &null_hash, sizeof( fd_hash_t ) ) ) ) return 0; - - if( FD_UNLIKELY( cmp->cnt == 0 ) ) return 0; - - fd_hash_t * theirs = &cmp->theirs[0]; - ulong stake = cmp->stakes[0]; - for( ulong i = 1; i < cmp->cnt; i++ ) { - if( FD_UNLIKELY( cmp->stakes[i] > stake ) ) { - theirs = &cmp->theirs[i]; - stake = cmp->stakes[i]; - } - } - - double pct = (double)stake / (double)bank_hash_cmp->total_stake; - if( FD_LIKELY( pct > 0.52 ) ) { - if( FD_UNLIKELY( 0 != memcmp( &cmp->ours, theirs, sizeof( fd_hash_t ) ) ) ) { - FD_LOG_WARNING(( "\n\n[Bank Hash Comparison]\n" - "slot: %lu\n" - "ours: %s\n" - "theirs: %s\n" - "stake: %.0lf%%\n" - "result: mismatch!\n", - cmp->slot, - FD_BASE58_ENC_32_ALLOCA( cmp->ours.hash ), - FD_BASE58_ENC_32_ALLOCA( theirs->hash ), - pct * 100 )); - if( FD_UNLIKELY( cmp->cnt > 1 ) ) { - for( ulong i = 0; i < cmp->cnt; i++ ) { - FD_LOG_WARNING(( "slot: %lu. hash (#%lu): %s. 
stake: %lu", - cmp->slot, - i, - FD_BASE58_ENC_32_ALLOCA( cmp->theirs[i].hash ), - cmp->stakes[i] )); - } - } - return -1; - } else { - FD_LOG_INFO(( "\n\n[Bank Hash Comparison]\n" - "slot: %lu\n" - "ours: %s\n" - "theirs: %s\n" - "stake: %.0lf%%\n" - "result: match!\n", - cmp->slot, - FD_BASE58_ENC_32_ALLOCA( cmp->ours.hash ), - FD_BASE58_ENC_32_ALLOCA( theirs->hash ), - pct * 100 )); - } - fd_bank_hash_cmp_map_remove( map, cmp ); - bank_hash_cmp->cnt--; - return 1; - } - return 0; -} diff --git a/src/flamenco/runtime/fd_bank_hash_cmp.h b/src/flamenco/runtime/fd_bank_hash_cmp.h deleted file mode 100644 index 09a27174629..00000000000 --- a/src/flamenco/runtime/fd_bank_hash_cmp.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef HEADER_fd_src_flamenco_runtime_fd_bank_hash_cmp_h -#define HEADER_fd_src_flamenco_runtime_fd_bank_hash_cmp_h - -#include "../fd_flamenco_base.h" -#include "../types/fd_types_custom.h" - -struct fd_bank_hash_cmp_entry { - ulong slot; - uint hash; - fd_hash_t ours; - fd_hash_t theirs[8]; - ulong stakes[8]; - ulong cnt; - int overflow; -}; -typedef struct fd_bank_hash_cmp_entry fd_bank_hash_cmp_entry_t; -#define MAP_NAME fd_bank_hash_cmp_map -#define MAP_T fd_bank_hash_cmp_entry_t -#define MAP_KEY slot -#define MAP_KEY_NULL ULONG_MAX -#define MAP_KEY_INVAL(k) ((k)==ULONG_MAX) -#define MAP_LG_SLOT_CNT (16) /* 0.25 fill ratio */ -#include "../../util/tmpl/fd_map.c" - -struct fd_bank_hash_cmp { - ulong map_offset; - ulong cnt; - ulong watermark; /* */ - ulong total_stake; - volatile int lock; -}; -typedef struct fd_bank_hash_cmp fd_bank_hash_cmp_t; - -FD_PROTOTYPES_BEGIN - -static inline void -fd_bank_hash_cmp_set_map_offset( fd_bank_hash_cmp_t * bank_hash_cmp, - uchar * map_mem ) { - bank_hash_cmp->map_offset = (ulong)map_mem - (ulong)bank_hash_cmp; -} - -static inline fd_bank_hash_cmp_entry_t * -fd_bank_hash_cmp_get_map( fd_bank_hash_cmp_t * bank_hash_cmp ) { - return fd_bank_hash_cmp_map_join( (uchar *)bank_hash_cmp + bank_hash_cmp->map_offset ); -} - 
-static inline ulong -fd_bank_hash_cmp_align( void ) { - return 128UL; -} - -FD_FN_CONST static inline ulong -fd_bank_hash_cmp_footprint( void ) { - /* clang-format off */ - return FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, - fd_bank_hash_cmp_align(), sizeof(fd_bank_hash_cmp_t) ), - fd_bank_hash_cmp_map_align(), fd_bank_hash_cmp_map_footprint() ), - fd_bank_hash_cmp_align() ); - /* clang-format on */ -} - -void * -fd_bank_hash_cmp_new( void * mem ); - -fd_bank_hash_cmp_t * -fd_bank_hash_cmp_join( void * bank_hash_cmp ); - -void * -fd_bank_hash_cmp_leave( fd_bank_hash_cmp_t const * bank_hash_cmp ); - -void * -fd_bank_hash_cmp_delete( void * bank_hash_cmp ); - -void -fd_bank_hash_cmp_lock( fd_bank_hash_cmp_t * bank_hash_cmp ); - -void -fd_bank_hash_cmp_unlock( fd_bank_hash_cmp_t * bank_hash_cmp ); - -void -fd_bank_hash_cmp_insert( fd_bank_hash_cmp_t * bank_hash_cmp, - ulong slot, - fd_hash_t const * hash, - int ours, - ulong stake ); - -/* Returns 1 on bank hash match (caller should move watermark forward), - -1 on mismatch - 0 if we weren't able to compare yet */ -int -fd_bank_hash_cmp_check( fd_bank_hash_cmp_t * bank_hash_cmp, ulong slot ); - -FD_PROTOTYPES_END - -#endif /* HEADER_fd_src_flamenco_runtime_fd_bank_hash_cmp_h */ diff --git a/src/flamenco/runtime/program/fd_bpf_loader_program.c b/src/flamenco/runtime/program/fd_bpf_loader_program.c index f985b18d997..517259df3df 100644 --- a/src/flamenco/runtime/program/fd_bpf_loader_program.c +++ b/src/flamenco/runtime/program/fd_bpf_loader_program.c @@ -2624,7 +2624,6 @@ fd_directly_invoke_loader_v3_deploy( fd_bank_t * bank, txn_ctx->xid[0] = *xid; txn_ctx->progcache = NULL; txn_ctx->status_cache = NULL; - txn_ctx->bank_hash_cmp = NULL; txn_ctx->log.enable_exec_recording = !!(bank->flags & FD_BANK_FLAGS_EXEC_RECORDING); txn_ctx->bank = bank; diff --git a/src/flamenco/runtime/program/fd_vote_program.c b/src/flamenco/runtime/program/fd_vote_program.c index a9a75626ff8..f8107449286 100644 --- 
a/src/flamenco/runtime/program/fd_vote_program.c +++ b/src/flamenco/runtime/program/fd_vote_program.c @@ -1975,53 +1975,15 @@ process_vote_state_update( fd_borrowed_account_t * vote_account, fd_vote_state_update_t * vote_state_update, fd_pubkey_t const * signers[static FD_TXN_SIG_MAX], fd_exec_instr_ctx_t const * ctx /* feature_set */ ) { - int rc; - - /* A temporary hack to accumulate the stake-weighted bank hash from - all vote transactions. This determines whether our validator has - bank hash mismatched. TODO: move to a tile. */ - if( FD_LIKELY( !!ctx->txn_ctx->bank_hash_cmp ) ) { - // tie in code for fd_bank_hash_cmp that helps us detect if we have forked from the cluster. - // - // There is no corresponding code in anza - - fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( ctx->txn_ctx->bank ); - if( !vote_states ) { - FD_LOG_CRIT(( "vote_states is NULL" )); - } - - fd_vote_state_ele_t const * vote_state_ele = fd_vote_states_query_const( vote_states, vote_account->acct->pubkey ); - if( !vote_state_ele ) { - FD_LOG_CRIT(( "vote_state is NULL" )); - } - - if( !deq_fd_vote_lockout_t_empty( vote_state_update->lockouts ) ) { - fd_vote_lockout_t * lockout = deq_fd_vote_lockout_t_peek_tail( vote_state_update->lockouts ); - fd_bank_hash_cmp_t * bank_hash_cmp = ctx->txn_ctx->bank_hash_cmp; - if( FD_LIKELY( lockout && bank_hash_cmp ) ) { - fd_bank_hash_cmp_lock( bank_hash_cmp ); - fd_bank_hash_cmp_insert( - bank_hash_cmp, - lockout->slot, - &vote_state_update->hash, - 0, - vote_state_ele->stake ); - fd_bank_hash_cmp_unlock( bank_hash_cmp ); - } - } - - fd_bank_vote_states_end_locking_query( ctx->txn_ctx->bank ); - } - fd_vote_state_t vote_state; // https://github.com/anza-xyz/agave/blob/v2.0.1/programs/vote/src/vote_state/mod.rs#L1144 - rc = verify_and_get_vote_state( vote_account, - clock, - signers, - &vote_state, - ctx->txn_ctx->exec_stack->vote_program.process_vote.vote_state_mem, - 
ctx->txn_ctx->exec_stack->vote_program.process_vote.authorized_voters_mem, - ctx->txn_ctx->exec_stack->vote_program.process_vote.landed_votes_mem ); + int rc = verify_and_get_vote_state( vote_account, + clock, + signers, + &vote_state, + ctx->txn_ctx->exec_stack->vote_program.process_vote.vote_state_mem, + ctx->txn_ctx->exec_stack->vote_program.process_vote.authorized_voters_mem, + ctx->txn_ctx->exec_stack->vote_program.process_vote.landed_votes_mem ); if( FD_UNLIKELY( rc ) ) return rc; @@ -2077,32 +2039,6 @@ process_tower_sync( fd_borrowed_account_t * vote_account, fd_tower_sync_t * tower_sync, fd_pubkey_t const * signers[static FD_TXN_SIG_MAX], fd_exec_instr_ctx_t const * ctx /* feature_set */ ) { - /* A temporary hack to accumulate the stake-weighted bank hash from - all vote transactions. This determines whether our validator has - bank hash mismatched. TODO: move to a tile. */ - if( FD_LIKELY( !!ctx->txn_ctx->bank_hash_cmp ) ) { - if( !deq_fd_vote_lockout_t_empty( tower_sync->lockouts ) ) { - fd_vote_lockout_t * lockout = deq_fd_vote_lockout_t_peek_tail( tower_sync->lockouts ); - fd_bank_hash_cmp_t * bank_hash_cmp = ctx->txn_ctx->bank_hash_cmp; - fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( ctx->txn_ctx->bank ); - if( !vote_states ) { - FD_LOG_CRIT(( "vote_states is NULL" )); - } - fd_vote_state_ele_t const * vote_state_ele = fd_vote_states_query_const( vote_states, vote_account->acct->pubkey ); - if( FD_LIKELY( lockout && bank_hash_cmp && vote_state_ele ) ) { - fd_bank_hash_cmp_lock( bank_hash_cmp ); - fd_bank_hash_cmp_insert( - bank_hash_cmp, - lockout->slot, - &tower_sync->hash, - 0, - vote_state_ele->stake ); - fd_bank_hash_cmp_unlock( bank_hash_cmp ); - } - fd_bank_vote_states_end_locking_query( ctx->txn_ctx->bank ); - } - } - // https://github.com/anza-xyz/agave/blob/v2.0.1/programs/vote/src/vote_state/mod.rs#L1194 fd_vote_state_t vote_state; do { diff --git a/src/flamenco/runtime/tests/fd_instr_harness.c 
b/src/flamenco/runtime/tests/fd_instr_harness.c index 73a0774b7ad..d87d61216f2 100644 --- a/src/flamenco/runtime/tests/fd_instr_harness.c +++ b/src/flamenco/runtime/tests/fd_instr_harness.c @@ -94,7 +94,6 @@ fd_solfuzz_pb_instr_ctx_create( fd_solfuzz_runner_t * runner, txn_ctx->xid[0] = *xid; txn_ctx->status_cache = NULL; - txn_ctx->bank_hash_cmp = NULL; txn_ctx->log.enable_exec_recording = !!( runner->bank->flags & FD_BANK_FLAGS_EXEC_RECORDING ); txn_ctx->bank = runner->bank; @@ -352,7 +351,6 @@ fd_solfuzz_pb_instr_ctx_create( fd_solfuzz_runner_t * runner, txn_ctx->xid[0] = *xid; txn_ctx->status_cache = NULL; - txn_ctx->bank_hash_cmp = NULL; txn_ctx->log.enable_exec_recording = !!( runner->bank->flags & FD_BANK_FLAGS_EXEC_RECORDING ); txn_ctx->bank = runner->bank; diff --git a/src/flamenco/runtime/tests/fd_txn_harness.c b/src/flamenco/runtime/tests/fd_txn_harness.c index c3ee3023dfc..a694a5eac15 100644 --- a/src/flamenco/runtime/tests/fd_txn_harness.c +++ b/src/flamenco/runtime/tests/fd_txn_harness.c @@ -348,7 +348,6 @@ fd_solfuzz_txn_ctx_exec( fd_solfuzz_runner_t * runner, if( FD_UNLIKELY( !txn_ctx->progcache ) ) { FD_LOG_CRIT(( "fd_progcache_join failed" )); } - txn_ctx->bank_hash_cmp = NULL; txn_ctx->xid[0] = *xid; txn_ctx->log.enable_vm_tracing = runner->enable_vm_tracing;