Skip to content

Commit 1ef93fb

Browse files
committed
feat(hfork): rewrite hard fork detector based on #7087
1 parent 2dbb1b0 commit 1ef93fb

31 files changed

+908
-896
lines changed

book/api/metrics-generated.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,5 +1195,9 @@
11951195
| <span class="metrics-name">tower_&#8203;lockout_&#8203;fail</span> | counter | Locked out (can't vote) |
11961196
| <span class="metrics-name">tower_&#8203;threshold_&#8203;fail</span> | counter | Did not pass threshold check (can't vote) |
11971197
| <span class="metrics-name">tower_&#8203;propagated_&#8203;fail</span> | counter | Prev leader block did not propagate (can't vote) |
1198+
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;seen</span> | counter | Number of hard forks we've seen (block ids with multiple candidate bank hashes) |
1199+
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;pruned</span> | counter | Number of hard forks (candidate bank hashes) we've pruned |
1200+
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;active</span> | gauge | Currently active hard forks |
1201+
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;max_&#8203;width</span> | gauge | The max width of hard forks (block id with most candidate bank hashes) we've ever seen |
11981202

11991203
</div>

src/app/firedancer/config/default.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,15 +1371,15 @@ user = ""
13711371
[tiles.tower]
13721372
# Solana reaches consensus via replay, but can "cluster confirm"
13731373
# slots ahead of the replay tip by listening to vote txns from
1374-
# gossip or TPU. The larger max_lookahead_conf, the further
1374+
# gossip or TPU. The larger max_vote_lookahead, the further
13751375
# ahead slots can be cluster confirmed before they are replayed.
13761376
#
13771377
# Specifically, tower will ignore gossip or TPU votes that are
1378-
# more than max_lookahead_conf slots ahead of the root.
1378+
# more than max_vote_lookahead slots ahead of the root.
13791379
#
1380-
# Note max_lookahead_conf must be >= max_live_slots and
1380+
# Note max_vote_lookahead must be >= max_live_slots and
13811381
# Firedancer will ignore a value where this is not the case.
1382-
max_lookahead_conf = 4096
1382+
max_vote_lookahead = 4096
13831383

13841384
[tiles.send]
13851385
# The port the send tile uses for QUIC, to send votes and other

src/app/firedancer/topology.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,6 @@ fd_topo_initialize( config_t * config ) {
345345

346346
fd_topob_wksp( topo, "funk" );
347347
fd_topob_wksp( topo, "progcache" );
348-
fd_topob_wksp( topo, "bh_cmp" );
349348
fd_topob_wksp( topo, "fec_sets" );
350349
fd_topob_wksp( topo, "txncache" );
351350
fd_topob_wksp( topo, "banks" );
@@ -1199,9 +1198,9 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
11991198

12001199
} else if( FD_UNLIKELY( !strcmp( tile->name, "tower" ) ) ) {
12011200

1202-
tile->tower.fork_fatal = config->firedancer.development.hard_fork_fatal;
1201+
tile->tower.hard_fork_fatal = config->firedancer.development.hard_fork_fatal;
12031202
tile->tower.max_live_slots = config->firedancer.runtime.max_live_slots;
1204-
tile->tower.max_lookahead_conf = config->tiles.tower.max_lookahead_conf;
1203+
tile->tower.max_vote_lookahead = config->tiles.tower.max_vote_lookahead;
12051204
strncpy( tile->tower.identity_key, config->paths.identity_key, sizeof(tile->tower.identity_key) );
12061205
strncpy( tile->tower.vote_account, config->paths.vote_account, sizeof(tile->tower.vote_account) );
12071206
strncpy( tile->tower.base_path, config->paths.base, sizeof(tile->tower.base_path) );

src/app/shared/fd_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ struct fd_config {
490490
} shredcap;
491491

492492
struct {
493-
ulong max_lookahead_conf;
493+
ulong max_vote_lookahead;
494494
} tower;
495495

496496
} tiles;

src/app/shared/fd_config_parse.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ fd_config_extract_pod( uchar * pod,
259259

260260
CFG_POP ( ushort, tiles.send.send_src_port );
261261

262-
CFG_POP ( ulong, tiles.tower.max_lookahead_conf );
262+
CFG_POP ( ulong, tiles.tower.max_vote_lookahead );
263263

264264
CFG_POP ( bool, tiles.archiver.enabled );
265265
CFG_POP ( ulong, tiles.archiver.end_slot );

src/choreo/fd_choreo_base.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,6 @@ typedef uchar fd_block_id_t[ 32UL ];
3636
typedef fd_slot_hash_t fd_slot_pubkey_t;
3737

3838
static const fd_pubkey_t pubkey_null = {{ 0 }};
39+
static const fd_hash_t hash_null = {{ 0 }};
3940

4041
#endif /* HEADER_fd_src_choreo_fd_choreo_base_h */

src/choreo/ghost/fd_ghost.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,18 +271,18 @@ fd_ghost_insert( fd_ghost_t * ghost,
271271
void
272272
fd_ghost_count_vote( fd_ghost_t * ghost,
273273
fd_ghost_blk_t * blk,
274-
fd_pubkey_t const * vtr_addr,
274+
fd_pubkey_t const * vote_acc,
275275
ulong stake,
276276
ulong slot ) {
277277

278278
fd_ghost_blk_t const * root = fd_ghost_root( ghost );
279279
fd_ghost_blk_t * pool = ghost->pool;
280-
fd_ghost_vtr_t * vtr = vtr_map_query( ghost->vtr_map, *vtr_addr, NULL );
280+
fd_ghost_vtr_t * vtr = vtr_map_query( ghost->vtr_map, *vote_acc, NULL );
281281

282282
if( FD_UNLIKELY( slot == ULONG_MAX ) ) return; /* hasn't voted */
283283
if( FD_UNLIKELY( slot < root->slot ) ) return; /* vote older than root */
284284

285-
if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( ghost->vtr_map, *vtr_addr );
285+
if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( ghost->vtr_map, *vote_acc );
286286
else {
287287

288288
/* Only process the vote if it is not the same as the previous vote

src/choreo/hfork/Local.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
$(call add-hdrs,fd_hfork.h)
2+
$(call add-objs,fd_hfork,fd_choreo)
3+
ifdef FD_HAS_HOSTED
4+
ifdef FD_HAS_SECP256K1
5+
$(call make-unit-test,test_hfork,test_hfork,fd_choreo fd_flamenco fd_tango fd_ballet fd_util)
6+
$(call run-unit-test,test_hfork)
7+
endif
8+
endif

src/choreo/hfork/fd_hfork.c

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
#include "fd_hfork.h"
2+
#include "fd_hfork_private.h"
3+
4+
static void
5+
check( fd_hfork_t * hfork,
6+
ulong total_stake,
7+
candidate_t * candidate,
8+
int invalid,
9+
fd_hash_t * our_bank_hash ) {
10+
11+
if( FD_LIKELY( candidate->checked ) ) return; /* already checked this bank hash against our own */
12+
if( FD_LIKELY( candidate->stake * 100UL / total_stake < 52UL ) ) return; /* not enough stake to compare */
13+
14+
if( FD_UNLIKELY( invalid ) ) {
15+
char msg[ 4096UL ];
16+
FD_BASE58_ENCODE_32_BYTES( candidate->key.block_id.uc, _block_id );
17+
FD_TEST( fd_cstr_printf_check( msg, sizeof( msg ), NULL,
18+
"HARD FORK DETECTED: our validator has marked slot %lu with block ID `%s` dead, but %lu validators with %.1f of stake have voted on it",
19+
candidate->slot,
20+
_block_id,
21+
candidate->cnt,
22+
100.0*(double)candidate->stake/(double)total_stake ) );
23+
24+
if( FD_UNLIKELY( hfork->fatal ) ) FD_LOG_ERR (( "%s", msg ));
25+
else FD_LOG_WARNING(( "%s", msg ));
26+
} else if( FD_UNLIKELY( 0!=memcmp( our_bank_hash, &candidate->key.bank_hash, 32UL ) ) ) {
27+
char msg[ 4096UL ];
28+
FD_BASE58_ENCODE_32_BYTES( our_bank_hash->uc, _our_bank_hash );
29+
FD_BASE58_ENCODE_32_BYTES( candidate->key.block_id.uc, _block_id );
30+
FD_BASE58_ENCODE_32_BYTES( candidate->key.bank_hash.uc, _bank_hash );
31+
FD_TEST( fd_cstr_printf_check( msg, sizeof( msg ), NULL,
32+
"HARD FORK DETECTED: our validator has produced bank hash `%s` for slot %lu with block ID `%s`, but %lu validators with %.1f of stake have voted on a different bank hash `%s` for the same slot",
33+
_our_bank_hash,
34+
candidate->slot,
35+
_block_id,
36+
candidate->cnt,
37+
100.0*(double)candidate->stake/(double)total_stake,
38+
_bank_hash ) );
39+
40+
if( FD_UNLIKELY( hfork->fatal ) ) FD_LOG_ERR (( "%s", msg ));
41+
else FD_LOG_WARNING(( "%s", msg ));
42+
}
43+
candidate->checked = 1;
44+
}
45+
46+
ulong
47+
fd_hfork_align( void ) {
48+
return 128UL;
49+
}
50+
51+
ulong
52+
fd_hfork_footprint( ulong max_live_slots,
53+
ulong max_vote_accounts ) {
54+
int lg_blk_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_live_slots * max_vote_accounts ) ) + 1;
55+
int lg_vtr_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_vote_accounts ) ) + 1;
56+
57+
ulong l = FD_LAYOUT_INIT;
58+
l = FD_LAYOUT_APPEND( l, alignof(fd_hfork_t), sizeof(fd_hfork_t) );
59+
l = FD_LAYOUT_APPEND( l, blk_map_align(), blk_map_footprint( lg_blk_max ) );
60+
l = FD_LAYOUT_APPEND( l, vtr_map_align(), vtr_map_footprint( lg_vtr_max ) );
61+
l = FD_LAYOUT_APPEND( l, candidate_map_align(), candidate_map_footprint( lg_blk_max ) );
62+
for( ulong i = 0UL; i < fd_ulong_pow2( lg_vtr_max ); i++ ) {
63+
l = FD_LAYOUT_APPEND( l, votes_align(), votes_footprint( max_live_slots ) );
64+
}
65+
return FD_LAYOUT_FINI( l, fd_hfork_align() );
66+
}
67+
68+
void *
69+
fd_hfork_new( void * shmem,
70+
ulong max_live_slots,
71+
ulong max_vote_accounts,
72+
ulong seed,
73+
int fatal ) {
74+
(void)seed; /* TODO map seed */
75+
76+
if( FD_UNLIKELY( !shmem ) ) {
77+
FD_LOG_WARNING(( "NULL mem" ));
78+
return NULL;
79+
}
80+
81+
if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_hfork_align() ) ) ) {
82+
FD_LOG_WARNING(( "misaligned mem" ));
83+
return NULL;
84+
}
85+
86+
ulong footprint = fd_hfork_footprint( max_live_slots, max_vote_accounts );
87+
if( FD_UNLIKELY( !footprint ) ) {
88+
FD_LOG_WARNING(( "bad max_live_slots (%lu) or max_vote_accounts (%lu)", max_live_slots, max_vote_accounts ));
89+
return NULL;
90+
}
91+
92+
fd_memset( shmem, 0, footprint );
93+
94+
int lg_blk_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_live_slots * max_vote_accounts ) ) + 1;
95+
int lg_vtr_max = fd_ulong_find_msb( fd_ulong_pow2_up( max_vote_accounts ) ) + 1;
96+
97+
FD_SCRATCH_ALLOC_INIT( l, shmem );
98+
fd_hfork_t * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), sizeof( fd_hfork_t ) );
99+
void * blk_map = FD_SCRATCH_ALLOC_APPEND( l, blk_map_align(), blk_map_footprint( lg_blk_max ) );
100+
void * vtr_map = FD_SCRATCH_ALLOC_APPEND( l, vtr_map_align(), vtr_map_footprint( lg_vtr_max ) );
101+
void * candidate_map = FD_SCRATCH_ALLOC_APPEND( l, candidate_map_align(), candidate_map_footprint( lg_blk_max ) );
102+
103+
hfork->blk_map = blk_map_new( blk_map, lg_blk_max );
104+
hfork->vtr_map = vtr_map_new( vtr_map, lg_vtr_max );
105+
hfork->candidate_map = candidate_map_new( candidate_map, lg_blk_max );
106+
for( ulong i = 0UL; i < fd_ulong_pow2( lg_vtr_max ); i++ ) {
107+
void * votes = FD_SCRATCH_ALLOC_APPEND( l, votes_align(), votes_footprint( max_live_slots ) );
108+
vtr_t * join = vtr_map_join( hfork->vtr_map );
109+
join[i].votes = votes_new( votes, max_live_slots );
110+
}
111+
FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_hfork_align() ) == (ulong)shmem + footprint );
112+
hfork->fatal = fatal;
113+
return shmem;
114+
}
115+
116+
fd_hfork_t *
117+
fd_hfork_join( void * shhfork ) {
118+
fd_hfork_t * hfork = (fd_hfork_t *)shhfork;
119+
120+
if( FD_UNLIKELY( !hfork ) ) {
121+
FD_LOG_WARNING(( "NULL hfork" ));
122+
return NULL;
123+
}
124+
125+
if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)hfork, fd_hfork_align() ) ) ) {
126+
FD_LOG_WARNING(( "misaligned hfork" ));
127+
return NULL;
128+
}
129+
130+
hfork->blk_map = blk_map_join( hfork->blk_map );
131+
hfork->vtr_map = vtr_map_join( hfork->vtr_map );
132+
hfork->candidate_map = candidate_map_join( hfork->candidate_map );
133+
for( ulong i = 0UL; i < vtr_map_slot_cnt( hfork->vtr_map ); i++ ) {
134+
hfork->vtr_map[i].votes = votes_join( hfork->vtr_map[i].votes );
135+
}
136+
137+
return hfork;
138+
}
139+
140+
void *
141+
fd_hfork_leave( fd_hfork_t const * hfork ) {
142+
143+
if( FD_UNLIKELY( !hfork ) ) {
144+
FD_LOG_WARNING(( "NULL hfork" ));
145+
return NULL;
146+
}
147+
148+
return (void *)hfork;
149+
}
150+
151+
void *
152+
fd_hfork_delete( void * hfork ) {
153+
154+
if( FD_UNLIKELY( !hfork ) ) {
155+
FD_LOG_WARNING(( "NULL hfork" ));
156+
return NULL;
157+
}
158+
159+
if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)hfork, fd_hfork_align() ) ) ) {
160+
FD_LOG_WARNING(( "misaligned hfork" ));
161+
return NULL;
162+
}
163+
164+
return hfork;
165+
}
166+
167+
void
168+
fd_hfork_count_vote( fd_hfork_t * hfork,
169+
fd_hash_t const * vote_acc,
170+
fd_hash_t const * block_id,
171+
fd_hash_t const * bank_hash,
172+
ulong slot,
173+
ulong stake,
174+
ulong total_stake,
175+
fd_hfork_metrics_t * metrics ) {
176+
177+
/* Get the vtr. */
178+
179+
vtr_t * vtr = vtr_map_query( hfork->vtr_map, *vote_acc, NULL );
180+
if( FD_UNLIKELY( !vtr ) ) vtr = vtr_map_insert( hfork->vtr_map, *vote_acc );
181+
182+
/* Ignore out of order or duplicate votes. */
183+
184+
if( FD_UNLIKELY( !votes_empty( vtr->votes ) ) ) {
185+
vote_t const * tail = votes_peek_tail_const( vtr->votes );
186+
if( FD_UNLIKELY( tail && tail->slot >= slot ) ) return;
187+
}
188+
189+
/* Evict the candidate's oldest vote (by vote slot). */
190+
191+
if( FD_UNLIKELY( votes_full( vtr->votes ) ) ) {
192+
vote_t vote = votes_pop_head( vtr->votes );
193+
candidate_key_t key = { .block_id = vote.block_id, .bank_hash = vote.bank_hash };
194+
candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL );
195+
candidate->stake -= vote.stake;
196+
candidate->cnt--;
197+
if( FD_UNLIKELY( candidate->cnt==0 ) ) {
198+
candidate_map_remove( hfork->candidate_map, candidate );
199+
blk_t * blk = blk_map_query( hfork->blk_map, vote.block_id, NULL );
200+
blk->bank_hashes_cnt--;
201+
if( FD_UNLIKELY( blk->bank_hashes_cnt == 0 ) ) {
202+
blk_map_remove( hfork->blk_map, blk );
203+
if( FD_UNLIKELY( blk->forked ) ) {
204+
metrics->active--;
205+
metrics->pruned++;
206+
}
207+
}
208+
}
209+
}
210+
211+
/* Push the vote onto the vtr. */
212+
213+
vote_t vote = { .block_id = *block_id, .bank_hash = *bank_hash, .slot = slot, .stake = stake };
214+
vtr->votes = votes_push_tail( vtr->votes, vote );
215+
216+
/* Update the hard fork candidate for this block id. */
217+
218+
candidate_key_t key = { .block_id = *block_id, .bank_hash = *bank_hash };
219+
candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL );
220+
if( FD_UNLIKELY( !candidate ) ) {
221+
candidate = candidate_map_insert( hfork->candidate_map, key );
222+
candidate->slot = slot;
223+
candidate->stake = 0UL;
224+
candidate->cnt = 0UL;
225+
}
226+
candidate->cnt++;
227+
candidate->stake += stake;
228+
229+
/* Update the list of bank hashes for this block_id. */
230+
231+
blk_t * blk = blk_map_query( hfork->blk_map, *block_id, NULL );
232+
if( FD_UNLIKELY( !blk ) ) {
233+
FD_TEST( blk_map_key_cnt( hfork->blk_map ) < blk_map_key_max( hfork->blk_map ) ); /* invariant violation: blk_map full */
234+
blk = blk_map_insert( hfork->blk_map, *block_id );
235+
FD_TEST( blk );
236+
blk->bank_hashes_cnt = 0;
237+
blk->replayed = 0;
238+
blk->invalid = 0;
239+
}
240+
int found = 0;
241+
for( ulong i=0UL; i<blk->bank_hashes_cnt; i++ ) {
242+
if( FD_LIKELY( 0==memcmp( &blk->bank_hashes[i], bank_hash, 32UL ) ) ) found = 1;
243+
}
244+
if( FD_UNLIKELY( !found ) ) {
245+
blk->bank_hashes[ blk->bank_hashes_cnt++ ] = *bank_hash;
246+
247+
/* Found a hard fork. */
248+
249+
if( FD_UNLIKELY( !blk->forked && blk->bank_hashes_cnt > 1 ) ) {
250+
blk->forked = 1;
251+
metrics->seen++;
252+
metrics->active++;
253+
}
254+
}
255+
metrics->max_width = fd_ulong_max( metrics->max_width, blk->bank_hashes_cnt );
256+
257+
/* Check for hard forks. */
258+
259+
if( FD_LIKELY( blk->replayed ) ) check( hfork, total_stake, candidate, blk->invalid, &blk->our_bank_hash );
260+
}
261+
262+
void
263+
fd_hfork_record_our_bank_hash( fd_hfork_t * hfork,
264+
fd_hash_t * block_id,
265+
fd_hash_t * bank_hash,
266+
ulong total_stake ) {
267+
blk_t * blk = blk_map_query( hfork->blk_map, *block_id, NULL );
268+
if( FD_UNLIKELY( !blk ) ) {
269+
blk = blk_map_insert( hfork->blk_map, *block_id );
270+
FD_TEST( blk );
271+
blk->bank_hashes_cnt = 0UL;
272+
blk->replayed = 1;
273+
}
274+
if( FD_LIKELY( bank_hash ) ) { blk->invalid = 0; blk->our_bank_hash = *bank_hash; }
275+
else blk->invalid = 1;
276+
277+
for( ulong i=0UL; i<blk->bank_hashes_cnt; i++ ) {
278+
candidate_key_t key = { .block_id = *block_id, .bank_hash = blk->bank_hashes[i] };
279+
candidate_t * candidate = candidate_map_query( hfork->candidate_map, key, NULL );
280+
if( FD_LIKELY( candidate ) ) check( hfork, total_stake, candidate, blk->invalid, &blk->our_bank_hash );
281+
}
282+
}

0 commit comments

Comments
 (0)