@@ -95,24 +95,13 @@ def __init__(self, ancient_path, name, uses_compression):
9595 self .ancient_path = ancient_path
9696 self .name = name
9797 self .uses_compression = uses_compression
98- logger .debug (f'opening freezer table. name={ self .name } ' )
9998
10099 self .index_file = open (os .path .join (ancient_path , self .index_file_name ), 'rb' )
101100 stat_result = os .stat (self .index_file .fileno ())
102101 index_file_size = stat_result .st_size
103102 assert index_file_size % 6 == 0 , index_file_size
104- logger .debug (f'index_size={ index_file_size } ({ index_file_size // 6 } entries)' )
105103 self .entries = index_file_size // 6
106104
107- first_index_bytes = self .index_file .read (6 )
108- first_index = GethFreezerIndexEntry .from_bytes (first_index_bytes )
109- logger .debug (f'first_index={ first_index } ' )
110-
111- self .index_file .seek (- 6 , 2 )
112- last_index_bytes = self .index_file .read (6 )
113- last_index = GethFreezerIndexEntry .from_bytes (last_index_bytes )
114- logger .debug (f'last_index={ last_index } ' )
115-
116105 self ._data_files = dict ()
117106
118107 @property
@@ -163,6 +152,19 @@ def __del__(self) -> None:
163152 f .close ()
164153 self .index_file .close ()
165154
155+ @property
156+ def last_index (self ):
157+ self .index_file .seek (- 6 , 2 )
158+ last_index_bytes = self .index_file .read (6 )
159+ return GethFreezerIndexEntry .from_bytes (last_index_bytes )
160+
161+ @property
162+ def first_index (self ):
163+ self .index_file .seek (0 )
164+ first_index_bytes = self .index_file .read (6 )
165+ return GethFreezerIndexEntry .from_bytes (first_index_bytes )
166+
167+
166168
167169class BlockBody (rlp .Serializable ):
168170 "This is how geth stores block bodies"
@@ -274,7 +276,6 @@ def open_gethdb(location):
274276
275277 last_block = gethdb .last_block_hash
276278 last_block_num = gethdb .block_num_for_hash (last_block )
277- logger .info ('geth database opened' )
278279 logger .info (f'found geth chain tip: header_hash={ humanize_hash (last_block )} block_number={ last_block_num } ' )
279280
280281 genesis_hash = gethdb .header_hash_for_block_number (0 )
@@ -297,17 +298,18 @@ def open_trinitydb(location):
297298 logger .info (f'Trinity database did not already exist, initializing it now' )
298299 chain = MainnetChain .from_genesis_header (leveldb , MAINNET_GENESIS_HEADER )
299300
300- # from_genesis_header copied the header over to our trinity db but not the state
301+ logger .warning ('The new db contains the genesis header but not the genesis state.' )
302+ logger .warning ('Attempts to full sync will fail.' )
301303
302304 return chain
303305
304306
305- def main (args ):
306- gethdb = open_gethdb (args .gethdb )
307- chain = open_trinitydb (args .destdb )
307+ def import_headers (gethdb , chain ):
308308 headerdb = chain .headerdb
309309
310- # 3. Import headers + bodies
310+ logger .warning ('Some features are not yet implemented:' )
311+ logger .warning ('- This only supports importing the mainnet chain' )
312+ logger .warning ('- This script will not verify that geth is using the mainnet chain' )
311313
312314 canonical_head = headerdb .get_canonical_head ()
313315 logger .info (f'starting import from trinity\' s canonical head: { canonical_head } ' )
@@ -324,13 +326,9 @@ def main(args):
324326 final_block_to_sync = min (args .syncuntil , final_block_to_sync )
325327
326328 for i in range (canonical_head .block_number , final_block_to_sync + 1 ):
327-
328- if not args .nobodies :
329- import_block_body (gethdb , chain , i )
330- else :
331- header_hash = gethdb .header_hash_for_block_number (i )
332- header = gethdb .block_header (i , header_hash )
333- headerdb .persist_header (header )
329+ header_hash = gethdb .header_hash_for_block_number (i )
330+ header = gethdb .block_header (i , header_hash )
331+ headerdb .persist_header (header )
334332
335333 if i % 1000 == 0 :
336334 logger .debug (f'current canonical header: { headerdb .get_canonical_head ()} ' )
@@ -344,54 +342,14 @@ def main(args):
344342
345343 logger .info ('finished importing headers + bodies' )
346344
347- if args .justblocks :
348- return
349-
350- scan_state (gethdb , leveldb )
351- return
352-
353- state_root = canonical_head .state_root
354- logger .info (f'starting state trie import: { humanize_hash (state_root )} ' )
355-
356- # 4. Import the state trie + storage tries
357- # Write something which iterates over the entire trie, from left to right
358- # Pass it a database which first looks in the trinity db, and if nothing is there
359- # copies the requested node from geth->trinity before returning it
360-
361- imported_leaf_count = 0
362- importdb = ImportDatabase (gethdb = gethdb .db , trinitydb = leveldb .db )
363- for path , leaf_data in iterate_leaves (importdb , state_root ):
364- account = rlp .decode (leaf_data , sedes = Account )
365- addr_hash = nibbles_to_bytes (path )
366345
367-
368- if account .code_hash != EMPTY_SHA3 :
369- bytecode = importdb .get (account .code_hash )
370-
371- if account .storage_root == BLANK_ROOT_HASH :
372- imported_leaf_count += 1
373-
374- if imported_leaf_count % 1000 == 0 :
375- logger .debug (f'progress sha(addr)={ addr_hash .hex ()} ' )
376- continue
377-
378- for path , leaf_data in iterate_leaves (importdb , account .storage_root ):
379- item_addr = nibbles_to_bytes (path )
380- imported_leaf_count += 1
381-
382- if imported_leaf_count % 1000 == 0 :
383- logger .debug (f'progress sha(addr)={ addr_hash .hex ()} sha(item)={ item_addr .hex ()} ' )
384-
385- loger .info ('successfully imported state trie and all storage tries' )
386-
387-
388- def scan_state (gethdb : GethDatabase , trinitydb : LevelDB ):
346+ def sweep_state (gethdb : GethDatabase , trinitydb : LevelDB ):
389347 """
390348 Imports state, but by indiscriminately copying over everything which might be part of
391349 the state trie. This copies more data than necessary, but is likely to be much faster
392350 than iterating all state.
393351 """
394- logger .debug ('scan_state : bulk-importing state entries' )
352+ logger .debug ('sweep_state : bulk-importing state entries' )
395353
396354 iterator = gethdb .db .iterator (
397355 start = b'\x00 ' * 32 ,
@@ -416,7 +374,43 @@ def scan_state(gethdb: GethDatabase, trinitydb: LevelDB):
416374 break
417375 bucket = (int .from_bytes (bucket , 'big' ) + 1 ).to_bytes (2 , 'big' )
418376
419- logger .info (f'scan_state: successfully imported { imported_entries } state entries' )
377+ logger .info (f'sweep_state: successfully imported { imported_entries } state entries' )
378+
379+
380+ def import_state (gethdb : GethDatabase , chain ):
381+ headerdb = chain .headerdb
382+ canonical_head = headerdb .get_canonical_head ()
383+ state_root = canonical_head .state_root
384+
385+ logger .info (
386+ f'starting state trie import. canonical_head={ canonical_head } '
387+ f'state_root={ humanize_hash (state_root )} '
388+ )
389+
390+ imported_leaf_count = 0
391+ importdb = ImportDatabase (gethdb = gethdb .db , trinitydb = leveldb .db )
392+ for path , leaf_data in iterate_leaves (importdb , state_root ):
393+ account = rlp .decode (leaf_data , sedes = Account )
394+ addr_hash = nibbles_to_bytes (path )
395+
396+ if account .code_hash != EMPTY_SHA3 :
397+ bytecode = importdb .get (account .code_hash )
398+
399+ if account .storage_root == BLANK_ROOT_HASH :
400+ imported_leaf_count += 1
401+
402+ if imported_leaf_count % 1000 == 0 :
403+ logger .debug (f'progress sha(addr)={ addr_hash .hex ()} ' )
404+ continue
405+
406+ for path , leaf_data in iterate_leaves (importdb , account .storage_root ):
407+ item_addr = nibbles_to_bytes (path )
408+ imported_leaf_count += 1
409+
410+ if imported_leaf_count % 1000 == 0 :
411+ logger .debug (f'progress sha(addr)={ addr_hash .hex ()} sha(item)={ item_addr .hex ()} ' )
412+
413+ logger .info ('successfully imported state trie and all storage tries' )
420414
421415
422416def import_block_body (gethdb , chain , block_number : int ):
@@ -429,7 +423,7 @@ def import_block_body(gethdb, chain, block_number: int):
429423 chain .chaindb .persist_block (block )
430424
431425 # persist_block saves the transactions into an index, but doesn't actually persist the
432- # transaction trie, meaning that without this next block attempts to read out the
426+ # transaction trie, meaning that without this next section attempts to read out the
433427 # block will throw an exception
434428 tx_root_hash , tx_kv_nodes = make_trie_root_and_nodes (body .transactions )
435429 assert tx_root_hash == block .header .transaction_root
@@ -489,32 +483,112 @@ def read_receipts(gethdb, block_number):
489483 logger .info (f'- post_state_or_status={ post_state } gas_used={ gas_used } len(logs)={ len (logs )} ' )
490484
491485
486+ def read_geth (gethdb ):
487+ logger .info (f'database_version={ gethdb .database_version } ' )
488+
489+ ancient_entry_count = gethdb .ancient_hashes .entries
490+ logger .info (f'entries_in_ancient_db={ ancient_entry_count } ' )
491+
492+
493+ def read_trinity (location ):
494+ if not os .path .exists (location ):
495+ logger .error (f'There is no database at { location } ' )
496+ return
497+
498+ chain = open_trinitydb (location )
499+ headerdb = chain .headerdb
500+
501+ canonical_head = headerdb .get_canonical_head ()
502+ logger .info (f'canonical_head={ canonical_head } ' )
503+
504+
492505if __name__ == "__main__" :
493506 logging .basicConfig (
494507 level = logging .DEBUG ,
495508 format = '%(asctime)s.%(msecs)03d %(levelname)s: %(message)s' ,
496509 datefmt = '%H:%M:%S'
497510 )
498511
499- parser = argparse .ArgumentParser ()
500- parser .add_argument ('-gethdb' , type = str , required = True )
501- parser .add_argument ('-destdb' , type = str , required = True )
502- parser .add_argument ('-justblocks' , action = 'store_true' )
503- parser .add_argument ('-nobodies' , action = 'store_true' )
504- parser .add_argument ('-syncuntil' , type = int , action = 'store' )
505-
506- subparsers = parser .add_subparsers (dest = "command" )
512+ parser = argparse .ArgumentParser (
513+ description = "Import chaindata from geth: builds a database py-evm understands." ,
514+ epilog = "For more information on using a subcommand: 'subcommand --help'"
515+ )
516+ subparsers = parser .add_subparsers (dest = "command" , title = "subcommands" )
517+
518+ import_headers_parser = subparsers .add_parser (
519+ 'import_headers' ,
520+ help = "Copies over headers from geth into trinity" ,
521+ description = """
522+ copies every header, starting from trinity's canonical chain tip,
523+ continuing up to geth's canonical chain tip
524+ """
525+ )
526+ import_headers_parser .add_argument ('-gethdb' , type = str , required = True )
527+ import_headers_parser .add_argument ('-destdb' , type = str , required = True )
528+ import_headers_parser .add_argument (
529+ '-syncuntil' , type = int , action = 'store' ,
530+ help = "Only import headers up to this block number"
531+ )
507532
508- import_body_range_parser = subparsers .add_parser ('import_body_range' )
533+ sweep_state_parser = subparsers .add_parser (
534+ 'sweep_state' ,
535+ help = "Does a (very fast) bulk copy of state entries from the gethdb" ,
536+ description = """
537+ Scans over every key:value pair in the geth database, and copies over
538+ everything which looks like a state node (has a 32-byte key). This is
539+ much faster than iterating over the state trie (as import_state does)
540+ but imports too much. If a geth node has been running for a while (and
541+ started and stopped a lot) then there will be a lot of unimportant
542+ state entries.
543+ """
544+ )
545+ sweep_state_parser .add_argument ('-gethdb' , type = str , required = True )
546+ sweep_state_parser .add_argument ('-destdb' , type = str , required = True )
547+
548+ import_body_range_parser = subparsers .add_parser (
549+ 'import_body_range' ,
550+ help = "Imports block bodies (transactions and uncles, but not receipts)" ,
551+ description = """
552+ block bodies take a while to import so this command lets you import
553+ just the segment you need. -startblock and -endblock are inclusive.
554+ """
555+ )
556+ import_body_range_parser .add_argument ('-gethdb' , type = str , required = True )
557+ import_body_range_parser .add_argument ('-destdb' , type = str , required = True )
509558 import_body_range_parser .add_argument ('-startblock' , type = int , required = True )
510559 import_body_range_parser .add_argument ('-endblock' , type = int , required = True )
511560
512- process_blocks_parser = subparsers .add_parser ('process_blocks' )
561+ process_blocks_parser = subparsers .add_parser (
562+ 'process_blocks' ,
563+ help = "Simulates a full sync, runs each block." ,
564+ description = """
565+ Starting from trinity's canonical chain tip this fetches block bodies
566+ from the gethdb and runs each of them.
567+ """
568+ )
569+ process_blocks_parser .add_argument ('-gethdb' , type = str , required = True )
570+ process_blocks_parser .add_argument ('-destdb' , type = str , required = True )
513571 process_blocks_parser .add_argument ('-endblock' , type = int , required = True )
514572
515- read_receipts_parser = subparsers .add_parser ('read_receipts' )
573+ read_receipts_parser = subparsers .add_parser (
574+ 'read_receipts' ,
575+ help = "Helper to inspect all the receipts for a given block"
576+ )
577+ read_receipts_parser .add_argument ('-gethdb' , type = str , required = True )
516578 read_receipts_parser .add_argument ('-block' , type = int , required = True )
517579
580+ read_trinity_parser = subparsers .add_parser (
581+ 'read_trinity' ,
582+ help = "Helper to print summary statistics for a given trinitydb"
583+ )
584+ read_trinity_parser .add_argument ('-destdb' , type = str , required = True )
585+
586+ read_geth_parser = subparsers .add_parser (
587+ 'read_geth' ,
588+ help = "Helper to print summary statistics for a given gethdb"
589+ )
590+ read_geth_parser .add_argument ('-gethdb' , type = str , required = True )
591+
518592 args = parser .parse_args ()
519593
520594 if args .command == 'import_body_range' :
@@ -528,9 +602,18 @@ def read_receipts(gethdb, block_number):
528602 elif args .command == 'read_receipts' :
529603 gethdb = open_gethdb (args .gethdb )
530604 read_receipts (gethdb , args .block )
605+ elif args .command == 'read_geth' :
606+ gethdb = open_gethdb (args .gethdb )
607+ read_geth (gethdb )
608+ elif args .command == 'read_trinity' :
609+ read_trinity (args .destdb )
610+ elif args .command == 'import_headers' :
611+ gethdb = open_gethdb (args .gethdb )
612+ chain = open_trinitydb (args .destdb )
613+ import_headers (gethdb , chain )
614+ elif args .command == 'sweep_state' :
615+ gethdb = open_gethdb (args .gethdb )
616+ chain = open_trinitydb (args .destdb )
617+ sweep_state (gethdb , chain .headerdb .db )
531618 else :
532- main (args )
533-
534- logger .warning ('Some features are not yet implemented:' )
535- logger .warning ('- Receipts were not imported' )
536- logger .warning ('- This script did not verify that the chain configs match' )
619+ logger .error (f'unrecognized command. command={ args .command } ' )
0 commit comments