55"""
66
77import argparse
8+ import logging
89import os
910import os .path
1011from pathlib import Path
1617
1718from eth_utils import humanize_hash
1819import rlp
20+ from rlp .sedes import CountableList
1921
2022from eth .chains .mainnet import MAINNET_GENESIS_HEADER , MainnetChain
2123from eth .db .backends .level import LevelDB
2224from eth .rlp .headers import BlockHeader
25+ from eth .rlp .transactions import BaseTransactionFields
26+
27+
# Module-level logger; configured via logging.basicConfig in the __main__ guard.
logger = logging.getLogger('importer')
2329
2430
2531class GethKeys :
@@ -31,6 +37,8 @@ class GethKeys:
3137 headerNumberPrefix = b'H'
3238 headerHashSuffix = b'n'
3339
40+ blockBodyPrefix = b'b'
41+
3442 @classmethod
3543 def header_hash_for_block_number (cls , block_number : int ) -> bytes :
3644 "The key to get the hash of the header with the given block number"
@@ -47,6 +55,11 @@ def block_header(cls, block_number: int, header_hash: bytes) -> bytes:
4755 packed_block_number = struct .pack ('>Q' , block_number )
4856 return cls .headerPrefix + packed_block_number + header_hash
4957
58+ @classmethod
59+ def block_body (cls , block_number : int , header_hash : bytes ) -> bytes :
60+ packed_block_number = struct .pack ('>Q' , block_number )
61+ return cls .blockBodyPrefix + packed_block_number + header_hash
62+
5063
5164class GethFreezerIndexEntry :
5265 def __init__ (self , filenum : int , offset : int ):
@@ -68,23 +81,23 @@ def __init__(self, ancient_path, name, uses_compression):
6881 self .ancient_path = ancient_path
6982 self .name = name
7083 self .uses_compression = uses_compression
71- print (f'opening freezer table. name={ self .name } ' )
84+ logger . debug (f'opening freezer table. name={ self .name } ' )
7285
7386 self .index_file = open (os .path .join (ancient_path , self .index_file_name ), 'rb' )
7487 stat_result = os .stat (self .index_file .fileno ())
7588 index_file_size = stat_result .st_size
7689 assert index_file_size % 6 == 0 , index_file_size
77- print (f'index_size={ index_file_size } ({ index_file_size // 6 } entries)' )
90+ logger . debug (f'index_size={ index_file_size } ({ index_file_size // 6 } entries)' )
7891 self .entries = index_file_size // 6
7992
8093 first_index_bytes = self .index_file .read (6 )
8194 first_index = GethFreezerIndexEntry .from_bytes (first_index_bytes )
82- print (f'first_index={ first_index } ' )
95+ logger . debug (f'first_index={ first_index } ' )
8396
8497 self .index_file .seek (- 6 , 2 )
8598 last_index_bytes = self .index_file .read (6 )
8699 last_index = GethFreezerIndexEntry .from_bytes (last_index_bytes )
87- print (f'last_index={ last_index } ' )
100+ logger . debug (f'last_index={ last_index } ' )
88101
89102 self ._data_files = dict ()
90103
@@ -137,6 +150,17 @@ def __del__(self) -> None:
137150 self .index_file .close ()
138151
139152
class BlockBody(rlp.Serializable):
    "Geth's on-disk RLP representation of a block body"
    fields = [
        ('transactions', CountableList(BaseTransactionFields)),
        ('uncles', CountableList(BlockHeader)),
    ]

    def __repr__(self) -> str:
        return 'BlockBody(txns={}, uncles={})'.format(self.transactions, self.uncles)
163+
140164class GethDatabase :
141165 def __init__ (self , path ):
142166 self .db = plyvel .DB (
@@ -149,6 +173,7 @@ def __init__(self, path):
149173 ancient_path = os .path .join (path , 'ancient' )
150174 self .ancient_hashes = GethFreezerTable (ancient_path , 'hashes' , False )
151175 self .ancient_headers = GethFreezerTable (ancient_path , 'headers' , True )
176+ self .ancient_bodies = GethFreezerTable (ancient_path , 'bodies' , True )
152177
153178 if self .database_version != b'\x07 ' :
154179 raise Exception (f'geth database version { self .database_version } is not supported' )
@@ -166,8 +191,10 @@ def block_num_for_hash(self, header_hash: bytes) -> int:
166191 raw_num = self .db .get (GethKeys .block_number_for_header_hash (header_hash ))
167192 return struct .unpack ('>Q' , raw_num )[0 ]
168193
169- def block_header (self , block_number : int , header_hash : bytes ) -> BlockHeader :
170- # This also needs to check the ancient db
194+ def block_header (self , block_number : int , header_hash : bytes = None ) -> BlockHeader :
195+ if header_hash is None :
196+ header_hash = self .header_hash_for_block_number (block_number )
197+
171198 raw_data = self .db .get (GethKeys .block_header (block_number , header_hash ))
172199 if raw_data is not None :
173200 return rlp .decode (raw_data , sedes = BlockHeader )
@@ -184,28 +211,34 @@ def header_hash_for_block_number(self, block_number: int) -> bytes:
184211
185212 return self .ancient_hashes .get (block_number )
186213
214+ def block_body (self , block_number : int , header_hash : bytes = None ):
215+ if header_hash is None :
216+ header_hash = self .header_hash_for_block_number (block_number )
217+
218+ raw_data = self .db .get (GethKeys .block_body (block_number , header_hash ))
219+ if raw_data is not None :
220+ return rlp .decode (raw_data , sedes = BlockBody )
221+
222+ raw_data = self .ancient_bodies .get (block_number )
223+ return rlp .decode (raw_data , sedes = BlockBody )
224+
187225
188226def main (args ):
189- # Open geth database
227+ # 1. Open Geth database
228+
190229 gethdb = GethDatabase (args .gethdb )
191230
192231 last_block = gethdb .last_block_hash
193232 last_block_num = gethdb .block_num_for_hash (last_block )
194- print ('geth database opened' )
195- print (f'found chain tip: header_hash={ humanize_hash (last_block )} block_number={ last_block_num } ' )
196-
197- print (f'header: { len (gethdb .block_header (last_block_num , last_block ))} ' )
233+ logger .info ('geth database opened' )
234+ logger .info (f'found geth chain tip: header_hash={ humanize_hash (last_block )} block_number={ last_block_num } ' )
198235
199236 genesis_hash = gethdb .header_hash_for_block_number (0 )
200237 genesis_header = gethdb .block_header (0 , genesis_hash )
201- print (f'genesis header: { genesis_header } ' )
202238 assert genesis_header == MAINNET_GENESIS_HEADER
239+ logger .info (f'geth genesis header matches expected genesis' )
203240
204- first_hash = gethdb .header_hash_for_block_number (1 )
205- first_block = gethdb .block_header (1 , first_hash )
206- print (f'first header: { first_block } ' )
207-
208- # Create trinity database
241+ # 2. Create trinity database
209242
210243 db_already_existed = False
211244 if os .path .exists (args .destdb ):
@@ -214,33 +247,51 @@ def main(args):
214247 leveldb = LevelDB (db_path = Path (args .destdb ), max_open_files = 16 )
215248
216249 if not db_already_existed :
217- print (f'Trinity database did not already exist, initializing it now' )
250+ logger . info (f'Trinity database did not already exist, initializing it now' )
218251 chain = MainnetChain .from_genesis_header (leveldb , MAINNET_GENESIS_HEADER )
219252 else :
220253 chain = MainnetChain (leveldb )
221254
222255 headerdb = chain .headerdb
223256
257+ # 3. Import headers + bodies
258+
224259 canonical_head = headerdb .get_canonical_head ()
225- print (f'starting copy from trinity\' s canonical head: { canonical_head } ' )
260+ logger . info (f'starting import from trinity\' s canonical head: { canonical_head } ' )
226261
227- # verify the trinity database matches what geth has
262+ # fail fast if geth disagrees with trinity's canonical head
228263 geth_header = gethdb .block_header (canonical_head .block_number , canonical_head .hash )
229264 assert geth_header .hash == canonical_head .hash
230265
231266 for i in range (canonical_head .block_number , last_block_num + 1 ):
232267 header_hash = gethdb .header_hash_for_block_number (i )
233268 header = gethdb .block_header (i , header_hash )
234269
235- headerdb .persist_header (header )
270+ body = gethdb .block_body (i )
271+ block_class = chain .get_vm_class (header ).get_block_class ()
272+ block = block_class (header , body .transactions , body .uncles )
273+ chain .chaindb .persist_block (block )
236274
237275 if i % 1000 == 0 :
238- print (f'current canonical header: { headerdb .get_canonical_head ()} ' )
276+ logger . debug (f'current canonical header: { headerdb .get_canonical_head ()} ' )
239277
240- return
278+ # some final checks, these should never fail
279+ canonical_head = headerdb .get_canonical_head ()
280+ geth_last_block_hash = gethdb .last_block_hash
281+ geth_last_block_num = gethdb .block_num_for_hash (geth_last_block_hash )
282+ assert canonical_head .hash == geth_last_block_hash
283+ assert canonical_head .block_number == geth_last_block_num
284+
285+ logger .info ('finished importing headers + bodies' )
241286
242287
243288if __name__ == "__main__" :
289+ logging .basicConfig (
290+ level = logging .DEBUG ,
291+ format = '%(asctime)s.%(msecs)03d %(levelname)s: %(message)s' ,
292+ datefmt = '%H:%M:%S'
293+ )
294+
244295 parser = argparse .ArgumentParser ()
245296 parser .add_argument ('-gethdb' , type = str , required = True )
246297 parser .add_argument ('-destdb' , type = str , required = True )