From 93b34eca89f6e33f95d9136f1362e273e66b7613 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 30 Jul 2019 15:38:09 -0700 Subject: [PATCH 1/8] Persist block diffs --- eth/db/account.py | 86 ++++++++++++++++++++++ eth/db/block_diff.py | 110 ++++++++++++++++++++++++++++ eth/db/schema.py | 5 ++ eth/db/storage.py | 12 +++ eth/vm/base.py | 6 +- eth/vm/state.py | 6 ++ tests/core/vm/test_block_diffs.py | 118 ++++++++++++++++++++++++++++++ 7 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 eth/db/block_diff.py create mode 100644 tests/core/vm/test_block_diffs.py diff --git a/eth/db/account.py b/eth/db/account.py index 4acc7a3df3..68490697e3 100644 --- a/eth/db/account.py +++ b/eth/db/account.py @@ -19,7 +19,9 @@ ) from eth_utils import ( + big_endian_to_int, encode_hex, + int_to_big_endian, to_checksum_address, to_tuple, ValidationError, @@ -41,6 +43,9 @@ from eth.db.batch import ( BatchDB, ) +from eth.db.block_diff import ( + BlockDiff, +) from eth.db.cache import ( CacheDB, ) @@ -565,6 +570,87 @@ def persist(self) -> None: self._batchdb.commit_to(write_batch, apply_deletes=False) self._root_hash_at_last_persist = new_root_hash + def persist_with_block_diff(self, block_hash: Hash32) -> None: + """ + Persists, including a diff which can be used to unwind/replay the changes this block makes. + """ + + block_diff = BlockDiff(block_hash) + + # 1. Grab all the changed accounts and their previous values + + changed_accounts = self._changed_accounts() + for deleted_key in changed_accounts.deleted_keys(): + # TODO: test that from_journal=False works + # DBDiff gave me bytes, but I need an address here... + deleted_address = cast(Address, deleted_key) + current_value = self._get_encoded_account(deleted_address, from_journal=False) + if current_value == b'': + raise Exception('a non-existant account cannot be deleted') + block_diff.set_account_changed(deleted_address, current_value, b'') + + for key, value in changed_accounts.pending_items(): + # TODO: key -> address + current_value = self._get_encoded_account(cast(Address, key), from_journal=False) + block_diff.set_account_changed(cast(Address, key), current_value, value) + + # 2. Grab all the changed storage items and their previous values. + dirty_stores = tuple(self._dirty_account_stores()) + for address, store in dirty_stores: + diff = store.diff() + + for key in diff.deleted_keys(): + slot = big_endian_to_int(key) + current_slot_value = store.get(slot, from_journal=False) + current_slot_value_bytes = int_to_big_endian(current_slot_value) + # TODO: Is b'' a valid value for a storage slot? + block_diff.set_storage_changed(address, slot, b'', current_slot_value_bytes) + + for key, new_value in diff.pending_items(): + slot = big_endian_to_int(key) + old_value = store.get(slot, from_journal=False) + # TODO: this seems incredibly wrong + if old_value == 0: + old_value_bytes = b'' + else: + old_value_bytes = int_to_big_endian(old_value) + block_diff.set_storage_changed(address, slot, old_value_bytes, new_value) + + old_account_values: Dict[Address, bytes] = dict() + for address, _ in dirty_stores: + old_account_values[address] = self._get_encoded_account(address, from_journal=False) + + # 3. Persist! + self.persist() + + # 4. Grab the new storage roots + for address, _store in dirty_stores: + old_account_value = old_account_values[address] + new_account_value = self._get_encoded_account(address, from_journal=False) + block_diff.set_account_changed(address, old_account_value, new_account_value) + + # 5. persist the block diff + block_diff.write_to(self._raw_store_db) + + def _changed_accounts(self) -> DBDiff: + """ + Returns all the accounts which will be written to the db when persist() is called. + + Careful! If some storage items have changed then the storage roots for some accounts + should also change but those accounts will not show up here unless something else about + them also changed. + """ + return self._journaltrie.diff() + + def _changed_storage_items(self) -> Dict[Address, DBDiff]: + """ + Returns all the storage items which will be written to the db when persist() is called. + """ + return { + address: store.diff() + for address, store in self._dirty_account_stores() + } + def _validate_generated_root(self) -> None: db_diff = self._journaldb.diff() if len(db_diff): diff --git a/eth/db/block_diff.py b/eth/db/block_diff.py new file mode 100644 index 0000000000..3371cffe49 --- /dev/null +++ b/eth/db/block_diff.py @@ -0,0 +1,110 @@ +from collections import defaultdict +from typing import ( + cast, + Dict, + Iterable, + NamedTuple, + Optional, + Tuple, +) + +from eth_typing import ( + Address, + Hash32, +) +import rlp + +from eth.db.backends.base import BaseDB +from eth.db.schema import SchemaTurbo +from eth.rlp.accounts import Account + + +class Change(NamedTuple): + old: object + new: object + + +class BlockDiff: + """ + TODO: I'm not sure where this class belongs + """ + + def __init__(self, block_hash: Hash32) -> None: + self.block_hash = block_hash + + self.changed_accounts: Dict[Address, Change] = dict() + self.changed_storage_items: Dict[Address, Dict[int, Change]] = defaultdict(dict) + + def set_account_changed(self, address: Address, old: bytes, new: bytes) -> None: + self.changed_accounts[address] = Change(old, new) + + def set_storage_changed(self, address: Address, slot: int, old: bytes, new: bytes) -> None: + self.changed_storage_items[address][slot] = Change(old, new) + + def get_changed_accounts(self) -> Iterable[Address]: + return tuple( + set(self.changed_accounts.keys()) | set(self.changed_storage_items.keys()) + ) + + def get_changed_storage_items(self) -> Iterable[Tuple[Address, int, int, int]]: + def storage_items_to_diff(item: Dict[int, Change]) -> Iterable[Tuple[int, int, int]]: + return [ + (key, cast(int, change.old), cast(int, change.new)) + for key, change in item.items() + ] + + return [ + (acct, key, old_value, new_value) + for acct, value in self.changed_storage_items.items() + for key, old_value, new_value in storage_items_to_diff(value) + ] + + def get_account(self, address: Address, new: bool = True) -> bytes: + change = self.changed_accounts[address] + return cast(bytes, change.new) if new else cast(bytes, change.old) + + def get_decoded_account(self, address: Address, new: bool = True) -> Optional[Account]: + encoded = self.get_account(address, new) + if encoded == b'': + return None # this means the account used to or currently does not exist + return rlp.decode(encoded, sedes=Account) + + @classmethod + def from_db(cls, db: BaseDB, block_hash: Hash32) -> 'BlockDiff': + """ + KeyError is thrown if a diff was not saved for the provided {block_hash} + """ + + encoded_diff = db[SchemaTurbo.make_block_diff_lookup_key(block_hash)] + diff = rlp.decode(encoded_diff) + + accounts, storage_items = diff + + block_diff = cls(block_hash) + + for key, old, new in accounts: + block_diff.set_account_changed(key, old, new) + + for key, slot, old, new in storage_items: + block_diff.set_storage_changed(key, slot, old, new) + + return block_diff + + def write_to(self, db: BaseDB) -> None: + + # TODO: this should probably verify that the state roots have all been added + + accounts = [ + [key, value.old, value.new] + for key, value in self.changed_accounts.items() + ] + + storage_items = self.get_changed_storage_items() + + diff = [ + accounts, + storage_items + ] + + encoded_diff = rlp.encode(diff) + db[SchemaTurbo.make_block_diff_lookup_key(self.block_hash)] = encoded_diff diff --git a/eth/db/schema.py b/eth/db/schema.py index 1efab2c65b..e506729e8a 100644 --- a/eth/db/schema.py +++ b/eth/db/schema.py @@ -62,6 +62,11 @@ def make_transaction_hash_to_block_lookup_key(transaction_hash: Hash32) -> bytes class SchemaTurbo(SchemaV1): current_schema_lookup_key: bytes = b'current-schema' + _block_diff_prefix = b'block-diff' + + @classmethod + def make_block_diff_lookup_key(cls, block_hash: Hash32) -> bytes: + return cls._block_diff_prefix + b':' + block_hash def get_schema(db: BaseDB) -> Schemas: diff --git a/eth/db/storage.py b/eth/db/storage.py index 3b5d79b065..38d85fd8ec 100644 --- a/eth/db/storage.py +++ b/eth/db/storage.py @@ -31,6 +31,9 @@ from eth.db.cache import ( CacheDB, ) +from eth.db.diff import ( + DBDiff +) from eth.db.journal import ( JournalDB, ) @@ -271,3 +274,12 @@ def persist(self, db: BaseDB) -> None: self._validate_flushed() if self._storage_lookup.has_changed_root: self._storage_lookup.commit_to(db) + + def diff(self) -> DBDiff: + """ + Returns all the changes that would be saved if persist() were called. + + Note: Calling make_storage_root() wipes away changes, after it is called this method will + return an empty diff. + """ + return self._journal_storage.diff() diff --git a/eth/vm/base.py b/eth/vm/base.py index a932e0d0ee..04d018f940 100644 --- a/eth/vm/base.py +++ b/eth/vm/base.py @@ -675,7 +675,11 @@ def finalize_block(self, block: BaseBlock) -> BaseBlock: # We need to call `persist` here since the state db batches # all writes until we tell it to write to the underlying db - self.state.persist() + # self.state.persist() + + # TODO: only do this if we're in turbo mode + # TODO: will we always know the hash here? + self.state.persist_with_block_diff(block.hash) return block.copy(header=block.header.copy(state_root=self.state.state_root)) diff --git a/eth/vm/state.py b/eth/vm/state.py index d691bc2c54..840cca5bc7 100644 --- a/eth/vm/state.py +++ b/eth/vm/state.py @@ -253,6 +253,12 @@ def commit(self, snapshot: Tuple[Hash32, UUID]) -> None: def persist(self) -> None: self._account_db.persist() + def persist_with_block_diff(self, block_hash: Hash32) -> None: + """ + Persists all changes and also saves a record of them to the database. + """ + self._account_db.persist_with_block_diff(block_hash) + # # Access self.prev_hashes (Read-only) # diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py new file mode 100644 index 0000000000..db302e84fe --- /dev/null +++ b/tests/core/vm/test_block_diffs.py @@ -0,0 +1,118 @@ +import pytest + +from eth_utils import int_to_big_endian + +from eth_hash.auto import keccak + +from eth.constants import BLANK_ROOT_HASH +from eth.db.atomic import AtomicDB +from eth.db.block_diff import BlockDiff +from eth.db.account import AccountDB +from eth.db.storage import StorageLookup + +ACCOUNT = b'\xaa' * 20 +BLOCK_HASH = keccak(b'one') + +""" +TODO: Some tests remain to be written: +- Are we sure that old accounts are being recorded? Start from an account which already has a value + and check that it is saved. +- Check that deleting an account is correctly saved. +- Test that this behavior is trigger during block import (if Turbo-mode is enabled) +- Test for more fields, such as account balances. +- Test that this works even under calls to things like commit() and snapshot() +- Test that these diffs can be applied to something and the correct resulting state obtained +""" + + +@pytest.fixture +def base_db(): + return AtomicDB() + + +@pytest.fixture +def account_db(base_db): + return AccountDB(base_db) + + +# Some basic tests that BlockDiff works as expected and can round-trip data to the database + + +def test_no_such_diff_raises_key_error(base_db): + with pytest.raises(KeyError): + BlockDiff.from_db(base_db, BLOCK_HASH) + + +def test_can_persist_empty_block_diff(base_db): + orig = BlockDiff(BLOCK_HASH) + orig.write_to(base_db) + + block_diff = BlockDiff.from_db(base_db, BLOCK_HASH) + assert len(block_diff.get_changed_accounts()) == 0 + + +def test_can_persist_changed_account(base_db): + orig = BlockDiff(BLOCK_HASH) + orig.set_account_changed(ACCOUNT, b'old', b'new') # TODO: more realistic data + orig.write_to(base_db) + + block_diff = BlockDiff.from_db(base_db, BLOCK_HASH) + assert block_diff.get_changed_accounts() == (ACCOUNT,) + assert block_diff.get_account(ACCOUNT, new=True) == b'new' + assert block_diff.get_account(ACCOUNT, new=False) == b'old' + + +# Some tests that AccountDB saves a block diff when persist()ing + + +def test_account_diffs(account_db): + account_db.set_nonce(ACCOUNT, 10) + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + new_account = diff.get_decoded_account(ACCOUNT, new=True) + assert new_account.nonce == 10 + + assert diff.get_decoded_account(ACCOUNT, new=False) is None + + +def test_persists_storage_changes(account_db): + account_db.set_storage(ACCOUNT, 1, 10) + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + + key = int_to_big_endian(1) + + # TODO: provide some interface, this shouldn't be reading items directly out of the diff + assert ACCOUNT in diff.changed_storage_items + assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) + assert diff.changed_storage_items[ACCOUNT][key].old == b'' + assert diff.changed_storage_items[ACCOUNT][key].new == bytes([10]) + + +def test_persists_state_root(account_db): + """ + When the storage items change the account's storage root also changes and that change also + needs to be persisted. + """ + + # First, compute the expected new storage root + db = AtomicDB() + example_lookup = StorageLookup(db, BLANK_ROOT_HASH, ACCOUNT) + key = int_to_big_endian(1) + example_lookup[key] = int_to_big_endian(10) + expected_root = example_lookup.get_changed_root() + + # Next, make the same change to out storage + account_db.set_storage(ACCOUNT, 1, 10) + account_db.persist_with_block_diff(BLOCK_HASH) + + # The new state root should have been included as part of the diff. + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + new_account = diff.get_decoded_account(ACCOUNT, new=True) + assert new_account.storage_root == expected_root From 5b53eeba28f60fb8d1a505b3776c8ae25fa04296 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 30 Jul 2019 19:13:12 -0700 Subject: [PATCH 2/8] Add test for updating a non-None state --- eth/db/account.py | 6 +----- tests/core/vm/test_block_diffs.py | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/eth/db/account.py b/eth/db/account.py index 68490697e3..4e01793e98 100644 --- a/eth/db/account.py +++ b/eth/db/account.py @@ -609,11 +609,7 @@ def persist_with_block_diff(self, block_hash: Hash32) -> None: for key, new_value in diff.pending_items(): slot = big_endian_to_int(key) old_value = store.get(slot, from_journal=False) - # TODO: this seems incredibly wrong - if old_value == 0: - old_value_bytes = b'' - else: - old_value_bytes = int_to_big_endian(old_value) + old_value_bytes = int_to_big_endian(old_value) block_diff.set_storage_changed(address, slot, old_value_bytes, new_value) old_account_values: Dict[Address, bytes] = dict() diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py index db302e84fe..5915326bf7 100644 --- a/tests/core/vm/test_block_diffs.py +++ b/tests/core/vm/test_block_diffs.py @@ -22,6 +22,8 @@ - Test for more fields, such as account balances. - Test that this works even under calls to things like commit() and snapshot() - Test that these diffs can be applied to something and the correct resulting state obtained +- What if you change an account's balance and also a storage item? +- What do you do if delete_storage is called? """ @@ -89,7 +91,7 @@ def test_persists_storage_changes(account_db): # TODO: provide some interface, this shouldn't be reading items directly out of the diff assert ACCOUNT in diff.changed_storage_items assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) - assert diff.changed_storage_items[ACCOUNT][key].old == b'' + assert diff.changed_storage_items[ACCOUNT][key].old == bytes([0]) assert diff.changed_storage_items[ACCOUNT][key].new == bytes([10]) @@ -116,3 +118,22 @@ def test_persists_state_root(account_db): assert diff.get_changed_accounts() == (ACCOUNT, ) new_account = diff.get_decoded_account(ACCOUNT, new=True) assert new_account.storage_root == expected_root + + +def test_two_changes(account_db): + account_db.set_storage(ACCOUNT, 1, 10) + account_db.persist() + + account_db.set_storage(ACCOUNT, 1, 20) + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + + key = int_to_big_endian(1) + + # TODO: provide some interface, this shouldn't be reading items directly out of the diff + assert ACCOUNT in diff.changed_storage_items + assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) + assert diff.changed_storage_items[ACCOUNT][key].old == bytes([10]) + assert diff.changed_storage_items[ACCOUNT][key].new == bytes([20]) From a71da2abddb9457f829c24214e43a64216ebe5b1 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 30 Jul 2019 19:28:11 -0700 Subject: [PATCH 3/8] Add some more tests --- tests/core/vm/test_block_diffs.py | 43 ++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py index 5915326bf7..48d9fe41d5 100644 --- a/tests/core/vm/test_block_diffs.py +++ b/tests/core/vm/test_block_diffs.py @@ -15,15 +15,9 @@ """ TODO: Some tests remain to be written: -- Are we sure that old accounts are being recorded? Start from an account which already has a value - and check that it is saved. -- Check that deleting an account is correctly saved. - Test that this behavior is trigger during block import (if Turbo-mode is enabled) -- Test for more fields, such as account balances. - Test that this works even under calls to things like commit() and snapshot() - Test that these diffs can be applied to something and the correct resulting state obtained -- What if you change an account's balance and also a storage item? -- What do you do if delete_storage is called? """ @@ -120,7 +114,7 @@ def test_persists_state_root(account_db): assert new_account.storage_root == expected_root -def test_two_changes(account_db): +def test_two_storage_changes(account_db): account_db.set_storage(ACCOUNT, 1, 10) account_db.persist() @@ -137,3 +131,38 @@ def test_two_changes(account_db): assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) assert diff.changed_storage_items[ACCOUNT][key].old == bytes([10]) assert diff.changed_storage_items[ACCOUNT][key].new == bytes([20]) + + +def test_account_and_storage_change(account_db): + account_db.set_balance(ACCOUNT, 100) + account_db.set_storage(ACCOUNT, 1, 10) + + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + + old_account = diff.get_decoded_account(ACCOUNT, new=False) + assert old_account is None + + new_account = diff.get_decoded_account(ACCOUNT, new=True) + assert new_account.storage_root != BLANK_ROOT_HASH + assert new_account.balance == 100 + + # TODO: also verify that the storage items have changed + + +def test_delete_account(account_db): + account_db.set_balance(ACCOUNT, 100) + account_db.persist() + + account_db.delete_account(ACCOUNT) + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_accounts() == (ACCOUNT, ) + old_account = diff.get_decoded_account(ACCOUNT, new=False) + new_account = diff.get_decoded_account(ACCOUNT, new=True) + + assert old_account.balance == 100 + assert new_account is None From 4668a0fd29ed0f04b5fbcd1eed949809d1dfbe7c Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 31 Jul 2019 18:55:59 -0700 Subject: [PATCH 4/8] Clean up block diffs --- eth/db/block_diff.py | 85 +++++++++++++++++++------------ tests/core/vm/test_block_diffs.py | 37 ++++++-------- 2 files changed, 67 insertions(+), 55 deletions(-) diff --git a/eth/db/block_diff.py b/eth/db/block_diff.py index 3371cffe49..b7fa6b6434 100644 --- a/eth/db/block_diff.py +++ b/eth/db/block_diff.py @@ -1,10 +1,9 @@ from collections import defaultdict from typing import ( - cast, Dict, Iterable, - NamedTuple, Optional, + Set, Tuple, ) @@ -12,6 +11,10 @@ Address, Hash32, ) +from eth_utils import ( + big_endian_to_int, + to_tuple +) import rlp from eth.db.backends.base import BaseDB @@ -19,11 +22,6 @@ from eth.rlp.accounts import Account -class Change(NamedTuple): - old: object - new: object - - class BlockDiff: """ TODO: I'm not sure where this class belongs @@ -32,36 +30,56 @@ class BlockDiff: def __init__(self, block_hash: Hash32) -> None: self.block_hash = block_hash - self.changed_accounts: Dict[Address, Change] = dict() - self.changed_storage_items: Dict[Address, Dict[int, Change]] = defaultdict(dict) + self.old_account_values: Dict[Address, Optional[bytes]] = dict() + self.new_account_values: Dict[Address, Optional[bytes]] = dict() - def set_account_changed(self, address: Address, old: bytes, new: bytes) -> None: - self.changed_accounts[address] = Change(old, new) + SLOT_TO_VALUE = Dict[int, bytes] + self.old_storage_items: Dict[Address, SLOT_TO_VALUE] = defaultdict(dict) + self.new_storage_items: Dict[Address, SLOT_TO_VALUE] = defaultdict(dict) - def set_storage_changed(self, address: Address, slot: int, old: bytes, new: bytes) -> None: - self.changed_storage_items[address][slot] = Change(old, new) + def set_account_changed(self, address: Address, old_value: bytes, new_value: bytes) -> None: + self.old_account_values[address] = old_value + self.new_account_values[address] = new_value - def get_changed_accounts(self) -> Iterable[Address]: - return tuple( - set(self.changed_accounts.keys()) | set(self.changed_storage_items.keys()) - ) + def set_storage_changed(self, address: Address, slot: int, + old_value: bytes, new_value: bytes) -> None: + self.old_storage_items[address][slot] = old_value + self.new_storage_items[address][slot] = new_value - def get_changed_storage_items(self) -> Iterable[Tuple[Address, int, int, int]]: - def storage_items_to_diff(item: Dict[int, Change]) -> Iterable[Tuple[int, int, int]]: - return [ - (key, cast(int, change.old), cast(int, change.new)) - for key, change in item.items() - ] + def get_changed_accounts(self) -> Set[Address]: + return set(self.old_account_values.keys()) | set(self.old_storage_items.keys()) - return [ - (acct, key, old_value, new_value) - for acct, value in self.changed_storage_items.items() - for key, old_value, new_value in storage_items_to_diff(value) - ] + @to_tuple + def get_changed_storage_items(self) -> Iterable[Tuple[Address, int, bytes, bytes]]: + for address in self.old_storage_items.keys(): + new_items = self.new_storage_items[address] + old_items = self.old_storage_items[address] + for slot in old_items.keys(): + yield address, slot, old_items[slot], new_items[slot] + + def get_changed_slots(self, address: Address) -> Set[int]: + """ + Returns which slots changed for the given account. + """ + if address not in self.old_storage_items.keys(): + return set() + + return set(self.old_storage_items[address].keys()) + + def get_slot_change(self, address: Address, slot: int) -> Tuple[int, int]: + if address not in self.old_storage_items: + raise Exception(f'account {address} did not change') + old_values = self.old_storage_items[address] + + if slot not in old_values: + raise Exception(f"{address}'s slot {slot} did not change") + + new_values = self.new_storage_items[address] + return big_endian_to_int(old_values[slot]), big_endian_to_int(new_values[slot]) def get_account(self, address: Address, new: bool = True) -> bytes: - change = self.changed_accounts[address] - return cast(bytes, change.new) if new else cast(bytes, change.old) + dictionary = self.new_account_values if new else self.old_account_values + return dictionary[address] def get_decoded_account(self, address: Address, new: bool = True) -> Optional[Account]: encoded = self.get_account(address, new) @@ -86,7 +104,8 @@ def from_db(cls, db: BaseDB, block_hash: Hash32) -> 'BlockDiff': block_diff.set_account_changed(key, old, new) for key, slot, old, new in storage_items: - block_diff.set_storage_changed(key, slot, old, new) + decoded_slot = big_endian_to_int(slot) # rlp.encode turns our ints into bytes + block_diff.set_storage_changed(key, decoded_slot, old, new) return block_diff @@ -95,8 +114,8 @@ def write_to(self, db: BaseDB) -> None: # TODO: this should probably verify that the state roots have all been added accounts = [ - [key, value.old, value.new] - for key, value in self.changed_accounts.items() + [address, self.old_account_values[address], self.new_account_values[address]] + for address in self.old_account_values.keys() ] storage_items = self.get_changed_storage_items() diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py index 48d9fe41d5..f14a06bfac 100644 --- a/tests/core/vm/test_block_diffs.py +++ b/tests/core/vm/test_block_diffs.py @@ -18,6 +18,8 @@ - Test that this behavior is trigger during block import (if Turbo-mode is enabled) - Test that this works even under calls to things like commit() and snapshot() - Test that these diffs can be applied to something and the correct resulting state obtained +- What happens when delete_storage is called? Do all the items change? + Maybe just the deletion is recorded? """ @@ -53,7 +55,7 @@ def test_can_persist_changed_account(base_db): orig.write_to(base_db) block_diff = BlockDiff.from_db(base_db, BLOCK_HASH) - assert block_diff.get_changed_accounts() == (ACCOUNT,) + assert block_diff.get_changed_accounts() == {ACCOUNT} assert block_diff.get_account(ACCOUNT, new=True) == b'new' assert block_diff.get_account(ACCOUNT, new=False) == b'old' @@ -66,7 +68,7 @@ def test_account_diffs(account_db): account_db.persist_with_block_diff(BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) + assert diff.get_changed_accounts() == {ACCOUNT} new_account = diff.get_decoded_account(ACCOUNT, new=True) assert new_account.nonce == 10 @@ -78,15 +80,10 @@ def test_persists_storage_changes(account_db): account_db.persist_with_block_diff(BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) + assert diff.get_changed_accounts() == {ACCOUNT} - key = int_to_big_endian(1) - - # TODO: provide some interface, this shouldn't be reading items directly out of the diff - assert ACCOUNT in diff.changed_storage_items - assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) - assert diff.changed_storage_items[ACCOUNT][key].old == bytes([0]) - assert diff.changed_storage_items[ACCOUNT][key].new == bytes([10]) + assert diff.get_changed_slots(ACCOUNT) == {1} + assert diff.get_slot_change(ACCOUNT, 1) == (0, 10) def test_persists_state_root(account_db): @@ -109,7 +106,7 @@ def test_persists_state_root(account_db): # The new state root should have been included as part of the diff. diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) + assert diff.get_changed_accounts() == {ACCOUNT} new_account = diff.get_decoded_account(ACCOUNT, new=True) assert new_account.storage_root == expected_root @@ -122,15 +119,10 @@ def test_two_storage_changes(account_db): account_db.persist_with_block_diff(BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) - - key = int_to_big_endian(1) + assert diff.get_changed_accounts() == {ACCOUNT} - # TODO: provide some interface, this shouldn't be reading items directly out of the diff - assert ACCOUNT in diff.changed_storage_items - assert tuple(diff.changed_storage_items[ACCOUNT].keys()) == (key,) - assert diff.changed_storage_items[ACCOUNT][key].old == bytes([10]) - assert diff.changed_storage_items[ACCOUNT][key].new == bytes([20]) + assert diff.get_changed_slots(ACCOUNT) == {1} + assert diff.get_slot_change(ACCOUNT, 1) == (10, 20) def test_account_and_storage_change(account_db): @@ -140,7 +132,7 @@ def test_account_and_storage_change(account_db): account_db.persist_with_block_diff(BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) + assert diff.get_changed_accounts() == {ACCOUNT} old_account = diff.get_decoded_account(ACCOUNT, new=False) assert old_account is None @@ -149,7 +141,8 @@ def test_account_and_storage_change(account_db): assert new_account.storage_root != BLANK_ROOT_HASH assert new_account.balance == 100 - # TODO: also verify that the storage items have changed + assert diff.get_changed_slots(ACCOUNT) == {1} + assert diff.get_slot_change(ACCOUNT, 1) == (0, 10) def test_delete_account(account_db): @@ -160,7 +153,7 @@ def test_delete_account(account_db): account_db.persist_with_block_diff(BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) - assert diff.get_changed_accounts() == (ACCOUNT, ) + assert diff.get_changed_accounts() == {ACCOUNT} old_account = diff.get_decoded_account(ACCOUNT, new=False) new_account = diff.get_decoded_account(ACCOUNT, new=True) From ae9d625ae490e67ae8b6eaf6df0495cea84e0e7e Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 31 Jul 2019 19:27:31 -0700 Subject: [PATCH 5/8] Add test for delete_storage --- eth/db/block_diff.py | 12 +++++++++--- tests/core/vm/test_block_diffs.py | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/eth/db/block_diff.py b/eth/db/block_diff.py index b7fa6b6434..b18bb1c6e9 100644 --- a/eth/db/block_diff.py +++ b/eth/db/block_diff.py @@ -22,10 +22,16 @@ from eth.rlp.accounts import Account +""" +TODO: Decide on the best interface for returning changes: +- diff.get_slot_change() -> [old, new] +- diff.get_slot_change(new=FAlse) -> old +- diff.get_slot_change(kind=BlockDiff.OLD) -> old +- diff.get_old_slot_value() & diff.get_new_slot_value() +""" + + class BlockDiff: - """ - TODO: I'm not sure where this class belongs - """ def __init__(self, block_hash: Hash32) -> None: self.block_hash = block_hash diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py index f14a06bfac..81d8a7e12a 100644 --- a/tests/core/vm/test_block_diffs.py +++ b/tests/core/vm/test_block_diffs.py @@ -18,8 +18,6 @@ - Test that this behavior is trigger during block import (if Turbo-mode is enabled) - Test that this works even under calls to things like commit() and snapshot() - Test that these diffs can be applied to something and the correct resulting state obtained -- What happens when delete_storage is called? Do all the items change? - Maybe just the deletion is recorded? """ @@ -159,3 +157,22 @@ def test_delete_account(account_db): assert old_account.balance == 100 assert new_account is None + + +def test_delete_storage(account_db): + """ + This is only called before a CREATE message is processed, and CREATE messages are not + allowed to overwrite non-empty storage tries (search for "collisions" in EIP 1014), so + this operation *should* always be a no-op. Here's a quick check to ensure block diff + handles it gracefully. + """ + + account_db.set_balance(ACCOUNT, 10) + account_db.delete_storage(ACCOUNT) + account_db.persist_with_block_diff(BLOCK_HASH) + + diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) + assert diff.get_changed_slots(ACCOUNT) == set() + + new_account = diff.get_decoded_account(ACCOUNT, new=True) + assert new_account.balance == 10 From 51fa7442422eec5c51422f5dff857e39b4dc9bbd Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 1 Aug 2019 21:51:10 -0700 Subject: [PATCH 6/8] Test that block diffs are saved during block import --- eth/db/account.py | 8 +- eth/db/block_diff.py | 10 +-- eth/vm/base.py | 8 +- eth/vm/state.py | 5 +- setup.py | 1 + tests/core/chain-object/test_turbo_chain.py | 99 +++++++++++++++++++++ tests/core/vm/test_block_diffs.py | 27 +++--- 7 files changed, 133 insertions(+), 25 deletions(-) create mode 100644 tests/core/chain-object/test_turbo_chain.py diff --git a/eth/db/account.py b/eth/db/account.py index 4e01793e98..6a41cc01ae 100644 --- a/eth/db/account.py +++ b/eth/db/account.py @@ -570,12 +570,12 @@ def persist(self) -> None: self._batchdb.commit_to(write_batch, apply_deletes=False) self._root_hash_at_last_persist = new_root_hash - def persist_with_block_diff(self, block_hash: Hash32) -> None: + def persist_returning_block_diff(self) -> BlockDiff: """ Persists, including a diff which can be used to unwind/replay the changes this block makes. """ - block_diff = BlockDiff(block_hash) + block_diff = BlockDiff() # 1. Grab all the changed accounts and their previous values @@ -625,8 +625,8 @@ def persist_with_block_diff(self, block_hash: Hash32) -> None: new_account_value = self._get_encoded_account(address, from_journal=False) block_diff.set_account_changed(address, old_account_value, new_account_value) - # 5. persist the block diff - block_diff.write_to(self._raw_store_db) + # 5. return the block diff + return block_diff def _changed_accounts(self) -> DBDiff: """ diff --git a/eth/db/block_diff.py b/eth/db/block_diff.py index b18bb1c6e9..f5c3667df8 100644 --- a/eth/db/block_diff.py +++ b/eth/db/block_diff.py @@ -33,9 +33,7 @@ class BlockDiff: - def __init__(self, block_hash: Hash32) -> None: - self.block_hash = block_hash - + def __init__(self) -> None: self.old_account_values: Dict[Address, Optional[bytes]] = dict() self.new_account_values: Dict[Address, Optional[bytes]] = dict() @@ -104,7 +102,7 @@ def from_db(cls, db: BaseDB, block_hash: Hash32) -> 'BlockDiff': accounts, storage_items = diff - block_diff = cls(block_hash) + block_diff = cls() for key, old, new in accounts: block_diff.set_account_changed(key, old, new) @@ -115,7 +113,7 @@ def from_db(cls, db: BaseDB, block_hash: Hash32) -> 'BlockDiff': return block_diff - def write_to(self, db: BaseDB) -> None: + def write_to(self, db: BaseDB, block_hash: Hash32) -> None: # TODO: this should probably verify that the state roots have all been added @@ -132,4 +130,4 @@ def write_to(self, db: BaseDB) -> None: ] encoded_diff = rlp.encode(diff) - db[SchemaTurbo.make_block_diff_lookup_key(self.block_hash)] = encoded_diff + db[SchemaTurbo.make_block_diff_lookup_key(block_hash)] = encoded_diff diff --git a/eth/vm/base.py b/eth/vm/base.py index 04d018f940..f47f6a5c19 100644 --- a/eth/vm/base.py +++ b/eth/vm/base.py @@ -679,9 +679,13 @@ def finalize_block(self, block: BaseBlock) -> BaseBlock: # TODO: only do this if we're in turbo mode # TODO: will we always know the hash here? - self.state.persist_with_block_diff(block.hash) + block_diff = self.state.persist_returning_block_diff() - return block.copy(header=block.header.copy(state_root=self.state.state_root)) + result = block.copy(header=block.header.copy(state_root=self.state.state_root)) + + basedb = self.chaindb.db + block_diff.write_to(basedb, result.hash) + return result def pack_block(self, block: BaseBlock, *args: Any, **kwargs: Any) -> BaseBlock: """ diff --git a/eth/vm/state.py b/eth/vm/state.py index 840cca5bc7..c1c61ce326 100644 --- a/eth/vm/state.py +++ b/eth/vm/state.py @@ -26,6 +26,7 @@ from eth.db.account import ( BaseAccountDB, ) +from eth.db.block_diff import BlockDiff from eth.db.backends.base import ( BaseAtomicDB, ) @@ -253,11 +254,11 @@ def commit(self, snapshot: Tuple[Hash32, UUID]) -> None: def persist(self) -> None: self._account_db.persist() - def persist_with_block_diff(self, block_hash: Hash32) -> None: + def persist_returning_block_diff(self) -> BlockDiff: """ Persists all changes and also saves a record of them to the database. """ - self._account_db.persist_with_block_diff(block_hash) + return self._account_db.persist_returning_block_diff() # # Access self.prev_hashes (Read-only) diff --git a/setup.py b/setup.py index 17dcbf8c8e..468df65226 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ "pytest-cov==2.5.1", "pytest-watch>=4.1.0,<5", "pytest-xdist==1.18.1", + "vyper==0.1.0b11", ], 'lint': [ "flake8==3.5.0", diff --git a/tests/core/chain-object/test_turbo_chain.py b/tests/core/chain-object/test_turbo_chain.py new file mode 100644 index 0000000000..73a3d43bc5 --- /dev/null +++ b/tests/core/chain-object/test_turbo_chain.py @@ -0,0 +1,99 @@ +""" +some tests that chain correctly manipulates the turbo database +""" +import pytest + +from eth_utils.toolz import ( + assoc, +) + +from eth.db.block_diff import BlockDiff +from eth.tools._utils.vyper import ( + compile_vyper_lll, +) + +from tests.core.helpers import ( + new_transaction, +) + + +CONTRACT_ADDRESS = b'\x10' * 20 + + +@pytest.fixture +def genesis_state(base_genesis_state): + """ + A little bit of magic, this overrides the genesis_state fixture which was defined elsewhere so + chain_without_block_validation uses the genesis state specified here. + """ + + # 1. when called this contract makes a simple change to the state + code = ['SSTORE', 0, 42] + bytecode = compile_vyper_lll(code)[0] + + # 2. put that code somewhere useful + return assoc( + base_genesis_state, + CONTRACT_ADDRESS, + { + 'balance': 0, + 'nonce': 0, + 'code': bytecode, + 'storage': {}, + } + ) + + +@pytest.fixture +def chain(chain_without_block_validation): + # make things a little less verbose + return chain_without_block_validation + + +# TODO: why are many different tests run here? +def test_import_block_saves_block_diff(chain, funded_address, funded_address_private_key): + tx = new_transaction( + chain.get_vm(), + funded_address, + CONTRACT_ADDRESS, + data=b'', + private_key=funded_address_private_key, + ) + + new_block, _, _ = chain.build_block_with_transactions([tx]) + imported_block, _, _ = chain.import_block(new_block) + + imported_header = imported_block.header + imported_block_hash = imported_header.hash + + # sanity check, did the transaction go through? + assert len(imported_block.transactions) == 1 + state = chain.get_vm(imported_header).state + assert state.get_storage(CONTRACT_ADDRESS, 0) == 42 + + # the actual test, did we write out all the changes which happened? + base_db = chain.chaindb.db + diff = BlockDiff.from_db(base_db, imported_block_hash) + assert diff.get_changed_accounts() == { + funded_address, CONTRACT_ADDRESS, imported_header.coinbase + } + assert diff.get_changed_slots(CONTRACT_ADDRESS) == {0} + assert diff.get_slot_change(CONTRACT_ADDRESS, 0) == (0, 42) + + assert diff.get_changed_slots(funded_address) == set() + assert diff.get_changed_slots(imported_header.coinbase) == set() + + # do some spot checks to make sure different fields were saved + + assert diff.get_decoded_account(imported_header.coinbase, new=False) is None + new_coinbase_balance = diff.get_decoded_account(imported_header.coinbase, new=True).balance + assert new_coinbase_balance > 0 + + old_sender_balance = diff.get_decoded_account(funded_address, new=False).balance + new_sender_balance = diff.get_decoded_account(funded_address, new=True).balance + assert old_sender_balance > new_sender_balance + + old_contract_nonce = diff.get_decoded_account(CONTRACT_ADDRESS, new=False).nonce + new_contract_nonce = diff.get_decoded_account(CONTRACT_ADDRESS, new=True).nonce + assert old_contract_nonce == 0 + assert new_contract_nonce == 0 diff --git a/tests/core/vm/test_block_diffs.py b/tests/core/vm/test_block_diffs.py index 81d8a7e12a..9610e4a7ca 100644 --- a/tests/core/vm/test_block_diffs.py +++ b/tests/core/vm/test_block_diffs.py @@ -40,17 +40,17 @@ def test_no_such_diff_raises_key_error(base_db): def test_can_persist_empty_block_diff(base_db): - orig = BlockDiff(BLOCK_HASH) - orig.write_to(base_db) + orig = BlockDiff() + orig.write_to(base_db, BLOCK_HASH) block_diff = BlockDiff.from_db(base_db, BLOCK_HASH) assert len(block_diff.get_changed_accounts()) == 0 def test_can_persist_changed_account(base_db): - orig = BlockDiff(BLOCK_HASH) + orig = BlockDiff() orig.set_account_changed(ACCOUNT, b'old', b'new') # TODO: more realistic data - orig.write_to(base_db) + orig.write_to(base_db, BLOCK_HASH) block_diff = BlockDiff.from_db(base_db, BLOCK_HASH) assert block_diff.get_changed_accounts() == {ACCOUNT} @@ -61,9 +61,14 @@ def test_can_persist_changed_account(base_db): # Some tests that AccountDB saves a block diff when persist()ing +def save_block_diff(account_db, block_hash): + diff = account_db.persist_returning_block_diff() + diff.write_to(account_db._raw_store_db, block_hash) + + def test_account_diffs(account_db): account_db.set_nonce(ACCOUNT, 10) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_accounts() == {ACCOUNT} @@ -75,7 +80,7 @@ def test_account_diffs(account_db): def test_persists_storage_changes(account_db): account_db.set_storage(ACCOUNT, 1, 10) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_accounts() == {ACCOUNT} @@ -99,7 +104,7 @@ def test_persists_state_root(account_db): # Next, make the same change to out storage account_db.set_storage(ACCOUNT, 1, 10) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) # The new state root should have been included as part of the diff. @@ -114,7 +119,7 @@ def test_two_storage_changes(account_db): account_db.persist() account_db.set_storage(ACCOUNT, 1, 20) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_accounts() == {ACCOUNT} @@ -127,7 +132,7 @@ def test_account_and_storage_change(account_db): account_db.set_balance(ACCOUNT, 100) account_db.set_storage(ACCOUNT, 1, 10) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_accounts() == {ACCOUNT} @@ -148,7 +153,7 @@ def test_delete_account(account_db): account_db.persist() account_db.delete_account(ACCOUNT) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_accounts() == {ACCOUNT} @@ -169,7 +174,7 @@ def test_delete_storage(account_db): account_db.set_balance(ACCOUNT, 10) account_db.delete_storage(ACCOUNT) - account_db.persist_with_block_diff(BLOCK_HASH) + save_block_diff(account_db, BLOCK_HASH) diff = BlockDiff.from_db(account_db._raw_store_db, BLOCK_HASH) assert diff.get_changed_slots(ACCOUNT) == set() From 356866389b75a36f702492a676667d5a08139d97 Mon Sep 17 00:00:00 2001 From: Brian Cloutier Date: Tue, 6 Aug 2019 19:34:03 -0700 Subject: [PATCH 7/8] Support pre-Byzantium VMs --- eth/db/account.py | 59 +++++++++++++++------ eth/db/storage.py | 13 ++++- tests/core/chain-object/test_turbo_chain.py | 9 ++-- 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/eth/db/account.py b/eth/db/account.py index 6a41cc01ae..f584275735 100644 --- a/eth/db/account.py +++ b/eth/db/account.py @@ -57,6 +57,7 @@ ) from eth.db.storage import ( AccountStorageDB, + StorageLookup, ) from eth.db.typing import ( JournalDBCheckpoint, @@ -265,6 +266,9 @@ def __init__(self, db: BaseAtomicDB, state_root: Hash32=BLANK_ROOT_HASH) -> None self._dirty_accounts: Set[Address] = set() self._root_hash_at_last_persist = state_root + self._dirty_account_rlps: Set[Address] = set() + self._deleted_accounts: Set[Address] = set() + @property def state_root(self) -> Hash32: return self._trie.root_hash @@ -441,6 +445,7 @@ def delete_account(self, address: Address) -> None: del self._journaltrie[address] self._wipe_storage(address) + self._deleted_accounts.add(address) def account_exists(self, address: Address) -> bool: validate_canonical_address(address, title="Storage Address") @@ -489,6 +494,8 @@ def _set_account(self, address: Address, account: Account) -> None: rlp_account = rlp.encode(account, sedes=Account) self._journaltrie[address] = rlp_account + self._dirty_account_rlps.add(address) + # # Record and discard API # @@ -560,6 +567,8 @@ def persist(self) -> None: # reset local storage trackers self._account_stores = {} self._dirty_accounts = set() + self._dirty_account_rlps = set() + self._deleted_accounts = set() # persist accounts self._validate_generated_root() @@ -579,20 +588,27 @@ def persist_returning_block_diff(self) -> BlockDiff: # 1. Grab all the changed accounts and their previous values - changed_accounts = self._changed_accounts() - for deleted_key in changed_accounts.deleted_keys(): + # pre-Byzantium make_storage_root is called at the end of every transaction, and + # it blows away all the changes. Create an old_trie here so we can peer into the + # state as it was at the beginning of the block. + + old_trie = CacheDB(HashTrie(HexaryTrie( + self._batchtrie, self._root_hash_at_last_persist, prune=False + ))) + + for deleted_address in self._deleted_accounts: # TODO: test that from_journal=False works # DBDiff gave me bytes, but I need an address here... deleted_address = cast(Address, deleted_key) - current_value = self._get_encoded_account(deleted_address, from_journal=False) - if current_value == b'': - raise Exception('a non-existant account cannot be deleted') - block_diff.set_account_changed(deleted_address, current_value, b'') - for key, value in changed_accounts.pending_items(): - # TODO: key -> address - current_value = self._get_encoded_account(cast(Address, key), from_journal=False) - block_diff.set_account_changed(cast(Address, key), current_value, value) + # TODO: this might raise a KeyError + old_value = old_trie[deleted_address] + block_diff.set_account_changed(deleted_address, old_value, b'') + + for address in self._dirty_account_rlps: + old_value = old_trie[address] + new_value = self._get_encoded_account(address, from_journal=True) + block_diff.set_account_changed(address, old_value, new_value) # 2. Grab all the changed storage items and their previous values. dirty_stores = tuple(self._dirty_account_stores()) @@ -601,15 +617,28 @@ def persist_returning_block_diff(self) -> BlockDiff: for key in diff.deleted_keys(): slot = big_endian_to_int(key) - current_slot_value = store.get(slot, from_journal=False) + current_slot_value = store.get(slot) current_slot_value_bytes = int_to_big_endian(current_slot_value) - # TODO: Is b'' a valid value for a storage slot? - block_diff.set_storage_changed(address, slot, b'', current_slot_value_bytes) + # TODO: Is b'' a valid value for a storage slot? 0 might be better + # TODO: this line is untested + block_diff.set_storage_changed(address, slot, current_slot_value_bytes, b'') for key, new_value in diff.pending_items(): slot = big_endian_to_int(key) - old_value = store.get(slot, from_journal=False) - old_value_bytes = int_to_big_endian(old_value) + + # make a new StorageLookup because, pre-Byzantium, make_state_root is + # called at the end of every transaction, and making the state root blows + # away all changes. If we were to ask the store for the old value it would + # tell us the state as of the beginning of the last txn, not the state as + # of the beginnig of the block. + + fresh_store = StorageLookup( + self._raw_store_db, + self._root_hash_at_last_persist, + address + ) + old_value_bytes = fresh_store.get(key) + block_diff.set_storage_changed(address, slot, old_value_bytes, new_value) old_account_values: Dict[Address, bytes] = dict() diff --git a/eth/db/storage.py b/eth/db/storage.py index 38d85fd8ec..da6b2fe4e9 100644 --- a/eth/db/storage.py +++ b/eth/db/storage.py @@ -194,6 +194,7 @@ def __init__(self, db: BaseAtomicDB, storage_root: Hash32, address: Address) -> self._storage_lookup = StorageLookup(db, storage_root, address) self._storage_cache = CacheDB(self._storage_lookup) self._journal_storage = JournalDB(self._storage_cache) + self._changes_since_last_persist = JournalDB(dict()) def get(self, slot: int, from_journal: bool=True) -> int: key = int_to_big_endian(slot) @@ -214,8 +215,10 @@ def set(self, slot: int, value: int) -> None: key = int_to_big_endian(slot) if value: self._journal_storage[key] = rlp.encode(value) + self._changes_since_last_persist[key] = rlp.encode(value) else: del self._journal_storage[key] + del self._changes_since_last_persist[key] def delete(self) -> None: self.logger.debug2( @@ -224,28 +227,34 @@ def delete(self) -> None: keccak(self._address).hex(), ) self._journal_storage.clear() + self._changes_since_last_persist.clear() self._storage_cache.reset_cache() def record(self, checkpoint: JournalDBCheckpoint) -> None: self._journal_storage.record(checkpoint) + self._changes_since_last_persist.record(checkpoint) def discard(self, checkpoint: JournalDBCheckpoint) -> None: self.logger.debug2('discard checkpoint %r', checkpoint) if self._journal_storage.has_checkpoint(checkpoint): self._journal_storage.discard(checkpoint) + self._changes_since_last_persist.discord(checkpoint) else: # if the checkpoint comes before this account started tracking, # then simply reset to the beginning self._journal_storage.reset() + self._changes_since_last_persist.reset() self._storage_cache.reset_cache() def commit(self, checkpoint: JournalDBCheckpoint) -> None: if self._journal_storage.has_checkpoint(checkpoint): self._journal_storage.commit(checkpoint) + self._changes_since_last_persist.commit(checkpoint) else: # if the checkpoint comes before this account started tracking, # then flatten all changes, without persisting self._journal_storage.flatten() + self._changes_since_last_persist.flatten() def make_storage_root(self) -> None: """ @@ -275,6 +284,8 @@ def persist(self, db: BaseDB) -> None: if self._storage_lookup.has_changed_root: self._storage_lookup.commit_to(db) + self._changes_since_last_persist.clear() + def diff(self) -> DBDiff: """ Returns all the changes that would be saved if persist() were called. @@ -282,4 +293,4 @@ def diff(self) -> DBDiff: Note: Calling make_storage_root() wipes away changes, after it is called this method will return an empty diff. """ - return self._journal_storage.diff() + return self._changes_since_last_persist.diff() diff --git a/tests/core/chain-object/test_turbo_chain.py b/tests/core/chain-object/test_turbo_chain.py index 73a3d43bc5..6194f3c3c2 100644 --- a/tests/core/chain-object/test_turbo_chain.py +++ b/tests/core/chain-object/test_turbo_chain.py @@ -50,7 +50,6 @@ def chain(chain_without_block_validation): return chain_without_block_validation -# TODO: why are many different tests run here? def test_import_block_saves_block_diff(chain, funded_address, funded_address_private_key): tx = new_transaction( chain.get_vm(), @@ -74,9 +73,11 @@ def test_import_block_saves_block_diff(chain, funded_address, funded_address_pri # the actual test, did we write out all the changes which happened? base_db = chain.chaindb.db diff = BlockDiff.from_db(base_db, imported_block_hash) - assert diff.get_changed_accounts() == { - funded_address, CONTRACT_ADDRESS, imported_header.coinbase - } + assert len(diff.get_changed_accounts()) == 3 + assert CONTRACT_ADDRESS in diff.get_changed_accounts() + assert imported_header.coinbase in diff.get_changed_accounts() + assert funded_address in diff.get_changed_accounts() + assert diff.get_changed_slots(CONTRACT_ADDRESS) == {0} assert diff.get_slot_change(CONTRACT_ADDRESS, 0) == (0, 42) From c2ebcf53c22d3b7c7c757a9757bed6c7bef873d3 Mon Sep 17 00:00:00 2001 From: Brian Cloutier Date: Tue, 6 Aug 2019 20:18:05 -0700 Subject: [PATCH 8/8] Fix some bugs in the previous commit --- eth/db/account.py | 22 ++++++++++++---------- eth/db/storage.py | 11 ++++++++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/eth/db/account.py b/eth/db/account.py index f584275735..0262e74305 100644 --- a/eth/db/account.py +++ b/eth/db/account.py @@ -593,14 +593,10 @@ def persist_returning_block_diff(self) -> BlockDiff: # state as it was at the beginning of the block. old_trie = CacheDB(HashTrie(HexaryTrie( - self._batchtrie, self._root_hash_at_last_persist, prune=False + self._raw_store_db, self._root_hash_at_last_persist, prune=False ))) for deleted_address in self._deleted_accounts: - # TODO: test that from_journal=False works - # DBDiff gave me bytes, but I need an address here... - deleted_address = cast(Address, deleted_key) - # TODO: this might raise a KeyError old_value = old_trie[deleted_address] block_diff.set_account_changed(deleted_address, old_value, b'') @@ -623,6 +619,17 @@ def persist_returning_block_diff(self) -> BlockDiff: # TODO: this line is untested block_diff.set_storage_changed(address, slot, current_slot_value_bytes, b'') + encoded_account = old_trie[address] + if encoded_account: + old_account = rlp.decode(encoded_account, sedes=Account) + else: + old_account = Account() + fresh_store = StorageLookup( + self._raw_store_db, + old_account.storage_root, + address + ) + for key, new_value in diff.pending_items(): slot = big_endian_to_int(key) @@ -632,11 +639,6 @@ def persist_returning_block_diff(self) -> BlockDiff: # tell us the state as of the beginning of the last txn, not the state as # of the beginnig of the block. - fresh_store = StorageLookup( - self._raw_store_db, - self._root_hash_at_last_persist, - address - ) old_value_bytes = fresh_store.get(key) block_diff.set_storage_changed(address, slot, old_value_bytes, new_value) diff --git a/eth/db/storage.py b/eth/db/storage.py index da6b2fe4e9..d881212f25 100644 --- a/eth/db/storage.py +++ b/eth/db/storage.py @@ -173,7 +173,7 @@ def __init__(self, db: BaseAtomicDB, storage_root: Hash32, address: Address) -> Keys are stored as node hashes and rlp-encoded node values. _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie), - writes to storage lookup *are* immeditaely applied to a trie, generating + writes to storage lookup *are* immediately applied to a trie, generating the appropriate trie nodes and and root hash (via the HexaryTrie). The writes are *not* persisted to db, until _storage_lookup is explicitly instructed to, via :meth:`StorageLookup.commit_to` @@ -218,7 +218,12 @@ def set(self, slot: int, value: int) -> None: self._changes_since_last_persist[key] = rlp.encode(value) else: del self._journal_storage[key] - del self._changes_since_last_persist[key] + try: + del self._changes_since_last_persist[key] + except KeyError: + self._changes_since_last_persist[key] = b'' + del self._changes_since_last_persist[key] + def delete(self) -> None: self.logger.debug2( @@ -238,7 +243,7 @@ def discard(self, checkpoint: JournalDBCheckpoint) -> None: self.logger.debug2('discard checkpoint %r', checkpoint) if self._journal_storage.has_checkpoint(checkpoint): self._journal_storage.discard(checkpoint) - self._changes_since_last_persist.discord(checkpoint) + self._changes_since_last_persist.discard(checkpoint) else: # if the checkpoint comes before this account started tracking, # then simply reset to the beginning