From b6c62d5887e2bea38df0c294077d30ca0f6a3c97 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 18 Oct 2024 23:06:31 +0800 Subject: [PATCH] core, trie, triedb: minor changes from snapshot integration (#30599) This change ports some non-important changes from https://github.com/ethereum/go-ethereum/pull/30159, including interface renaming and some trivial refactorings. --- cmd/geth/snapshot.go | 2 +- core/blockchain.go | 6 +- core/state/snapshot/generate.go | 17 +- core/state/snapshot/generate_test.go | 94 +++++---- core/state/statedb.go | 4 +- core/state/statedb_test.go | 4 +- core/state/stateupdate.go | 15 ++ core/state/sync_test.go | 10 +- eth/handler.go | 5 - eth/protocols/snap/sync_test.go | 8 +- trie/database_test.go | 2 +- trie/iterator_test.go | 2 +- trie/secure_trie.go | 6 +- trie/sync_test.go | 16 +- trie/trie.go | 4 +- trie/trie_reader.go | 9 +- trie/trienode/node.go | 9 + trie/triestate/state.go | 53 ----- trie/verkle.go | 2 +- triedb/database.go | 46 ++--- triedb/database/database.go | 16 +- triedb/hashdb/database.go | 9 +- triedb/pathdb/buffer.go | 141 +++++++++++++ triedb/pathdb/database.go | 104 +++++----- triedb/pathdb/database_test.go | 11 +- triedb/pathdb/difflayer.go | 57 ++---- triedb/pathdb/difflayer_test.go | 8 +- triedb/pathdb/disklayer.go | 79 ++++---- triedb/pathdb/execute.go | 6 +- triedb/pathdb/flush.go | 65 ++++++ triedb/pathdb/history.go | 13 +- triedb/pathdb/history_test.go | 11 +- triedb/pathdb/journal.go | 153 ++------------ triedb/pathdb/layertree.go | 5 +- triedb/pathdb/metrics.go | 25 +-- triedb/pathdb/nodebuffer.go | 290 --------------------------- triedb/pathdb/nodes.go | 246 +++++++++++++++++++++++ triedb/pathdb/reader.go | 8 +- triedb/pathdb/states.go | 166 +++++++++++++++ triedb/states.go | 51 +++++ 40 files changed, 991 insertions(+), 787 deletions(-) delete mode 100644 trie/triestate/state.go create mode 100644 triedb/pathdb/buffer.go create mode 100644 triedb/pathdb/flush.go delete mode 100644 
triedb/pathdb/nodebuffer.go create mode 100644 triedb/pathdb/nodes.go create mode 100644 triedb/pathdb/states.go create mode 100644 triedb/states.go diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go index 7d713ad110..14c6826e1d 100644 --- a/cmd/geth/snapshot.go +++ b/cmd/geth/snapshot.go @@ -428,7 +428,7 @@ func traverseRawState(ctx *cli.Context) error { log.Error("Failed to open iterator", "root", root, "err", err) return err } - reader, err := triedb.Reader(root) + reader, err := triedb.NodeReader(root) if err != nil { log.Error("State is non-existent", "root", root) return nil diff --git a/core/blockchain.go b/core/blockchain.go index 02c0bbaad1..1d45a298e4 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -160,9 +160,9 @@ func (c *CacheConfig) triedbConfig(isVerkle bool) *triedb.Config { } if c.StateScheme == rawdb.PathScheme { config.PathDB = &pathdb.Config{ - StateHistory: c.StateHistory, - CleanCacheSize: c.TrieCleanLimit * 1024 * 1024, - DirtyCacheSize: c.TrieDirtyLimit * 1024 * 1024, + StateHistory: c.StateHistory, + CleanCacheSize: c.TrieCleanLimit * 1024 * 1024, + WriteBufferSize: c.TrieDirtyLimit * 1024 * 1024, } } return config diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 6d9e163075..01fb55ea4c 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -31,7 +31,6 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" - "github.com/ethereum/go-ethereum/trie/trienode" "github.com/ethereum/go-ethereum/triedb" ) @@ -353,20 +352,14 @@ func (dl *diskLayer) generateRange(ctx *generatorContext, trieId *trie.ID, prefi // main account trie as a primary lookup when resolving hashes var resolver trie.NodeResolver if len(result.keys) > 0 { - mdb := rawdb.NewMemoryDatabase() - tdb := triedb.NewDatabase(mdb, triedb.HashDefaults) - defer tdb.Close() - snapTrie := trie.NewEmpty(tdb) + tr := trie.NewEmpty(nil) for 
i, key := range result.keys { - snapTrie.Update(key, result.vals[i]) - } - root, nodes := snapTrie.Commit(false) - if nodes != nil { - tdb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) - tdb.Commit(root, false) + tr.Update(key, result.vals[i]) } + _, nodes := tr.Commit(false) + hashSet := nodes.HashSet() resolver = func(owner common.Hash, path []byte, hash common.Hash) []byte { - return rawdb.ReadTrieNode(mdb, owner, path, hash, tdb.Scheme()) + return hashSet[hash] } } // Construct the trie for state iteration, reuse the trie diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 891111973a..56abff348d 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -57,14 +57,14 @@ func testGeneration(t *testing.T, scheme string) { // a fake one manually. We're going with a small account trie of 3 accounts, // two of which also has the same 3-slot storage trie attached. var helper = newHelper(scheme) - stRoot := helper.makeStorageTrie(common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false) + stRoot := helper.makeStorageTrie("", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false) helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) - helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) - helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-1", []string{"key-1", 
"key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) root, snap := helper.CommitAndGenerate() if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want { @@ -97,7 +97,7 @@ func testGenerateExistentState(t *testing.T, scheme string) { // two of which also has the same 3-slot storage trie attached. var helper = newHelper(scheme) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) @@ -105,7 +105,7 @@ func testGenerateExistentState(t *testing.T, scheme string) { helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) - stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapAccount("acc-3", &types.StateAccount{Balance: 
uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) @@ -159,6 +159,7 @@ type testHelper struct { triedb *triedb.Database accTrie *trie.StateTrie nodes *trienode.MergedNodeSet + states *triedb.StateSet } func newHelper(scheme string) *testHelper { @@ -169,19 +170,24 @@ func newHelper(scheme string) *testHelper { } else { config.HashDB = &hashdb.Config{} // disable caching } - triedb := triedb.NewDatabase(diskdb, config) - accTrie, _ := trie.NewStateTrie(trie.StateTrieID(types.EmptyRootHash), triedb) + db := triedb.NewDatabase(diskdb, config) + accTrie, _ := trie.NewStateTrie(trie.StateTrieID(types.EmptyRootHash), db) return &testHelper{ diskdb: diskdb, - triedb: triedb, + triedb: db, accTrie: accTrie, nodes: trienode.NewMergedNodeSet(), + states: triedb.NewStateSet(), } } func (t *testHelper) addTrieAccount(acckey string, acc *types.StateAccount) { val, _ := rlp.EncodeToBytes(acc) t.accTrie.MustUpdate([]byte(acckey), val) + + accHash := hashData([]byte(acckey)) + t.states.Accounts[accHash] = val + t.states.AccountsOrigin[common.BytesToAddress([]byte(acckey))] = nil } func (t *testHelper) addSnapAccount(acckey string, acc *types.StateAccount) { @@ -201,11 +207,21 @@ func (t *testHelper) addSnapStorage(accKey string, keys []string, vals []string) } } -func (t *testHelper) makeStorageTrie(owner common.Hash, keys []string, vals []string, commit bool) common.Hash { +func (t *testHelper) makeStorageTrie(accKey string, keys []string, vals []string, commit bool) common.Hash { + owner := hashData([]byte(accKey)) + addr := common.BytesToAddress([]byte(accKey)) id := trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash) stTrie, _ := trie.NewStateTrie(id, t.triedb) for i, k := range keys { stTrie.MustUpdate([]byte(k), []byte(vals[i])) + if t.states.Storages[owner] == nil { + t.states.Storages[owner] = make(map[common.Hash][]byte) + } + if 
t.states.StoragesOrigin[addr] == nil { + t.states.StoragesOrigin[addr] = make(map[common.Hash][]byte) + } + t.states.Storages[owner][hashData([]byte(k))] = []byte(vals[i]) + t.states.StoragesOrigin[addr][hashData([]byte(k))] = nil } if !commit { return stTrie.Hash() @@ -222,7 +238,7 @@ func (t *testHelper) Commit() common.Hash { if nodes != nil { t.nodes.Merge(nodes) } - t.triedb.Update(root, types.EmptyRootHash, 0, t.nodes, nil) + t.triedb.Update(root, types.EmptyRootHash, 0, t.nodes, t.states) t.triedb.Commit(root, false) return root } @@ -264,23 +280,23 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) { helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) // Account two, non empty root but empty database - stRoot := helper.makeStorageTrie(hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // Miss slots { // Account three, non empty root but misses slots in the beginning - helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) // Account four, non empty root but misses slots in the middle - helper.makeStorageTrie(hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", 
"key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-4", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"}) // Account five, non empty root but misses slots in the end - helper.makeStorageTrie(hashData([]byte("acc-5")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-5", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-5", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) } @@ -288,22 +304,22 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) { // Wrong storage slots { // Account six, non empty root but wrong slots in the beginning - helper.makeStorageTrie(hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-6", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"}) // Account seven, non empty root but wrong slots in the middle - helper.makeStorageTrie(hashData([]byte("acc-7")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-7", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", 
"badval-2", "val-3"}) // Account eight, non empty root but wrong slots in the end - helper.makeStorageTrie(hashData([]byte("acc-8")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-8", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"}) // Account 9, non empty root but rotated slots - helper.makeStorageTrie(hashData([]byte("acc-9")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-9", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"}) } @@ -311,17 +327,17 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) { // Extra storage slots { // Account 10, non empty root but extra slots in the beginning - helper.makeStorageTrie(hashData([]byte("acc-10")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-10", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-10", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"}) // Account 11, non empty root but extra slots in the middle - helper.makeStorageTrie(hashData([]byte("acc-11")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + 
helper.makeStorageTrie("acc-11", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-11", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"}) // Account 12, non empty root but extra slots in the end - helper.makeStorageTrie(hashData([]byte("acc-12")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-12", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-12", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"}) } @@ -356,11 +372,11 @@ func TestGenerateExistentStateWithWrongAccounts(t *testing.T) { func testGenerateExistentStateWithWrongAccounts(t *testing.T, scheme string) { helper := newHelper(scheme) - helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) - helper.makeStorageTrie(hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) - helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) - helper.makeStorageTrie(hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", 
"val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6] // Extra accounts [acc-0, acc-5, acc-7] @@ -463,10 +479,10 @@ func testGenerateMissingStorageTrie(t *testing.T, scheme string) { acc3 = hashData([]byte("acc-3")) helper = newHelper(scheme) ) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 - stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 root := helper.Commit() @@ -503,10 +519,10 @@ func testGenerateCorruptStorageTrie(t 
*testing.T, scheme string) { // two of which also has the same 3-slot storage trie attached. helper := newHelper(scheme) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 - stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 root := helper.Commit() @@ -542,7 +558,7 @@ func testGenerateWithExtraAccounts(t *testing.T, scheme string) { helper := newHelper(scheme) { // Account one in the trie - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, []string{"val-1", "val-2", "val-3", "val-4", "val-5"}, true, @@ -562,7 +578,7 @@ func testGenerateWithExtraAccounts(t *testing.T, scheme string) { } { // Account two exists only in the snapshot - stRoot := helper.makeStorageTrie(hashData([]byte("acc-2")), + 
stRoot := helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, []string{"val-1", "val-2", "val-3", "val-4", "val-5"}, true, @@ -618,7 +634,7 @@ func testGenerateWithManyExtraAccounts(t *testing.T, scheme string) { helper := newHelper(scheme) { // Account one in the trie - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true, @@ -763,7 +779,7 @@ func testGenerateFromEmptySnap(t *testing.T, scheme string) { helper := newHelper(scheme) // Add 1K accounts to the trie for i := 0; i < 400; i++ { - stRoot := helper.makeStorageTrie(hashData([]byte(fmt.Sprintf("acc-%d", i))), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie(fmt.Sprintf("acc-%d", i), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount(fmt.Sprintf("acc-%d", i), &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) } @@ -806,7 +822,7 @@ func testGenerateWithIncompleteStorage(t *testing.T, scheme string) { // on the sensitive spots at the boundaries for i := 0; i < 8; i++ { accKey := fmt.Sprintf("acc-%d", i) - stRoot := helper.makeStorageTrie(hashData([]byte(accKey)), stKeys, stVals, true) + stRoot := helper.makeStorageTrie(accKey, stKeys, stVals, true) helper.addAccount(accKey, &types.StateAccount{Balance: uint256.NewInt(uint64(i)), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) var moddedKeys []string var moddedVals []string @@ -903,11 +919,11 @@ func TestGenerateCompleteSnapshotWithDanglingStorage(t *testing.T) { func testGenerateCompleteSnapshotWithDanglingStorage(t *testing.T, scheme string) { var helper = newHelper(scheme) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := 
helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) - helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) @@ -943,11 +959,11 @@ func TestGenerateBrokenSnapshotWithDanglingStorage(t *testing.T) { func testGenerateBrokenSnapshotWithDanglingStorage(t *testing.T, scheme string) { var helper = newHelper(scheme) - stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) - helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) 
populateDangling(helper.diskdb) diff --git a/core/state/statedb.go b/core/state/statedb.go index b2b4f8fb97..527d9bc08d 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -38,7 +38,6 @@ import ( "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/ethereum/go-ethereum/trie/utils" "github.com/holiman/uint256" "golang.org/x/sync/errgroup" @@ -1282,8 +1281,7 @@ func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool) (*stateU // If trie database is enabled, commit the state update as a new layer if db := s.db.TrieDB(); db != nil { start := time.Now() - set := triestate.New(ret.accountsOrigin, ret.storagesOrigin) - if err := db.Update(ret.root, ret.originRoot, block, ret.nodes, set); err != nil { + if err := db.Update(ret.root, ret.originRoot, block, ret.nodes, ret.stateSet()); err != nil { return nil, err } s.TrieDBCommits += time.Since(start) diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index 9441834c6a..3c19ec0591 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -981,8 +981,8 @@ func testMissingTrieNodes(t *testing.T, scheme string) { ) if scheme == rawdb.PathScheme { tdb = triedb.NewDatabase(memDb, &triedb.Config{PathDB: &pathdb.Config{ - CleanCacheSize: 0, - DirtyCacheSize: 0, + CleanCacheSize: 0, + WriteBufferSize: 0, }}) // disable caching } else { tdb = triedb.NewDatabase(memDb, &triedb.Config{HashDB: &hashdb.Config{ diff --git a/core/state/stateupdate.go b/core/state/stateupdate.go index f3e6af997e..c9231f0526 100644 --- a/core/state/stateupdate.go +++ b/core/state/stateupdate.go @@ -20,6 +20,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/triedb" ) // contractCode represents a contract code with associated 
metadata. @@ -131,3 +132,17 @@ func newStateUpdate(originRoot common.Hash, root common.Hash, deletes map[common nodes: nodes, } } + +// stateSet converts the current stateUpdate object into a triedb.StateSet +// object. This function extracts the necessary data from the stateUpdate +// struct and formats it into the StateSet structure consumed by the triedb +// package. +func (sc *stateUpdate) stateSet() *triedb.StateSet { + return &triedb.StateSet{ + Destructs: sc.destructs, + Accounts: sc.accounts, + AccountsOrigin: sc.accountsOrigin, + Storages: sc.storages, + StoragesOrigin: sc.storagesOrigin, + } +} diff --git a/core/state/sync_test.go b/core/state/sync_test.go index cc15422c0c..2416cda873 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -207,7 +207,7 @@ func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool, s for i := 0; i < len(codes); i++ { codeElements = append(codeElements, stateElement{code: codes[i]}) } - reader, err := ndb.Reader(srcRoot) + reader, err := ndb.NodeReader(srcRoot) if err != nil { t.Fatalf("state is not existent, %#x", srcRoot) } @@ -326,7 +326,7 @@ func testIterativeDelayedStateSync(t *testing.T, scheme string) { for i := 0; i < len(codes); i++ { codeElements = append(codeElements, stateElement{code: codes[i]}) } - reader, err := ndb.Reader(srcRoot) + reader, err := ndb.NodeReader(srcRoot) if err != nil { t.Fatalf("state is not existent, %#x", srcRoot) } @@ -430,7 +430,7 @@ func testIterativeRandomStateSync(t *testing.T, count int, scheme string) { for _, hash := range codes { codeQueue[hash] = struct{}{} } - reader, err := ndb.Reader(srcRoot) + reader, err := ndb.NodeReader(srcRoot) if err != nil { t.Fatalf("state is not existent, %#x", srcRoot) } @@ -523,7 +523,7 @@ func testIterativeRandomDelayedStateSync(t *testing.T, scheme string) { for _, hash := range codes { codeQueue[hash] = struct{}{} } - reader, err := ndb.Reader(srcRoot) + reader, err := ndb.NodeReader(srcRoot) if err != nil { 
t.Fatalf("state is not existent, %#x", srcRoot) } @@ -628,7 +628,7 @@ func testIncompleteStateSync(t *testing.T, scheme string) { addedPaths []string addedHashes []common.Hash ) - reader, err := ndb.Reader(srcRoot) + reader, err := ndb.NodeReader(srcRoot) if err != nil { t.Fatalf("state is not available %x", srcRoot) } diff --git a/eth/handler.go b/eth/handler.go index d5117584c0..b28081eef0 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -27,7 +27,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/forkid" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/txpool" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" @@ -41,7 +40,6 @@ import ( "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" - "github.com/ethereum/go-ethereum/triedb/pathdb" ) const ( @@ -558,7 +556,4 @@ func (h *handler) enableSyncedFeatures() { log.Info("Snap sync complete, auto disabling") h.snapSync.Store(false) } - if h.chain.TrieDB().Scheme() == rawdb.PathScheme { - h.chain.TrieDB().SetBufferSize(pathdb.DefaultBufferSize) - } } diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index c97c3b99b3..d318077d99 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -1515,7 +1515,7 @@ func makeAccountTrieNoStorage(n int, scheme string) (string, *trie.Trie, []*kv) // Commit the state changes into db and re-create the trie // for accessing later. 
root, nodes := accTrie.Commit(false) - db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) accTrie, _ = trie.New(trie.StateTrieID(root), db) return db.Scheme(), accTrie, entries @@ -1577,7 +1577,7 @@ func makeBoundaryAccountTrie(scheme string, n int) (string, *trie.Trie, []*kv) { // Commit the state changes into db and re-create the trie // for accessing later. root, nodes := accTrie.Commit(false) - db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) + db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet()) accTrie, _ = trie.New(trie.StateTrieID(root), db) return db.Scheme(), accTrie, entries @@ -1626,7 +1626,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(scheme string, accounts, slots nodes.Merge(set) // Commit gathered dirty nodes into database - db.Update(root, types.EmptyRootHash, 0, nodes, nil) + db.Update(root, types.EmptyRootHash, 0, nodes, triedb.NewStateSet()) // Re-create tries with new root accTrie, _ = trie.New(trie.StateTrieID(root), db) @@ -1693,7 +1693,7 @@ func makeAccountTrieWithStorage(scheme string, accounts, slots int, code, bounda nodes.Merge(set) // Commit gathered dirty nodes into database - db.Update(root, types.EmptyRootHash, 0, nodes, nil) + db.Update(root, types.EmptyRootHash, 0, nodes, triedb.NewStateSet()) // Re-create tries with new root accTrie, err := trie.New(trie.StateTrieID(root), db) diff --git a/trie/database_test.go b/trie/database_test.go index aed508b368..729d9f699b 100644 --- a/trie/database_test.go +++ b/trie/database_test.go @@ -73,7 +73,7 @@ func newTestDatabase(diskdb ethdb.Database, scheme string) *testDb { } } -func (db *testDb) Reader(stateRoot common.Hash) (database.Reader, error) { +func (db *testDb) NodeReader(stateRoot common.Hash) (database.NodeReader, error) { nodes, _ := db.dirties(stateRoot, true) return &testReader{db: 
db.disk, scheme: db.scheme, nodes: nodes}, nil } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index b463294b09..74a1aa378c 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -146,7 +146,7 @@ func testNodeIteratorCoverage(t *testing.T, scheme string) { } } // Cross check the hashes and the database itself - reader, err := nodeDb.Reader(trie.Hash()) + reader, err := nodeDb.NodeReader(trie.Hash()) if err != nil { t.Fatalf("state is not available %x", trie.Hash()) } diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 91fd38269f..f53b10758f 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -40,7 +40,7 @@ type SecureTrie = StateTrie // NewSecure creates a new StateTrie. // Deprecated: use NewStateTrie. -func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db database.Database) (*SecureTrie, error) { +func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db database.NodeDatabase) (*SecureTrie, error) { id := &ID{ StateRoot: stateRoot, Owner: owner, @@ -61,7 +61,7 @@ func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db da // StateTrie is not safe for concurrent use. type StateTrie struct { trie Trie - db database.Database + db database.NodeDatabase preimages preimageStore hashKeyBuf [common.HashLength]byte secKeyCache map[string][]byte @@ -73,7 +73,7 @@ type StateTrie struct { // If root is the zero hash or the sha3 hash of an empty string, the // trie is initially empty. Otherwise, New will panic if db is nil // and returns MissingNodeError if the root node cannot be found. 
-func NewStateTrie(id *ID, db database.Database) (*StateTrie, error) { +func NewStateTrie(id *ID, db database.NodeDatabase) (*StateTrie, error) { if db == nil { panic("trie.NewStateTrie called without a database") } diff --git a/trie/sync_test.go b/trie/sync_test.go index ccdee7d014..2ff02576d4 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -183,7 +183,7 @@ func testIterativeSync(t *testing.T, count int, bypath bool, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -258,7 +258,7 @@ func testIterativeDelayedSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -327,7 +327,7 @@ func testIterativeRandomSync(t *testing.T, count int, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), } } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -394,7 +394,7 @@ func testIterativeRandomDelayedSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(path)), } } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -466,7 +466,7 @@ func testDuplicateAvoidanceSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -542,7 +542,7 @@ func testIncompleteSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := 
srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -634,7 +634,7 @@ func testSyncOrdering(t *testing.T, scheme string) { }) reqs = append(reqs, NewSyncPath([]byte(paths[i]))) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -704,7 +704,7 @@ func syncWithHookWriter(t *testing.T, root common.Hash, db ethdb.Database, srcDb syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(root) + reader, err := srcDb.NodeReader(root) if err != nil { t.Fatalf("State is not available %x", root) } diff --git a/trie/trie.go b/trie/trie.go index 372684683c..e3f3f39248 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -83,7 +83,7 @@ func (t *Trie) Copy() *Trie { // zero hash or the sha3 hash of an empty string, then trie is initially // empty, otherwise, the root node must be present in database or returns // a MissingNodeError if not. -func New(id *ID, db database.Database) (*Trie, error) { +func New(id *ID, db database.NodeDatabase) (*Trie, error) { reader, err := newTrieReader(id.StateRoot, id.Owner, db) if err != nil { return nil, err @@ -104,7 +104,7 @@ func New(id *ID, db database.Database) (*Trie, error) { } // NewEmpty is a shortcut to create empty tree. It's mostly used in tests. -func NewEmpty(db database.Database) *Trie { +func NewEmpty(db database.NodeDatabase) *Trie { tr, _ := New(TrieID(types.EmptyRootHash), db) return tr } diff --git a/trie/trie_reader.go b/trie/trie_reader.go index adbf43d287..4b8ba808df 100644 --- a/trie/trie_reader.go +++ b/trie/trie_reader.go @@ -27,19 +27,19 @@ import ( // for concurrent usage. 
type trieReader struct { owner common.Hash - reader database.Reader + reader database.NodeReader banned map[string]struct{} // Marker to prevent node from being accessed, for tests } // newTrieReader initializes the trie reader with the given node reader. -func newTrieReader(stateRoot, owner common.Hash, db database.Database) (*trieReader, error) { +func newTrieReader(stateRoot, owner common.Hash, db database.NodeDatabase) (*trieReader, error) { if stateRoot == (common.Hash{}) || stateRoot == types.EmptyRootHash { if stateRoot == (common.Hash{}) { log.Error("Zero state root hash!") } return &trieReader{owner: owner}, nil } - reader, err := db.Reader(stateRoot) + reader, err := db.NodeReader(stateRoot) if err != nil { return nil, &MissingNodeError{Owner: owner, NodeHash: stateRoot, err: err} } @@ -55,6 +55,9 @@ func newEmptyReader() *trieReader { // node retrieves the rlp-encoded trie node with the provided trie node // information. An MissingNodeError will be returned in case the node is // not found or any error is encountered. +// +// Don't modify the returned byte slice since it's not deep-copied and +// is still referenced by the database. func (r *trieReader) node(path []byte, hash common.Hash) ([]byte, error) { // Perform the logics in tests for preventing trie node access. if r.banned != nil { diff --git a/trie/trienode/node.go b/trie/trienode/node.go index 7debe6ecbc..047a7a4bd8 100644 --- a/trie/trienode/node.go +++ b/trie/trienode/node.go @@ -153,6 +153,15 @@ func (set *NodeSet) Size() (int, int) { return set.updates, set.deletes } +// HashSet returns a set of trie nodes keyed by node hash. +func (set *NodeSet) HashSet() map[common.Hash][]byte { + ret := make(map[common.Hash][]byte, len(set.Nodes)) + for _, n := range set.Nodes { + ret[n.Hash] = n.Blob + } + return ret +} + // Summary returns a string-representation of the NodeSet.
func (set *NodeSet) Summary() string { var out = new(strings.Builder) diff --git a/trie/triestate/state.go b/trie/triestate/state.go deleted file mode 100644 index 62a9043873..0000000000 --- a/trie/triestate/state.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2023 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see - -package triestate - -import "github.com/ethereum/go-ethereum/common" - -// Set represents a collection of mutated states during a state transition. -// The value refers to the original content of state before the transition -// is made. Nil means that the state was not present previously. -type Set struct { - Accounts map[common.Address][]byte // Mutated account set, nil means the account was not present - Storages map[common.Address]map[common.Hash][]byte // Mutated storage set, nil means the slot was not present - size common.StorageSize // Approximate size of set -} - -// New constructs the state set with provided data. -func New(accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) *Set { - return &Set{ - Accounts: accounts, - Storages: storages, - } -} - -// Size returns the approximate memory size occupied by the set. 
-func (s *Set) Size() common.StorageSize { - if s.size != 0 { - return s.size - } - for _, account := range s.Accounts { - s.size += common.StorageSize(common.AddressLength + len(account)) - } - for _, slots := range s.Storages { - for _, val := range slots { - s.size += common.StorageSize(common.HashLength + len(val)) - } - s.size += common.StorageSize(common.AddressLength) - } - return s.size -} diff --git a/trie/verkle.go b/trie/verkle.go index 6bd9d3d1af..a4c60e42c4 100644 --- a/trie/verkle.go +++ b/trie/verkle.go @@ -45,7 +45,7 @@ type VerkleTrie struct { } // NewVerkleTrie constructs a verkle tree based on the specified root hash. -func NewVerkleTrie(root common.Hash, db database.Database, cache *utils.PointCache) (*VerkleTrie, error) { +func NewVerkleTrie(root common.Hash, db database.NodeDatabase, cache *utils.PointCache) (*VerkleTrie, error) { reader, err := newTrieReader(root, common.Hash{}, db) if err != nil { return nil, err diff --git a/triedb/database.go b/triedb/database.go index c1e6f9af4e..d3eca57b54 100644 --- a/triedb/database.go +++ b/triedb/database.go @@ -24,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/ethereum/go-ethereum/triedb/database" "github.com/ethereum/go-ethereum/triedb/hashdb" "github.com/ethereum/go-ethereum/triedb/pathdb" @@ -57,6 +56,10 @@ var VerkleDefaults = &Config{ // backend defines the methods needed to access/update trie nodes in different // state scheme. type backend interface { + // NodeReader returns a reader for accessing trie nodes within the specified state. + // An error will be returned if the specified state is not available. + NodeReader(root common.Hash) (database.NodeReader, error) + // Initialized returns an indicator if the state data is already initialized // according to the state scheme. 
Initialized(genesisRoot common.Hash) bool @@ -68,24 +71,12 @@ type backend interface { // and dirty disk layer nodes, so both are merged into the second return. Size() (common.StorageSize, common.StorageSize) - // Update performs a state transition by committing dirty nodes contained - // in the given set in order to update state from the specified parent to - // the specified root. - // - // The passed in maps(nodes, states) will be retained to avoid copying - // everything. Therefore, these maps must not be changed afterwards. - Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error - // Commit writes all relevant trie nodes belonging to the specified state // to disk. Report specifies whether logs will be displayed in info level. Commit(root common.Hash, report bool) error // Close closes the trie database backend and releases all held resources. Close() error - - // Reader returns a reader for accessing all trie nodes with provided state - // root. An error will be returned if the requested state is not available. - Reader(root common.Hash) (database.Reader, error) } // Database is the wrapper of the underlying backend which is shared by different @@ -125,10 +116,10 @@ func NewDatabase(diskdb ethdb.Database, config *Config) *Database { return db } -// Reader returns a reader for accessing all trie nodes with provided state root. -// An error will be returned if the requested state is not available. -func (db *Database) Reader(blockRoot common.Hash) (database.Reader, error) { - return db.backend.Reader(blockRoot) +// NodeReader returns a reader for accessing trie nodes within the specified state. +// An error will be returned if the specified state is not available. 
+func (db *Database) NodeReader(blockRoot common.Hash) (database.NodeReader, error) { + return db.backend.NodeReader(blockRoot) } // Update performs a state transition by committing dirty nodes contained in the @@ -138,11 +129,17 @@ func (db *Database) Reader(blockRoot common.Hash) (database.Reader, error) { // // The passed in maps(nodes, states) will be retained to avoid copying everything. // Therefore, these maps must not be changed afterwards. -func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSet) error { if db.preimages != nil { db.preimages.commit(false) } - return db.backend.Update(root, parent, block, nodes, states) + switch b := db.backend.(type) { + case *hashdb.Database: + return b.Update(root, parent, block, nodes) + case *pathdb.Database: + return b.Update(root, parent, block, nodes, states.internal()) + } + return errors.New("unknown backend") } // Commit iterates over all the children of a particular node, writes them out @@ -314,17 +311,6 @@ func (db *Database) Journal(root common.Hash) error { return pdb.Journal(root) } -// SetBufferSize sets the node buffer size to the provided value(in bytes). -// It's only supported by path-based database and will return an error for -// others. -func (db *Database) SetBufferSize(size int) error { - pdb, ok := db.backend.(*pathdb.Database) - if !ok { - return errors.New("not supported") - } - return pdb.SetBufferSize(size) -} - // IsVerkle returns the indicator if the database is holding a verkle tree. 
func (db *Database) IsVerkle() bool { return db.config.IsVerkle diff --git a/triedb/database/database.go b/triedb/database/database.go index 9bd5da08d1..cde8390756 100644 --- a/triedb/database/database.go +++ b/triedb/database/database.go @@ -16,10 +16,12 @@ package database -import "github.com/ethereum/go-ethereum/common" +import ( + "github.com/ethereum/go-ethereum/common" +) -// Reader wraps the Node method of a backing trie reader. -type Reader interface { +// NodeReader wraps the Node method of a backing trie reader. +type NodeReader interface { // Node retrieves the trie node blob with the provided trie identifier, // node path and the corresponding node hash. No error will be returned // if the node is not found. @@ -29,9 +31,9 @@ type Reader interface { Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) } -// Database wraps the methods of a backing trie store. -type Database interface { - // Reader returns a node reader associated with the specific state. +// NodeDatabase wraps the methods of a backing trie store. +type NodeDatabase interface { + // NodeReader returns a node reader associated with the specific state. // An error will be returned if the specified state is not available. - Reader(stateRoot common.Hash) (Reader, error) + NodeReader(stateRoot common.Hash) (NodeReader, error) } diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 4def10e338..5de7805c31 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -33,7 +33,6 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/ethereum/go-ethereum/triedb/database" ) @@ -541,7 +540,7 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool { // Update inserts the dirty nodes in provided nodeset into database and link the // account trie with multiple storage tries if necessary. 
-func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet) error { // Ensure the parent state is present and signal a warning if not. if parent != types.EmptyRootHash { if blob, _ := db.node(parent); len(blob) == 0 { @@ -616,9 +615,9 @@ func (db *Database) Close() error { return nil } -// Reader retrieves a node reader belonging to the given state root. -// An error will be returned if the requested state is not available. -func (db *Database) Reader(root common.Hash) (database.Reader, error) { +// NodeReader returns a reader for accessing trie nodes within the specified state. +// An error will be returned if the specified state is not available. +func (db *Database) NodeReader(root common.Hash) (database.NodeReader, error) { if _, err := db.node(root); err != nil { return nil, fmt.Errorf("state %#x is not available, %v", root, err) } diff --git a/triedb/pathdb/buffer.go b/triedb/pathdb/buffer.go new file mode 100644 index 0000000000..3f1d0e90ee --- /dev/null +++ b/triedb/pathdb/buffer.go @@ -0,0 +1,141 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package pathdb + +import ( + "fmt" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// buffer is a collection of modified states along with the modified trie nodes. +// They are cached here to aggregate the disk write. The content of the buffer +// must be checked before diving into disk (since it basically is not yet written +// data). +type buffer struct { + layers uint64 // The number of diff layers aggregated inside + limit uint64 // The maximum memory allowance in bytes + nodes *nodeSet // Aggregated trie node set +} + +// newBuffer initializes the buffer with the provided states and trie nodes. +func newBuffer(limit int, nodes *nodeSet, layers uint64) *buffer { + // Don't panic for lazy users if any provided set is nil + if nodes == nil { + nodes = newNodeSet(nil) + } + return &buffer{ + layers: layers, + limit: uint64(limit), + nodes: nodes, + } +} + +// node retrieves the trie node with node path and its trie identifier. +func (b *buffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) { + return b.nodes.node(owner, path) +} + +// commit merges the provided states and trie nodes into the buffer. +func (b *buffer) commit(nodes *nodeSet) *buffer { + b.layers++ + b.nodes.merge(nodes) + return b +} + +// revert is the reverse operation of commit. It also merges the provided states +// and trie nodes into the buffer. The key difference is that the provided state +// set should reverse the changes made by the most recent state transition. 
+func (b *buffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) error { + // Short circuit if no embedded state transition to revert + if b.layers == 0 { + return errStateUnrecoverable + } + b.layers-- + + // Reset the entire buffer if only a single transition left + if b.layers == 0 { + b.reset() + return nil + } + b.nodes.revert(db, nodes) + return nil +} + +// reset cleans up the disk cache. +func (b *buffer) reset() { + b.layers = 0 + b.nodes.reset() +} + +// empty returns an indicator if buffer is empty. +func (b *buffer) empty() bool { + return b.layers == 0 +} + +// full returns an indicator if the size of accumulated content exceeds the +// configured threshold. +func (b *buffer) full() bool { + return b.size() > b.limit +} + +// size returns the approximate memory size of the held content. +func (b *buffer) size() uint64 { + return b.nodes.size +} + +// flush persists the in-memory dirty trie node into the disk if the configured +// memory threshold is reached. Note, all data must be written atomically. +func (b *buffer) flush(db ethdb.KeyValueStore, freezer ethdb.AncientWriter, nodesCache *fastcache.Cache, id uint64) error { + // Ensure the target state id is aligned with the internal counter. + head := rawdb.ReadPersistentStateID(db) + if head+b.layers != id { + return fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id) + } + // Start the timer and allocate a write batch pre-sized for the buffered nodes + var ( + start = time.Now() + batch = db.NewBatchWithSize(b.nodes.dbsize() * 11 / 10) // extra 10% for potential pebble internal stuff + ) + // Explicitly sync the state freezer, ensuring that all written + // data is transferred to disk before updating the key-value store.
+ if freezer != nil { + if err := freezer.Sync(); err != nil { + return err + } + } + nodes := b.nodes.write(batch, nodesCache) + rawdb.WritePersistentStateID(batch, id) + + // Flush all mutations in a single batch + size := batch.ValueSize() + if err := batch.Write(); err != nil { + return err + } + commitBytesMeter.Mark(int64(size)) + commitNodesMeter.Mark(int64(nodes)) + commitTimeTimer.UpdateSince(start) + b.reset() + log.Debug("Persisted buffer content", "nodes", nodes, "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index 31e478117c..48d46c7b08 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -31,7 +31,6 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) const ( @@ -39,17 +38,17 @@ const ( defaultCleanSize = 16 * 1024 * 1024 // maxBufferSize is the maximum memory allowance of node buffer. - // Too large nodebuffer will cause the system to pause for a long + // Too large buffer will cause the system to pause for a long // time when write happens. Also, the largest batch that pebble can // support is 4GB, node will panic if batch size exceeds this limit. maxBufferSize = 256 * 1024 * 1024 - // DefaultBufferSize is the default memory allowance of node buffer + // defaultBufferSize is the default memory allowance of node buffer // that aggregates the writes from above until it's flushed into the // disk. It's meant to be used once the initial sync is finished. // Do not increase the buffer size arbitrarily, otherwise the system // pause time will increase when the database writes happen. - DefaultBufferSize = 64 * 1024 * 1024 + defaultBufferSize = 64 * 1024 * 1024 ) var ( @@ -64,7 +63,9 @@ type layer interface { // if the read operation exits abnormally. 
Specifically, if the layer is // already stale. // - // Note, no error will be returned if the requested node is not found in database. + // Note: + // - the returned node is not a copy, please don't modify it. + // - no error will be returned if the requested node is not found in database. node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error) // rootHash returns the root hash for which this layer was made. @@ -80,7 +81,7 @@ type layer interface { // the provided dirty trie nodes along with the state change set. // // Note, the maps are retained by the method to avoid copying everything. - update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer + update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer // journal commits an entire diff hierarchy to disk into a single journal entry. // This is meant to be used during shutdown to persist the layer without @@ -90,28 +91,40 @@ type layer interface { // Config contains the settings for database. type Config struct { - StateHistory uint64 // Number of recent blocks to maintain state history for - CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes - DirtyCacheSize int // Maximum memory allowance (in bytes) for caching dirty nodes - ReadOnly bool // Flag whether the database is opened in read only mode. + StateHistory uint64 // Number of recent blocks to maintain state history for + CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes + WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer + ReadOnly bool // Flag whether the database is opened in read only mode. } // sanitize checks the provided user configurations and changes anything that's // unreasonable or unworkable. 
func (c *Config) sanitize() *Config { conf := *c - if conf.DirtyCacheSize > maxBufferSize { - log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize)) - conf.DirtyCacheSize = maxBufferSize + if conf.WriteBufferSize > maxBufferSize { + log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize)) + conf.WriteBufferSize = maxBufferSize } return &conf } +// fields returns a list of attributes of config for printing. +func (c *Config) fields() []interface{} { + var list []interface{} + if c.ReadOnly { + list = append(list, "readonly", true) + } + list = append(list, "cache", common.StorageSize(c.CleanCacheSize)) + list = append(list, "buffer", common.StorageSize(c.WriteBufferSize)) + list = append(list, "history", c.StateHistory) + return list +} + // Defaults contains default settings for Ethereum mainnet. var Defaults = &Config{ - StateHistory: params.FullImmutabilityThreshold, - CleanCacheSize: defaultCleanSize, - DirtyCacheSize: DefaultBufferSize, + StateHistory: params.FullImmutabilityThreshold, + CleanCacheSize: defaultCleanSize, + WriteBufferSize: defaultBufferSize, } // ReadOnly is the config in order to open database in read only mode. @@ -132,15 +145,15 @@ type Database struct { // readOnly is the flag whether the mutation is allowed to be applied. // It will be set automatically when the database is journaled during // the shutdown to reject all following unexpected mutations. 
- readOnly bool // Flag if database is opened in read only mode - waitSync bool // Flag if database is deactivated due to initial state sync - isVerkle bool // Flag if database is used for verkle tree - bufferSize int // Memory allowance (in bytes) for caching dirty nodes - config *Config // Configuration for database - diskdb ethdb.Database // Persistent storage for matured trie nodes - tree *layerTree // The group for all known layers - freezer ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests - lock sync.RWMutex // Lock to prevent mutations from happening at the same time + readOnly bool // Flag if database is opened in read only mode + waitSync bool // Flag if database is deactivated due to initial state sync + isVerkle bool // Flag if database is used for verkle tree + + config *Config // Configuration for database + diskdb ethdb.Database // Persistent storage for matured trie nodes + tree *layerTree // The group for all known layers + freezer ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests + lock sync.RWMutex // Lock to prevent mutations from happening at the same time } // New attempts to load an already existing layer from a persistent key-value @@ -161,11 +174,10 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { diskdb = rawdb.NewTable(diskdb, string(rawdb.VerklePrefix)) } db := &Database{ - readOnly: config.ReadOnly, - isVerkle: isVerkle, - bufferSize: config.DirtyCacheSize, - config: config, - diskdb: diskdb, + readOnly: config.ReadOnly, + isVerkle: isVerkle, + config: config, + diskdb: diskdb, } // Construct the layer tree by resolving the in-disk singleton state // and in-memory layer journal. @@ -174,7 +186,7 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { // Repair the state history, which might not be aligned with the state // in the key-value store due to an unclean shutdown. 
if err := db.repairHistory(); err != nil { - log.Crit("Failed to repair pathdb", "err", err) + log.Crit("Failed to repair state history", "err", err) } // Disable database in case node is still in the initial state sync stage. if rawdb.ReadSnapSyncStatusFlag(diskdb) == rawdb.StateSyncRunning && !db.readOnly { @@ -182,6 +194,11 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { log.Crit("Failed to disable database", "err", err) // impossible to happen } } + fields := config.fields() + if db.isVerkle { + fields = append(fields, "verkle", true) + } + log.Info("Initialized path database", fields...) return db } @@ -241,7 +258,7 @@ func (db *Database) repairHistory() error { // // The passed in maps(nodes, states) will be retained to avoid copying everything. // Therefore, these maps must not be changed afterwards. -func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSetWithOrigin) error { // Hold the lock to prevent concurrent mutations. db.lock.Lock() defer db.lock.Unlock() @@ -341,7 +358,7 @@ func (db *Database) Enable(root common.Hash) error { } // Re-construct a new disk layer backed by persistent state // with **empty clean cache and node buffer**. - db.tree.reset(newDiskLayer(root, 0, db, nil, newNodeBuffer(db.bufferSize, nil, 0))) + db.tree.reset(newDiskLayer(root, 0, db, nil, newBuffer(db.config.WriteBufferSize, nil, 0))) // Re-enable the database as the final step. db.waitSync = false @@ -357,19 +374,19 @@ func (db *Database) Recover(root common.Hash) error { db.lock.Lock() defer db.lock.Unlock() - // Short circuit if rollback operation is not supported. 
+ // Short circuit if rollback operation is not supported if err := db.modifyAllowed(); err != nil { return err } if db.freezer == nil { return errors.New("state rollback is non-supported") } - // Short circuit if the target state is not recoverable. + // Short circuit if the target state is not recoverable root = types.TrieRootHash(root) if !db.Recoverable(root) { return errStateUnrecoverable } - // Apply the state histories upon the disk layer in order. + // Apply the state histories upon the disk layer in order var ( start = time.Now() dl = db.tree.bottom() @@ -454,7 +471,7 @@ func (db *Database) Close() error { func (db *Database) Size() (diffs common.StorageSize, nodes common.StorageSize) { db.tree.forEach(func(layer layer) { if diff, ok := layer.(*diffLayer); ok { - diffs += common.StorageSize(diff.memory) + diffs += common.StorageSize(diff.size()) } if disk, ok := layer.(*diskLayer); ok { nodes += disk.size() @@ -478,19 +495,6 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool { return inited } -// SetBufferSize sets the node buffer size to the provided value(in bytes). -func (db *Database) SetBufferSize(size int) error { - db.lock.Lock() - defer db.lock.Unlock() - - if size > maxBufferSize { - log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize)) - size = maxBufferSize - } - db.bufferSize = size - return db.tree.bottom().setBufferSize(db.bufferSize) -} - // modifyAllowed returns the indicator if mutation is allowed. This function // assumes the db.lock is already held. 
func (db *Database) modifyAllowed() error { diff --git a/triedb/pathdb/database_test.go b/triedb/pathdb/database_test.go index f667944784..61e0b0928e 100644 --- a/triedb/pathdb/database_test.go +++ b/triedb/pathdb/database_test.go @@ -31,7 +31,6 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/holiman/uint256" ) @@ -108,9 +107,9 @@ func newTester(t *testing.T, historyLimit uint64) *tester { var ( disk, _ = rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), t.TempDir(), "", false) db = New(disk, &Config{ - StateHistory: historyLimit, - CleanCacheSize: 16 * 1024, - DirtyCacheSize: 16 * 1024, + StateHistory: historyLimit, + CleanCacheSize: 16 * 1024, + WriteBufferSize: 16 * 1024, }, false) obj = &tester{ db: db, @@ -217,7 +216,7 @@ func (t *tester) clearStorage(ctx *genctx, addr common.Address, root common.Hash return root } -func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNodeSet, *triestate.Set) { +func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNodeSet, *StateSetWithOrigin) { var ( ctx = newCtx(parent) dirties = make(map[common.Hash]struct{}) @@ -310,7 +309,7 @@ func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNode delete(t.storages, addrHash) } } - return root, ctx.nodes, triestate.New(ctx.accountOrigin, ctx.storageOrigin) + return root, ctx.nodes, NewStateSetWithOrigin(ctx.accountOrigin, ctx.storageOrigin) } // lastHash returns the latest root hash, or empty if nothing is cached. 
diff --git a/triedb/pathdb/difflayer.go b/triedb/pathdb/difflayer.go index 6b87883482..ecc318614f 100644 --- a/triedb/pathdb/difflayer.go +++ b/triedb/pathdb/difflayer.go @@ -22,8 +22,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // diffLayer represents a collection of modifications made to the in-memory tries @@ -33,45 +31,28 @@ import ( // made to the state, that have not yet graduated into a semi-immutable state. type diffLayer struct { // Immutables - root common.Hash // Root hash to which this layer diff belongs to - id uint64 // Corresponding state id - block uint64 // Associated block number - nodes map[common.Hash]map[string]*trienode.Node // Cached trie nodes indexed by owner and path - states *triestate.Set // Associated state change set for building history - memory uint64 // Approximate guess as to how much memory we use + root common.Hash // Root hash to which this layer diff belongs to + id uint64 // Corresponding state id + block uint64 // Associated block number + nodes *nodeSet // Cached trie nodes indexed by owner and path + states *StateSetWithOrigin // Associated state changes along with origin value parent layer // Parent layer modified by this one, never nil, **can be changed** lock sync.RWMutex // Lock used to protect parent } // newDiffLayer creates a new diff layer on top of an existing layer. 
-func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { - var ( - size int64 - count int - ) +func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer { dl := &diffLayer{ root: root, id: id, block: block, + parent: parent, nodes: nodes, states: states, - parent: parent, - } - for _, subset := range nodes { - for path, n := range subset { - dl.memory += uint64(n.Size() + len(path)) - size += int64(len(n.Blob) + len(path)) - } - count += len(subset) } - if states != nil { - dl.memory += uint64(states.Size()) - } - dirtyWriteMeter.Mark(size) - diffLayerNodesMeter.Mark(int64(count)) - diffLayerBytesMeter.Mark(int64(dl.memory)) - log.Debug("Created new diff layer", "id", id, "block", block, "nodes", count, "size", common.StorageSize(dl.memory)) + dirtyNodeWriteMeter.Mark(int64(nodes.size)) + log.Debug("Created new diff layer", "id", id, "block", block, "nodesize", common.StorageSize(nodes.size), "statesize", common.StorageSize(states.size)) return dl } @@ -104,15 +85,12 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co defer dl.lock.RUnlock() // If the trie node is known locally, return it - subset, ok := dl.nodes[owner] + n, ok := dl.nodes.node(owner, path) if ok { - n, ok := subset[string(path)] - if ok { - dirtyHitMeter.Mark(1) - dirtyNodeHitDepthHist.Update(int64(depth)) - dirtyReadMeter.Mark(int64(len(n.Blob))) - return n.Blob, n.Hash, &nodeLoc{loc: locDiffLayer, depth: depth}, nil - } + dirtyNodeHitMeter.Mark(1) + dirtyNodeHitDepthHist.Update(int64(depth)) + dirtyNodeReadMeter.Mark(int64(len(n.Blob))) + return n.Blob, n.Hash, &nodeLoc{loc: locDiffLayer, depth: depth}, nil } // Trie node unknown to this layer, resolve from parent return dl.parent.node(owner, path, depth+1) @@ -120,7 +98,7 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, depth 
int) ([]byte, co // update implements the layer interface, creating a new layer on top of the // existing layer tree with the specified data items. -func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { +func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer { return newDiffLayer(dl, root, id, block, nodes, states) } @@ -145,6 +123,11 @@ func (dl *diffLayer) persist(force bool) (layer, error) { return diffToDisk(dl, force) } +// size returns the approximate memory size occupied by this diff layer. +func (dl *diffLayer) size() uint64 { + return dl.nodes.size + dl.states.size +} + // diffToDisk merges a bottom-most diff into the persistent disk layer underneath // it. The method will panic if called onto a non-bottom-most diff layer. func diffToDisk(layer *diffLayer, force bool) (layer, error) { diff --git a/triedb/pathdb/difflayer_test.go b/triedb/pathdb/difflayer_test.go index 1e93a3f892..e65f379135 100644 --- a/triedb/pathdb/difflayer_test.go +++ b/triedb/pathdb/difflayer_test.go @@ -30,7 +30,7 @@ import ( func emptyLayer() *diskLayer { return &diskLayer{ db: New(rawdb.NewMemoryDatabase(), nil, false), - buffer: newNodeBuffer(DefaultBufferSize, nil, 0), + buffer: newBuffer(defaultBufferSize, nil, 0), } } @@ -76,7 +76,7 @@ func benchmarkSearch(b *testing.B, depth int, total int) { nblob = common.CopyBytes(blob) } } - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } var layer layer layer = emptyLayer() @@ -118,7 +118,7 @@ func BenchmarkPersist(b *testing.B) { ) nodes[common.Hash{}][string(path)] = node } - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } for i := 0; i < b.N; i++ { b.StopTimer() @@ -157,7 +157,7 @@ func 
BenchmarkJournal(b *testing.B) { nodes[common.Hash{}][string(path)] = node } // TODO(rjl493456442) a non-nil state set is expected. - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } var layer layer layer = emptyLayer() diff --git a/triedb/pathdb/disklayer.go b/triedb/pathdb/disklayer.go index eadcfacef7..edbe630968 100644 --- a/triedb/pathdb/disklayer.go +++ b/triedb/pathdb/disklayer.go @@ -25,8 +25,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // diskLayer is a low level persistent layer built on top of a key-value store. @@ -34,25 +32,25 @@ type diskLayer struct { root common.Hash // Immutable, root hash to which this layer was made for id uint64 // Immutable, corresponding state id db *Database // Path-based trie database - cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs - buffer *nodebuffer // Node buffer to aggregate writes + nodes *fastcache.Cache // GC friendly memory cache of clean nodes + buffer *buffer // Dirty buffer to aggregate writes of nodes stale bool // Signals that the layer became stale (state progressed) lock sync.RWMutex // Lock used to protect stale flag } // newDiskLayer creates a new disk layer based on the passing arguments. -func newDiskLayer(root common.Hash, id uint64, db *Database, cleans *fastcache.Cache, buffer *nodebuffer) *diskLayer { +func newDiskLayer(root common.Hash, id uint64, db *Database, nodes *fastcache.Cache, buffer *buffer) *diskLayer { // Initialize a clean cache if the memory allowance is not zero // or reuse the provided cache if it is not nil (inherited from // the original disk layer). 
- if cleans == nil && db.config.CleanCacheSize != 0 { - cleans = fastcache.New(db.config.CleanCacheSize) + if nodes == nil && db.config.CleanCacheSize != 0 { + nodes = fastcache.New(db.config.CleanCacheSize) } return &diskLayer{ root: root, id: id, db: db, - cleans: cleans, + nodes: nodes, buffer: buffer, } } @@ -108,25 +106,25 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co // layer as stale. n, found := dl.buffer.node(owner, path) if found { - dirtyHitMeter.Mark(1) - dirtyReadMeter.Mark(int64(len(n.Blob))) + dirtyNodeHitMeter.Mark(1) + dirtyNodeReadMeter.Mark(int64(len(n.Blob))) dirtyNodeHitDepthHist.Update(int64(depth)) return n.Blob, n.Hash, &nodeLoc{loc: locDirtyCache, depth: depth}, nil } - dirtyMissMeter.Mark(1) + dirtyNodeMissMeter.Mark(1) // Try to retrieve the trie node from the clean memory cache h := newHasher() defer h.release() - key := cacheKey(owner, path) - if dl.cleans != nil { - if blob := dl.cleans.Get(nil, key); len(blob) > 0 { - cleanHitMeter.Mark(1) - cleanReadMeter.Mark(int64(len(blob))) + key := nodeCacheKey(owner, path) + if dl.nodes != nil { + if blob := dl.nodes.Get(nil, key); len(blob) > 0 { + cleanNodeHitMeter.Mark(1) + cleanNodeReadMeter.Mark(int64(len(blob))) return blob, h.hash(blob), &nodeLoc{loc: locCleanCache, depth: depth}, nil } - cleanMissMeter.Mark(1) + cleanNodeMissMeter.Mark(1) } // Try to retrieve the trie node from the disk. 
var blob []byte @@ -135,16 +133,16 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co } else { blob = rawdb.ReadStorageTrieNode(dl.db.diskdb, owner, path) } - if dl.cleans != nil && len(blob) > 0 { - dl.cleans.Set(key, blob) - cleanWriteMeter.Mark(int64(len(blob))) + if dl.nodes != nil && len(blob) > 0 { + dl.nodes.Set(key, blob) + cleanNodeWriteMeter.Mark(int64(len(blob))) } return blob, h.hash(blob), &nodeLoc{loc: locDiskLayer, depth: depth}, nil } // update implements the layer interface, returning a new diff layer on top // with the given state set. -func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { +func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer { return newDiffLayer(dl, root, id, block, nodes, states) } @@ -190,11 +188,6 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) { } rawdb.WriteStateID(dl.db.diskdb, bottom.rootHash(), bottom.stateID()) - // Construct a new disk layer by merging the nodes from the provided diff - // layer, and flush the content in disk layer if there are too many nodes - // cached. The clean cache is inherited from the original disk layer. 
- ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.cleans, dl.buffer.commit(bottom.nodes)) - // In a unique scenario where the ID of the oldest history object (after tail // truncation) surpasses the persisted state ID, we take the necessary action // of forcibly committing the cached dirty nodes to ensure that the persisted @@ -202,9 +195,16 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) { if !force && rawdb.ReadPersistentStateID(dl.db.diskdb) < oldest { force = true } - if err := ndl.buffer.flush(ndl.db.diskdb, ndl.db.freezer, ndl.cleans, ndl.id, force); err != nil { - return nil, err + // Merge the trie nodes of the bottom-most diff layer into the buffer as the + // combined layer. + combined := dl.buffer.commit(bottom.nodes) + if combined.full() || force { + if err := combined.flush(dl.db.diskdb, dl.db.freezer, dl.nodes, bottom.stateID()); err != nil { + return nil, err + } } + ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.nodes, combined) + // To remove outdated history objects from the end, we set the 'tail' parameter // to 'oldest-1' due to the offset between the freezer index and the history ID. if overflow { @@ -250,24 +250,13 @@ func (dl *diskLayer) revert(h *history) (*diskLayer, error) { } } else { batch := dl.db.diskdb.NewBatch() - writeNodes(batch, nodes, dl.cleans) + writeNodes(batch, nodes, dl.nodes) rawdb.WritePersistentStateID(batch, dl.id-1) if err := batch.Write(); err != nil { log.Crit("Failed to write states", "err", err) } } - return newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.cleans, dl.buffer), nil -} - -// setBufferSize sets the node buffer size to the provided value. 
-func (dl *diskLayer) setBufferSize(size int) error { - dl.lock.RLock() - defer dl.lock.RUnlock() - - if dl.stale { - return errSnapshotStale - } - return dl.buffer.setSize(size, dl.db.diskdb, dl.db.freezer, dl.cleans, dl.id) + return newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.nodes, dl.buffer), nil } // size returns the approximate size of cached nodes in the disk layer. @@ -278,7 +267,7 @@ func (dl *diskLayer) size() common.StorageSize { if dl.stale { return 0 } - return common.StorageSize(dl.buffer.size) + return common.StorageSize(dl.buffer.size()) } // resetCache releases the memory held by clean cache to prevent memory leak. @@ -286,12 +275,12 @@ func (dl *diskLayer) resetCache() { dl.lock.RLock() defer dl.lock.RUnlock() - // Stale disk layer loses the ownership of clean cache. + // Stale disk layer loses the ownership of clean caches. if dl.stale { return } - if dl.cleans != nil { - dl.cleans.Reset() + if dl.nodes != nil { + dl.nodes.Reset() } } diff --git a/triedb/pathdb/execute.go b/triedb/pathdb/execute.go index 9074e4debf..e24d0710f3 100644 --- a/triedb/pathdb/execute.go +++ b/triedb/pathdb/execute.go @@ -43,7 +43,7 @@ type context struct { // apply processes the given state diffs, updates the corresponding post-state // and returns the trie nodes that have been modified. 
-func apply(db database.Database, prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) (map[common.Hash]map[string]*trienode.Node, error) { +func apply(db database.NodeDatabase, prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) (map[common.Hash]map[string]*trienode.Node, error) { tr, err := trie.New(trie.TrieID(postRoot), db) if err != nil { return nil, err @@ -80,7 +80,7 @@ func apply(db database.Database, prevRoot common.Hash, postRoot common.Hash, acc // updateAccount the account was present in prev-state, and may or may not // existent in post-state. Apply the reverse diff and verify if the storage // root matches the one in prev-state account. -func updateAccount(ctx *context, db database.Database, addr common.Address) error { +func updateAccount(ctx *context, db database.NodeDatabase, addr common.Address) error { // The account was present in prev-state, decode it from the // 'slim-rlp' format bytes. h := newHasher() @@ -141,7 +141,7 @@ func updateAccount(ctx *context, db database.Database, addr common.Address) erro // deleteAccount the account was not present in prev-state, and is expected // to be existent in post-state. Apply the reverse diff and verify if the // account and storage is wiped out correctly. -func deleteAccount(ctx *context, db database.Database, addr common.Address) error { +func deleteAccount(ctx *context, db database.NodeDatabase, addr common.Address) error { // The account must be existent in post-state, load the account. h := newHasher() defer h.release() diff --git a/triedb/pathdb/flush.go b/triedb/pathdb/flush.go new file mode 100644 index 0000000000..baa0bfb292 --- /dev/null +++ b/triedb/pathdb/flush.go @@ -0,0 +1,65 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package pathdb + +import ( + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// nodeCacheKey constructs the unique key of clean cache. The assumption is held +// that zero address does not have any associated storage slots. +func nodeCacheKey(owner common.Hash, path []byte) []byte { + if owner == (common.Hash{}) { + return path + } + return append(owner.Bytes(), path...) +} + +// writeNodes writes the trie nodes into the provided database batch. +// Note this function will also inject all the newly written nodes +// into clean cache.
+func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) (total int) { + for owner, subset := range nodes { + for path, n := range subset { + if n.IsDeleted() { + if owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(batch, []byte(path)) + } else { + rawdb.DeleteStorageTrieNode(batch, owner, []byte(path)) + } + if clean != nil { + clean.Del(nodeCacheKey(owner, []byte(path))) + } + } else { + if owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(batch, []byte(path), n.Blob) + } else { + rawdb.WriteStorageTrieNode(batch, owner, []byte(path), n.Blob) + } + if clean != nil { + clean.Set(nodeCacheKey(owner, []byte(path)), n.Blob) + } + } + } + total += len(subset) + } + return total +} diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go index d77f7aa04d..e1cd981153 100644 --- a/triedb/pathdb/history.go +++ b/triedb/pathdb/history.go @@ -28,7 +28,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/triestate" "golang.org/x/exp/maps" ) @@ -243,14 +242,14 @@ type history struct { } // newHistory constructs the state history object with provided state change set. 
-func newHistory(root common.Hash, parent common.Hash, block uint64, states *triestate.Set) *history { +func newHistory(root common.Hash, parent common.Hash, block uint64, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) *history { var ( - accountList = maps.Keys(states.Accounts) + accountList = maps.Keys(accounts) storageList = make(map[common.Address][]common.Hash) ) slices.SortFunc(accountList, common.Address.Cmp) - for addr, slots := range states.Storages { + for addr, slots := range storages { slist := maps.Keys(slots) slices.SortFunc(slist, common.Hash.Cmp) storageList[addr] = slist @@ -262,9 +261,9 @@ func newHistory(root common.Hash, parent common.Hash, block uint64, states *trie root: root, block: block, }, - accounts: states.Accounts, + accounts: accounts, accountList: accountList, - storages: states.Storages, + storages: storages, storageList: storageList, } } @@ -499,7 +498,7 @@ func writeHistory(writer ethdb.AncientWriter, dl *diffLayer) error { } var ( start = time.Now() - history = newHistory(dl.rootHash(), dl.parentLayer().rootHash(), dl.block, dl.states) + history = newHistory(dl.rootHash(), dl.parentLayer().rootHash(), dl.block, dl.states.accountOrigin, dl.states.storageOrigin) ) accountData, storageData, accountIndex, storageIndex := history.encode() dataSize := common.StorageSize(len(accountData) + len(storageData)) diff --git a/triedb/pathdb/history_test.go b/triedb/pathdb/history_test.go index 586f907fe4..d430706dee 100644 --- a/triedb/pathdb/history_test.go +++ b/triedb/pathdb/history_test.go @@ -28,11 +28,10 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/testrand" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie/triestate" ) // randomStateSet generates a random state change set. 
-func randomStateSet(n int) *triestate.Set { +func randomStateSet(n int) (map[common.Address][]byte, map[common.Address]map[common.Hash][]byte) { var ( accounts = make(map[common.Address][]byte) storages = make(map[common.Address]map[common.Hash][]byte) @@ -47,11 +46,12 @@ func randomStateSet(n int) *triestate.Set { account := generateAccount(types.EmptyRootHash) accounts[addr] = types.SlimAccountRLP(account) } - return triestate.New(accounts, storages) + return accounts, storages } func makeHistory() *history { - return newHistory(testrand.Hash(), types.EmptyRootHash, 0, randomStateSet(3)) + accounts, storages := randomStateSet(3) + return newHistory(testrand.Hash(), types.EmptyRootHash, 0, accounts, storages) } func makeHistories(n int) []*history { @@ -61,7 +61,8 @@ func makeHistories(n int) []*history { ) for i := 0; i < n; i++ { root := testrand.Hash() - h := newHistory(root, parent, uint64(i), randomStateSet(3)) + accounts, storages := randomStateSet(3) + h := newHistory(root, parent, uint64(i), accounts, storages) parent = root result = append(result, h) } diff --git a/triedb/pathdb/journal.go b/triedb/pathdb/journal.go index 1740ec5935..70fa1fb9f8 100644 --- a/triedb/pathdb/journal.go +++ b/triedb/pathdb/journal.go @@ -29,8 +29,6 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) var ( @@ -49,32 +47,6 @@ var ( // - Version 1: storage.Incomplete field is removed const journalVersion uint64 = 1 -// journalNode represents a trie node persisted in the journal. -type journalNode struct { - Path []byte // Path of the node in the trie - Blob []byte // RLP-encoded trie node blob, nil means the node is deleted -} - -// journalNodes represents a list trie nodes belong to a single account -// or the main account trie. 
-type journalNodes struct { - Owner common.Hash - Nodes []journalNode -} - -// journalAccounts represents a list accounts belong to the layer. -type journalAccounts struct { - Addresses []common.Address - Accounts [][]byte -} - -// journalStorage represents a list of storage slots belong to an account. -type journalStorage struct { - Account common.Address - Hashes []common.Hash - Slots [][]byte -} - // loadJournal tries to parse the layer journal from the disk. func (db *Database) loadJournal(diskRoot common.Hash) (layer, error) { journal := rawdb.ReadTrieJournal(db.diskdb) @@ -136,7 +108,7 @@ func (db *Database) loadLayers() layer { log.Info("Failed to load journal, discard it", "err", err) } // Return single layer with persistent state. - return newDiskLayer(root, rawdb.ReadPersistentStateID(db.diskdb), db, nil, newNodeBuffer(db.bufferSize, nil, 0)) + return newDiskLayer(root, rawdb.ReadPersistentStateID(db.diskdb), db, nil, newBuffer(db.config.WriteBufferSize, nil, 0)) } // loadDiskLayer reads the binary blob from the layer journal, reconstructing @@ -158,26 +130,12 @@ func (db *Database) loadDiskLayer(r *rlp.Stream) (layer, error) { if stored > id { return nil, fmt.Errorf("invalid state id: stored %d resolved %d", stored, id) } - // Resolve nodes cached in node buffer - var encoded []journalNodes - if err := r.Decode(&encoded); err != nil { - return nil, fmt.Errorf("load disk nodes: %v", err) - } - nodes := make(map[common.Hash]map[string]*trienode.Node) - for _, entry := range encoded { - subset := make(map[string]*trienode.Node) - for _, n := range entry.Nodes { - if len(n.Blob) > 0 { - subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) - } else { - subset[string(n.Path)] = trienode.NewDeleted() - } - } - nodes[entry.Owner] = subset + // Resolve nodes cached in aggregated buffer + var nodes nodeSet + if err := nodes.decode(r); err != nil { + return nil, err } - // Calculate the internal state transitions by id difference. 
- base := newDiskLayer(root, id, db, nil, newNodeBuffer(db.bufferSize, nodes, id-stored)) - return base, nil + return newDiskLayer(root, id, db, nil, newBuffer(db.config.WriteBufferSize, &nodes, id-stored)), nil } // loadDiffLayer reads the next sections of a layer journal, reconstructing a new @@ -197,50 +155,16 @@ func (db *Database) loadDiffLayer(parent layer, r *rlp.Stream) (layer, error) { return nil, fmt.Errorf("load block number: %v", err) } // Read in-memory trie nodes from journal - var encoded []journalNodes - if err := r.Decode(&encoded); err != nil { - return nil, fmt.Errorf("load diff nodes: %v", err) - } - nodes := make(map[common.Hash]map[string]*trienode.Node) - for _, entry := range encoded { - subset := make(map[string]*trienode.Node) - for _, n := range entry.Nodes { - if len(n.Blob) > 0 { - subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) - } else { - subset[string(n.Path)] = trienode.NewDeleted() - } - } - nodes[entry.Owner] = subset - } - // Read state changes from journal - var ( - jaccounts journalAccounts - jstorages []journalStorage - accounts = make(map[common.Address][]byte) - storages = make(map[common.Address]map[common.Hash][]byte) - ) - if err := r.Decode(&jaccounts); err != nil { - return nil, fmt.Errorf("load diff accounts: %v", err) - } - for i, addr := range jaccounts.Addresses { - accounts[addr] = jaccounts.Accounts[i] - } - if err := r.Decode(&jstorages); err != nil { - return nil, fmt.Errorf("load diff storages: %v", err) - } - for _, entry := range jstorages { - set := make(map[common.Hash][]byte) - for i, h := range entry.Hashes { - if len(entry.Slots[i]) > 0 { - set[h] = entry.Slots[i] - } else { - set[h] = nil - } - } - storages[entry.Account] = set + var nodes nodeSet + if err := nodes.decode(r); err != nil { + return nil, err + } + // Read flat states set (with original value attached) from journal + var stateSet StateSetWithOrigin + if err := stateSet.decode(r); err != nil { + return nil, err 
} - return db.loadDiffLayer(newDiffLayer(parent, root, parent.stateID()+1, block, nodes, triestate.New(accounts, storages)), r) + return db.loadDiffLayer(newDiffLayer(parent, root, parent.stateID()+1, block, &nodes, &stateSet), r) } // journal implements the layer interface, marshaling the un-flushed trie nodes @@ -261,19 +185,11 @@ func (dl *diskLayer) journal(w io.Writer) error { if err := rlp.Encode(w, dl.id); err != nil { return err } - // Step three, write all unwritten nodes into the journal - nodes := make([]journalNodes, 0, len(dl.buffer.nodes)) - for owner, subset := range dl.buffer.nodes { - entry := journalNodes{Owner: owner} - for path, node := range subset { - entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) - } - nodes = append(nodes, entry) - } - if err := rlp.Encode(w, nodes); err != nil { + // Step three, write the accumulated trie nodes into the journal + if err := dl.buffer.nodes.encode(w); err != nil { return err } - log.Debug("Journaled pathdb disk layer", "root", dl.root, "nodes", len(dl.buffer.nodes)) + log.Debug("Journaled pathdb disk layer", "root", dl.root) return nil } @@ -295,39 +211,14 @@ func (dl *diffLayer) journal(w io.Writer) error { return err } // Write the accumulated trie nodes into buffer - nodes := make([]journalNodes, 0, len(dl.nodes)) - for owner, subset := range dl.nodes { - entry := journalNodes{Owner: owner} - for path, node := range subset { - entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) - } - nodes = append(nodes, entry) - } - if err := rlp.Encode(w, nodes); err != nil { - return err - } - // Write the accumulated state changes into buffer - var jacct journalAccounts - for addr, account := range dl.states.Accounts { - jacct.Addresses = append(jacct.Addresses, addr) - jacct.Accounts = append(jacct.Accounts, account) - } - if err := rlp.Encode(w, jacct); err != nil { + if err := dl.nodes.encode(w); err != nil { return err } - storage := 
make([]journalStorage, 0, len(dl.states.Storages)) - for addr, slots := range dl.states.Storages { - entry := journalStorage{Account: addr} - for slotHash, slot := range slots { - entry.Hashes = append(entry.Hashes, slotHash) - entry.Slots = append(entry.Slots, slot) - } - storage = append(storage, entry) - } - if err := rlp.Encode(w, storage); err != nil { + // Write the associated flat state set into buffer + if err := dl.states.encode(w); err != nil { return err } - log.Debug("Journaled pathdb diff layer", "root", dl.root, "parent", dl.parent.rootHash(), "id", dl.stateID(), "block", dl.block, "nodes", len(dl.nodes)) + log.Debug("Journaled pathdb diff layer", "root", dl.root, "parent", dl.parent.rootHash(), "id", dl.stateID(), "block", dl.block) return nil } diff --git a/triedb/pathdb/layertree.go b/triedb/pathdb/layertree.go index d314779910..cf6b14e744 100644 --- a/triedb/pathdb/layertree.go +++ b/triedb/pathdb/layertree.go @@ -24,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // layerTree is a group of state layers identified by the state root. @@ -86,7 +85,7 @@ func (tree *layerTree) len() int { } // add inserts a new layer into the tree if it can be linked to an existing old parent. -func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSetWithOrigin) error { // Reject noop updates to avoid self-loops. This is a special case that can // happen for clique networks and proof-of-stake networks where empty blocks // don't modify the state (0 block subsidy). 
@@ -101,7 +100,7 @@ func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint6 if parent == nil { return fmt.Errorf("triedb parent [%#x] layer missing", parentRoot) } - l := parent.update(root, parent.stateID()+1, block, nodes.Flatten(), states) + l := parent.update(root, parent.stateID()+1, block, newNodeSet(nodes.Flatten()), states) tree.lock.Lock() tree.layers[l.rootHash()] = l diff --git a/triedb/pathdb/metrics.go b/triedb/pathdb/metrics.go index a250f703cb..8897cc4450 100644 --- a/triedb/pathdb/metrics.go +++ b/triedb/pathdb/metrics.go @@ -19,16 +19,16 @@ package pathdb import "github.com/ethereum/go-ethereum/metrics" var ( - cleanHitMeter = metrics.NewRegisteredMeter("pathdb/clean/hit", nil) - cleanMissMeter = metrics.NewRegisteredMeter("pathdb/clean/miss", nil) - cleanReadMeter = metrics.NewRegisteredMeter("pathdb/clean/read", nil) - cleanWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/write", nil) + cleanNodeHitMeter = metrics.NewRegisteredMeter("pathdb/clean/node/hit", nil) + cleanNodeMissMeter = metrics.NewRegisteredMeter("pathdb/clean/node/miss", nil) + cleanNodeReadMeter = metrics.NewRegisteredMeter("pathdb/clean/node/read", nil) + cleanNodeWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/node/write", nil) - dirtyHitMeter = metrics.NewRegisteredMeter("pathdb/dirty/hit", nil) - dirtyMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/miss", nil) - dirtyReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/read", nil) - dirtyWriteMeter = metrics.NewRegisteredMeter("pathdb/dirty/write", nil) - dirtyNodeHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + dirtyNodeHitMeter = metrics.NewRegisteredMeter("pathdb/dirty/node/hit", nil) + dirtyNodeMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/node/miss", nil) + dirtyNodeReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/node/read", nil) + dirtyNodeWriteMeter = 
metrics.NewRegisteredMeter("pathdb/dirty/node/write", nil) + dirtyNodeHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/node/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) cleanFalseMeter = metrics.NewRegisteredMeter("pathdb/clean/false", nil) dirtyFalseMeter = metrics.NewRegisteredMeter("pathdb/dirty/false", nil) @@ -39,11 +39,8 @@ var ( commitNodesMeter = metrics.NewRegisteredMeter("pathdb/commit/nodes", nil) commitBytesMeter = metrics.NewRegisteredMeter("pathdb/commit/bytes", nil) - gcNodesMeter = metrics.NewRegisteredMeter("pathdb/gc/nodes", nil) - gcBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/bytes", nil) - - diffLayerBytesMeter = metrics.NewRegisteredMeter("pathdb/diff/bytes", nil) - diffLayerNodesMeter = metrics.NewRegisteredMeter("pathdb/diff/nodes", nil) + gcTrieNodeMeter = metrics.NewRegisteredMeter("pathdb/gc/node/count", nil) + gcTrieNodeBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/node/bytes", nil) historyBuildTimeMeter = metrics.NewRegisteredTimer("pathdb/history/time", nil) historyDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/data", nil) diff --git a/triedb/pathdb/nodebuffer.go b/triedb/pathdb/nodebuffer.go deleted file mode 100644 index a4788ff9ba..0000000000 --- a/triedb/pathdb/nodebuffer.go +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2022 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. - -package pathdb - -import ( - "bytes" - "fmt" - "maps" - "time" - - "github.com/VictoriaMetrics/fastcache" - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/trienode" -) - -// nodebuffer is a collection of modified trie nodes to aggregate the disk -// write. The content of the nodebuffer must be checked before diving into -// disk (since it basically is not-yet-written data). -type nodebuffer struct { - layers uint64 // The number of diff layers aggregated inside - size uint64 // The size of aggregated writes - limit uint64 // The maximum memory allowance in bytes - nodes map[common.Hash]map[string]*trienode.Node // The dirty node set, mapped by owner and path -} - -// newNodeBuffer initializes the node buffer with the provided nodes. -func newNodeBuffer(limit int, nodes map[common.Hash]map[string]*trienode.Node, layers uint64) *nodebuffer { - if nodes == nil { - nodes = make(map[common.Hash]map[string]*trienode.Node) - } - var size uint64 - for _, subset := range nodes { - for path, n := range subset { - size += uint64(len(n.Blob) + len(path)) - } - } - return &nodebuffer{ - layers: layers, - nodes: nodes, - size: size, - limit: uint64(limit), - } -} - -// node retrieves the trie node with given node info. -func (b *nodebuffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) { - subset, ok := b.nodes[owner] - if !ok { - return nil, false - } - n, ok := subset[string(path)] - if !ok { - return nil, false - } - return n, true -} - -// commit merges the dirty nodes into the nodebuffer. This operation won't take -// the ownership of the nodes map which belongs to the bottom-most diff layer.
-// It will just hold the node references from the given map which are safe to -// copy. -func (b *nodebuffer) commit(nodes map[common.Hash]map[string]*trienode.Node) *nodebuffer { - var ( - delta int64 - overwrite int64 - overwriteSize int64 - ) - for owner, subset := range nodes { - current, exist := b.nodes[owner] - if !exist { - // Allocate a new map for the subset instead of claiming it directly - // from the passed map to avoid potential concurrent map read/write. - // The nodes belong to original diff layer are still accessible even - // after merging, thus the ownership of nodes map should still belong - // to original layer and any mutation on it should be prevented. - for path, n := range subset { - delta += int64(len(n.Blob) + len(path)) - } - b.nodes[owner] = maps.Clone(subset) - continue - } - for path, n := range subset { - if orig, exist := current[path]; !exist { - delta += int64(len(n.Blob) + len(path)) - } else { - delta += int64(len(n.Blob) - len(orig.Blob)) - overwrite++ - overwriteSize += int64(len(orig.Blob) + len(path)) - } - current[path] = n - } - b.nodes[owner] = current - } - b.updateSize(delta) - b.layers++ - gcNodesMeter.Mark(overwrite) - gcBytesMeter.Mark(overwriteSize) - return b -} - -// revert is the reverse operation of commit. It also merges the provided nodes -// into the nodebuffer, the difference is that the provided node set should -// revert the changes made by the last state transition. -func (b *nodebuffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) error { - // Short circuit if no embedded state transition to revert. - if b.layers == 0 { - return errStateUnrecoverable - } - b.layers-- - - // Reset the entire buffer if only a single transition left. 
- if b.layers == 0 { - b.reset() - return nil - } - var delta int64 - for owner, subset := range nodes { - current, ok := b.nodes[owner] - if !ok { - panic(fmt.Sprintf("non-existent subset (%x)", owner)) - } - for path, n := range subset { - orig, ok := current[path] - if !ok { - // There is a special case in MPT that one child is removed from - // a fullNode which only has two children, and then a new child - // with different position is immediately inserted into the fullNode. - // In this case, the clean child of the fullNode will also be - // marked as dirty because of node collapse and expansion. - // - // In case of database rollback, don't panic if this "clean" - // node occurs which is not present in buffer. - var blob []byte - if owner == (common.Hash{}) { - blob = rawdb.ReadAccountTrieNode(db, []byte(path)) - } else { - blob = rawdb.ReadStorageTrieNode(db, owner, []byte(path)) - } - // Ignore the clean node in the case described above. - if bytes.Equal(blob, n.Blob) { - continue - } - panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex())) - } - current[path] = n - delta += int64(len(n.Blob)) - int64(len(orig.Blob)) - } - } - b.updateSize(delta) - return nil -} - -// updateSize updates the total cache size by the given delta. -func (b *nodebuffer) updateSize(delta int64) { - size := int64(b.size) + delta - if size >= 0 { - b.size = uint64(size) - return - } - s := b.size - b.size = 0 - log.Error("Invalid pathdb buffer size", "prev", common.StorageSize(s), "delta", common.StorageSize(delta)) -} - -// reset cleans up the disk cache. -func (b *nodebuffer) reset() { - b.layers = 0 - b.size = 0 - b.nodes = make(map[common.Hash]map[string]*trienode.Node) -} - -// empty returns an indicator if nodebuffer contains any state transition inside. 
-func (b *nodebuffer) empty() bool { - return b.layers == 0 -} - -// setSize sets the buffer size to the provided number, and invokes a flush -// operation if the current memory usage exceeds the new limit. -func (b *nodebuffer) setSize(size int, db ethdb.KeyValueStore, freezer ethdb.AncientStore, clean *fastcache.Cache, id uint64) error { - b.limit = uint64(size) - return b.flush(db, freezer, clean, id, false) -} - -// allocBatch returns a database batch with pre-allocated buffer. -func (b *nodebuffer) allocBatch(db ethdb.KeyValueStore) ethdb.Batch { - var metasize int - for owner, nodes := range b.nodes { - if owner == (common.Hash{}) { - metasize += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix - } else { - metasize += len(nodes) * (len(rawdb.TrieNodeStoragePrefix) + common.HashLength) // database key prefix + owner - } - } - return db.NewBatchWithSize((metasize + int(b.size)) * 11 / 10) // extra 10% for potential pebble internal stuff -} - -// flush persists the in-memory dirty trie node into the disk if the configured -// memory threshold is reached. Note, all data must be written atomically. -func (b *nodebuffer) flush(db ethdb.KeyValueStore, freezer ethdb.AncientWriter, clean *fastcache.Cache, id uint64, force bool) error { - if b.size <= b.limit && !force { - return nil - } - // Ensure the target state id is aligned with the internal counter. - head := rawdb.ReadPersistentStateID(db) - if head+b.layers != id { - return fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id) - } - var ( - start = time.Now() - batch = b.allocBatch(db) - ) - // Explicitly sync the state freezer, ensuring that all written - // data is transferred to disk before updating the key-value store. 
- if freezer != nil { - if err := freezer.Sync(); err != nil { - return err - } - } - nodes := writeNodes(batch, b.nodes, clean) - rawdb.WritePersistentStateID(batch, id) - - // Flush all mutations in a single batch - size := batch.ValueSize() - if err := batch.Write(); err != nil { - return err - } - commitBytesMeter.Mark(int64(size)) - commitNodesMeter.Mark(int64(nodes)) - commitTimeTimer.UpdateSince(start) - log.Debug("Persisted pathdb nodes", "nodes", len(b.nodes), "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start))) - b.reset() - return nil -} - -// writeNodes writes the trie nodes into the provided database batch. -// Note this function will also inject all the newly written nodes -// into clean cache. -func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) (total int) { - for owner, subset := range nodes { - for path, n := range subset { - if n.IsDeleted() { - if owner == (common.Hash{}) { - rawdb.DeleteAccountTrieNode(batch, []byte(path)) - } else { - rawdb.DeleteStorageTrieNode(batch, owner, []byte(path)) - } - if clean != nil { - clean.Del(cacheKey(owner, []byte(path))) - } - } else { - if owner == (common.Hash{}) { - rawdb.WriteAccountTrieNode(batch, []byte(path), n.Blob) - } else { - rawdb.WriteStorageTrieNode(batch, owner, []byte(path), n.Blob) - } - if clean != nil { - clean.Set(cacheKey(owner, []byte(path)), n.Blob) - } - } - } - total += len(subset) - } - return total -} - -// cacheKey constructs the unique key of clean cache. -func cacheKey(owner common.Hash, path []byte) []byte { - if owner == (common.Hash{}) { - return path - } - return append(owner.Bytes(), path...) -} diff --git a/triedb/pathdb/nodes.go b/triedb/pathdb/nodes.go new file mode 100644 index 0000000000..ade669512e --- /dev/null +++ b/triedb/pathdb/nodes.go @@ -0,0 +1,246 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/> + +package pathdb + +import ( + "bytes" + "fmt" + "io" + "maps" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// nodeSet represents a collection of modified trie nodes resulting from a state +// transition, typically corresponding to a block execution. It can also represent +// the combined trie node set from several aggregated state transitions. +type nodeSet struct { + size uint64 // aggregated size of the trie node + nodes map[common.Hash]map[string]*trienode.Node // node set, mapped by owner and path +} + +// newNodeSet constructs the set with the provided dirty trie nodes. +func newNodeSet(nodes map[common.Hash]map[string]*trienode.Node) *nodeSet { + // Don't panic for the lazy callers, initialize the nil map instead + if nodes == nil { + nodes = make(map[common.Hash]map[string]*trienode.Node) + } + s := &nodeSet{nodes: nodes} + s.computeSize() + return s +} + +// computeSize calculates the database size of the held trie nodes.
+func (s *nodeSet) computeSize() { + var size uint64 + for owner, subset := range s.nodes { + var prefix int + if owner != (common.Hash{}) { + prefix = common.HashLength // owner (32 bytes) for storage trie nodes + } + for path, n := range subset { + size += uint64(prefix + len(n.Blob) + len(path)) + } + } + s.size = size +} + +// updateSize updates the total cache size by the given delta. +func (s *nodeSet) updateSize(delta int64) { + size := int64(s.size) + delta + if size >= 0 { + s.size = uint64(size) + return + } + log.Error("Nodeset size underflow", "prev", common.StorageSize(s.size), "delta", common.StorageSize(delta)) + s.size = 0 +} + +// node retrieves the trie node with node path and its trie identifier. +func (s *nodeSet) node(owner common.Hash, path []byte) (*trienode.Node, bool) { + subset, ok := s.nodes[owner] + if !ok { + return nil, false + } + n, ok := subset[string(path)] + if !ok { + return nil, false + } + return n, true +} + +// merge integrates the provided dirty nodes into the set. The provided nodeset +// will remain unchanged, as it may still be referenced by other layers. +func (s *nodeSet) merge(set *nodeSet) { + var ( + delta int64 // size difference resulting from node merging + overwrite counter // counter of nodes being overwritten + ) + for owner, subset := range set.nodes { + var prefix int + if owner != (common.Hash{}) { + prefix = common.HashLength + } + current, exist := s.nodes[owner] + if !exist { + for path, n := range subset { + delta += int64(prefix + len(n.Blob) + len(path)) + } + // Perform a shallow copy of the map for the subset instead of claiming it + // directly from the provided nodeset to avoid potential concurrent map + // read/write issues. The nodes belonging to the original diff layer remain + // accessible even after merging. Therefore, ownership of the nodes map + // should still belong to the original layer, and any modifications to it + // should be prevented. 
+ s.nodes[owner] = maps.Clone(subset) + continue + } + for path, n := range subset { + if orig, exist := current[path]; !exist { + delta += int64(prefix + len(n.Blob) + len(path)) + } else { + delta += int64(len(n.Blob) - len(orig.Blob)) + overwrite.add(prefix + len(orig.Blob) + len(path)) + } + current[path] = n + } + s.nodes[owner] = current + } + overwrite.report(gcTrieNodeMeter, gcTrieNodeBytesMeter) + s.updateSize(delta) +} + +// revert merges the provided trie nodes into the set. This should reverse the +// changes made by the most recent state transition. +func (s *nodeSet) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) { + var delta int64 + for owner, subset := range nodes { + current, ok := s.nodes[owner] + if !ok { + panic(fmt.Sprintf("non-existent subset (%x)", owner)) + } + for path, n := range subset { + orig, ok := current[path] + if !ok { + // There is a special case in merkle tree that one child is removed + // from a fullNode which only has two children, and then a new child + // with different position is immediately inserted into the fullNode. + // In this case, the clean child of the fullNode will also be marked + // as dirty because of node collapse and expansion. In case of database + // rollback, don't panic if this "clean" node occurs which is not + // present in buffer. + var blob []byte + if owner == (common.Hash{}) { + blob = rawdb.ReadAccountTrieNode(db, []byte(path)) + } else { + blob = rawdb.ReadStorageTrieNode(db, owner, []byte(path)) + } + // Ignore the clean node in the case described above. + if bytes.Equal(blob, n.Blob) { + continue + } + panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex())) + } + current[path] = n + delta += int64(len(n.Blob)) - int64(len(orig.Blob)) + } + } + s.updateSize(delta) +} + +// journalNode represents a trie node persisted in the journal. 
+type journalNode struct { + Path []byte // Path of the node in the trie + Blob []byte // RLP-encoded trie node blob, nil means the node is deleted +} + +// journalNodes represents a list of trie nodes belonging to a single account +// or the main account trie. +type journalNodes struct { + Owner common.Hash + Nodes []journalNode +} + +// encode serializes the content of trie nodes into the provided writer. +func (s *nodeSet) encode(w io.Writer) error { + nodes := make([]journalNodes, 0, len(s.nodes)) + for owner, subset := range s.nodes { + entry := journalNodes{Owner: owner} + for path, node := range subset { + entry.Nodes = append(entry.Nodes, journalNode{ + Path: []byte(path), + Blob: node.Blob, + }) + } + nodes = append(nodes, entry) + } + return rlp.Encode(w, nodes) +} + +// decode deserializes the content from the rlp stream into the nodeset. +func (s *nodeSet) decode(r *rlp.Stream) error { + var encoded []journalNodes + if err := r.Decode(&encoded); err != nil { + return fmt.Errorf("load nodes: %v", err) + } + nodes := make(map[common.Hash]map[string]*trienode.Node) + for _, entry := range encoded { + subset := make(map[string]*trienode.Node) + for _, n := range entry.Nodes { + if len(n.Blob) > 0 { + subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) + } else { + subset[string(n.Path)] = trienode.NewDeleted() + } + } + nodes[entry.Owner] = subset + } + s.nodes = nodes + s.computeSize() + return nil +} + +// write flushes nodes into the provided database batch as a whole. +func (s *nodeSet) write(batch ethdb.Batch, clean *fastcache.Cache) int { + return writeNodes(batch, s.nodes, clean) +} + +// reset clears all cached trie node data. +func (s *nodeSet) reset() { + s.nodes = make(map[common.Hash]map[string]*trienode.Node) + s.size = 0 +} + +// dbsize returns the approximate size of db write.
+func (s *nodeSet) dbsize() int { + var m int + for owner, nodes := range s.nodes { + if owner == (common.Hash{}) { + m += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix + } else { + m += len(nodes) * (len(rawdb.TrieNodeStoragePrefix)) // database key prefix + } + } + return m + int(s.size) +} diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index 6a58493ba6..2ca4a0205b 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -45,14 +45,14 @@ func (loc *nodeLoc) string() string { return fmt.Sprintf("loc: %s, depth: %d", loc.loc, loc.depth) } -// reader implements the database.Reader interface, providing the functionalities to +// reader implements the database.NodeReader interface, providing the functionalities to // retrieve trie nodes by wrapping the internal state layer. type reader struct { layer layer noHashCheck bool } -// Node implements database.Reader interface, retrieving the node with specified +// Node implements database.NodeReader interface, retrieving the node with specified // node info. Don't modify the returned byte slice since it's not deep-copied // and still be referenced by database. func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { @@ -84,8 +84,8 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, return blob, nil } -// Reader retrieves a layer belonging to the given state root. -func (db *Database) Reader(root common.Hash) (database.Reader, error) { +// NodeReader retrieves a layer belonging to the given state root. 
+func (db *Database) NodeReader(root common.Hash) (database.NodeReader, error) { layer := db.tree.get(root) if layer == nil { return nil, fmt.Errorf("state %#x is not available", root) diff --git a/triedb/pathdb/states.go b/triedb/pathdb/states.go new file mode 100644 index 0000000000..da8befab95 --- /dev/null +++ b/triedb/pathdb/states.go @@ -0,0 +1,166 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/> + +package pathdb + +import ( + "fmt" + "io" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" +) + +// counter helps in tracking items and their corresponding sizes. +type counter struct { + n int + size int +} + +// add size to the counter and increase the item counter. +func (c *counter) add(size int) { + c.n++ + c.size += size +} + +// report uploads the cached statistics to meters. +func (c *counter) report(count metrics.Meter, size metrics.Meter) { + count.Mark(int64(c.n)) + size.Mark(int64(c.size)) +} + +// StateSetWithOrigin wraps the state set with additional original values of the +// mutated states.
+type StateSetWithOrigin struct { + // accountOrigin represents the account data before the state transition, + // corresponding to both the accountData and destructSet. It's keyed by the + // account address. The nil value means the account was not present before. + accountOrigin map[common.Address][]byte + + // storageOrigin represents the storage data before the state transition, + // corresponding to storageData and deleted slots of destructSet. It's keyed + // by the account address and slot key hash. The nil value means the slot was + // not present. + storageOrigin map[common.Address]map[common.Hash][]byte + + // Memory size of the state data (accountOrigin and storageOrigin) + size uint64 +} + +// NewStateSetWithOrigin constructs the state set with the provided data. +func NewStateSetWithOrigin(accountOrigin map[common.Address][]byte, storageOrigin map[common.Address]map[common.Hash][]byte) *StateSetWithOrigin { + // Don't panic for the lazy callers, initialize the nil maps instead. + if accountOrigin == nil { + accountOrigin = make(map[common.Address][]byte) + } + if storageOrigin == nil { + storageOrigin = make(map[common.Address]map[common.Hash][]byte) + } + // Count the memory size occupied by the set. Note that each slot key here + // uses 2*common.HashLength to keep consistent with the calculation method + // of stateSet. + var size int + for _, data := range accountOrigin { + size += common.HashLength + len(data) + } + for _, slots := range storageOrigin { + for _, data := range slots { + size += 2*common.HashLength + len(data) + } + } + return &StateSetWithOrigin{ + accountOrigin: accountOrigin, + storageOrigin: storageOrigin, + size: uint64(size), + } +} + +// encode serializes the content of state set into the provided writer.
+func (s *StateSetWithOrigin) encode(w io.Writer) error { + // Encode accounts + type Accounts struct { + Addresses []common.Address + Accounts [][]byte + } + var accounts Accounts + for address, blob := range s.accountOrigin { + accounts.Addresses = append(accounts.Addresses, address) + accounts.Accounts = append(accounts.Accounts, blob) + } + if err := rlp.Encode(w, accounts); err != nil { + return err + } + // Encode storages + type Storage struct { + Address common.Address + Keys []common.Hash + Blobs [][]byte + } + storages := make([]Storage, 0, len(s.storageOrigin)) + for address, slots := range s.storageOrigin { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storages = append(storages, Storage{Address: address, Keys: keys, Blobs: vals}) + } + return rlp.Encode(w, storages) +} + +// decode deserializes the content from the rlp stream into the state set. 
+func (s *StateSetWithOrigin) decode(r *rlp.Stream) error { + // Decode account origin + type Accounts struct { + Addresses []common.Address + Accounts [][]byte + } + var ( + accounts Accounts + accountSet = make(map[common.Address][]byte) + ) + if err := r.Decode(&accounts); err != nil { + return fmt.Errorf("load diff account origin set: %v", err) + } + for i := 0; i < len(accounts.Accounts); i++ { + accountSet[accounts.Addresses[i]] = accounts.Accounts[i] + } + s.accountOrigin = accountSet + + // Decode storage origin + type Storage struct { + Address common.Address + Keys []common.Hash + Blobs [][]byte + } + var ( + storages []Storage + storageSet = make(map[common.Address]map[common.Hash][]byte) + ) + if err := r.Decode(&storages); err != nil { + return fmt.Errorf("load diff storage origin: %v", err) + } + for _, storage := range storages { + storageSet[storage.Address] = make(map[common.Hash][]byte) + for i := 0; i < len(storage.Keys); i++ { + storageSet[storage.Address][storage.Keys[i]] = storage.Blobs[i] + } + } + s.storageOrigin = storageSet + return nil +} diff --git a/triedb/states.go b/triedb/states.go new file mode 100644 index 0000000000..1f9a0de522 --- /dev/null +++ b/triedb/states.go @@ -0,0 +1,51 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. 
If not, see <http://www.gnu.org/licenses/> + +package triedb + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/triedb/pathdb" +) + +// StateSet represents a collection of mutated states during a state transition. +type StateSet struct { + Destructs map[common.Hash]struct{} // Destructed accounts + Accounts map[common.Hash][]byte // Mutated accounts in 'slim RLP' encoding + AccountsOrigin map[common.Address][]byte // Original values of mutated accounts in 'slim RLP' encoding + Storages map[common.Hash]map[common.Hash][]byte // Mutated storage slots in 'prefix-zero-trimmed' RLP format + StoragesOrigin map[common.Address]map[common.Hash][]byte // Original values of mutated storage slots in 'prefix-zero-trimmed' RLP format +} + +// NewStateSet initializes an empty state set. +func NewStateSet() *StateSet { + return &StateSet{ + Destructs: make(map[common.Hash]struct{}), + Accounts: make(map[common.Hash][]byte), + AccountsOrigin: make(map[common.Address][]byte), + Storages: make(map[common.Hash]map[common.Hash][]byte), + StoragesOrigin: make(map[common.Address]map[common.Hash][]byte), + } +} + +// internal returns a state set for path database internal usage. +func (set *StateSet) internal() *pathdb.StateSetWithOrigin { + // the nil state set is possible in tests. + if set == nil { + return nil + } + return pathdb.NewStateSetWithOrigin(set.AccountsOrigin, set.StoragesOrigin) +}