From 1b1611b8d003007eed45d36f3f30c0596ffa16a4 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 17 Oct 2023 20:09:25 +0800 Subject: [PATCH] core, trie, eth: refactor stacktrie constructor (#28350) This change enhances the stacktrie constructor by introducing an option struct. It also simplifies the `Hash` and `Commit` operations, getting rid of the special handling round root node. --- core/state/snapshot/conversion.go | 16 +-- core/state/statedb.go | 4 +- eth/protocols/snap/sync.go | 51 ++++++---- tests/fuzzers/stacktrie/trie_fuzzer.go | 11 +- trie/stacktrie.go | 135 ++++++++++++------------- trie/trie_test.go | 20 ++-- 6 files changed, 119 insertions(+), 118 deletions(-) diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index 321bfbc6a2..681be7ebc0 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -362,21 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou } func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) { - var nodeWriter trie.NodeWriteFunc + options := trie.NewStackTrieOptions() if db != nil { - nodeWriter = func(path []byte, hash common.Hash, blob []byte) { + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme) - } + }) } - t := trie.NewStackTrie(nodeWriter) + t := trie.NewStackTrie(options) for leaf := range in { t.Update(leaf.key[:], leaf.value) } - var root common.Hash - if db == nil { - root = t.Hash() - } else { - root, _ = t.Commit() - } - out <- root + out <- t.Commit() } diff --git a/core/state/statedb.go b/core/state/statedb.go index d28cd29b30..195e463c28 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -964,10 +964,12 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (boo nodes = trienode.NewNodeSet(addrHash) slots = make(map[common.Hash][]byte) ) - stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { nodes.AddNode(path, trienode.NewDeleted()) size += common.StorageSize(len(path)) }) + stack := trie.NewStackTrie(options) for iter.Next() { if size > storageDeleteLimit { return true, size, nil, nil, nil diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 22638d0473..041bf32b99 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -738,9 +738,11 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - task.genTrie = trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, val, s.scheme) + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) }) + task.genTrie = trie.NewStackTrie(options) for accountHash, subtasks := range task.SubTasks { for _, subtask := range subtasks { subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback @@ -752,9 +754,11 @@ func (s *Syncer) loadSyncStatus() { }, } owner := accountHash // local assignment for stacktrie writer closure - subtask.genTrie = trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, val, s.scheme) + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme) }) + subtask.genTrie = trie.NewStackTrie(options) } } } @@ -806,14 +810,16 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) + }) s.tasks = append(s.tasks, &accountTask{ Next: next, Last: last, SubTasks: make(map[common.Hash][]*storageTask), genBatch: batch, - genTrie: trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, val, s.scheme) - }), + genTrie: trie.NewStackTrie(options), }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -2006,14 +2012,16 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { }, } owner := account // local assignment for stacktrie writer closure + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) + }) tasks = append(tasks, &storageTask{ Next: common.Hash{}, Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme) - }), + genTrie: trie.NewStackTrie(options), }) for r.Next() { batch := ethdb.HookedBatch{ @@ -2022,14 +2030,16 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) + }) tasks = append(tasks, &storageTask{ Next: r.Start(), Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme) - }), + genTrie: trie.NewStackTrie(options), }) } for _, task := range tasks { @@ -2075,9 +2085,11 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if i < len(res.hashes)-1 || res.subTask == nil { // no need to make local reassignment of account: this closure does not outlive the loop - tr := trie.NewStackTrie(func(path []byte, hash common.Hash, val []byte) { - rawdb.WriteTrieNode(batch, account, path, hash, val, s.scheme) + options := trie.NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) }) + tr := trie.NewStackTrie(options) for j := 0; j < len(res.hashes[i]); j++ { tr.Update(res.hashes[i][j][:], res.slots[i][j]) } @@ -2099,9 +2111,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // Large contracts could have generated new trie nodes, flush them to disk if res.subTask != nil { if res.subTask.done { - if root, err := res.subTask.genTrie.Commit(); err != nil { - log.Error("Failed to commit stack slots", "err", err) - } else if root == res.subTask.root { + root := res.subTask.genTrie.Commit() + if root == res.subTask.root { // If the chunk's root is an overflown but full delivery, clear the heal request for i, account := range res.mainTask.res.hashes { if account == res.accounts[len(res.accounts)-1] { @@ -2317,9 +2328,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // flush after finalizing task.done. It's fine even if we crash and lose this // write as it will only cause more data to be downloaded during heal. if task.done { - if _, err := task.genTrie.Commit(); err != nil { - log.Error("Failed to commit stack account", "err", err) - } + task.genTrie.Commit() } if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { if err := task.genBatch.Write(); err != nil { diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go index 20b8ca24b3..b818f2ec3c 100644 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -140,9 +140,11 @@ func (f *fuzzer) fuzz() int { trieA = trie.NewEmpty(dbA) spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} dbB = trie.NewDatabase(rawdb.NewDatabase(spongeB), nil) - trieB = trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + + options = trie.NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) }) + trieB = trie.NewStackTrie(options) vals []kv useful bool maxElements = 10000 @@ -204,19 +206,20 @@ func (f *fuzzer) fuzz() int { // Ensure all the nodes are persisted correctly var ( - nodeset = make(map[string][]byte) // path -> blob - trieC = trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodeset = make(map[string][]byte) // path -> blob + optionsC = trie.NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { if crypto.Keccak256Hash(blob) != hash { panic("invalid node blob") } nodeset[string(path)] = common.CopyBytes(blob) }) + trieC = trie.NewStackTrie(optionsC) checked int ) for _, kv := range vals { trieC.MustUpdate(kv.k, kv.v) } - rootC, _ := trieC.Commit() + rootC := trieC.Commit() if rootA != rootC { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) } diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 35208e1cb3..09c3402663 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -17,7 +17,6 @@ package trie import ( - "errors" "sync" "github.com/ethereum/go-ethereum/common" @@ -26,28 +25,40 @@ import ( ) var ( - ErrCommitDisabled = errors.New("no database for committing") - stPool = sync.Pool{New: func() any { return new(stNode) }} - _ = types.TrieHasher((*StackTrie)(nil)) + stPool = sync.Pool{New: func() any { return new(stNode) }} + _ = types.TrieHasher((*StackTrie)(nil)) ) -// NodeWriteFunc is used to provide all information of a dirty node for committing -// so that callers can flush nodes into database with desired scheme. -type NodeWriteFunc = func(path []byte, hash common.Hash, blob []byte) +// StackTrieOptions contains the configured options for manipulating the stackTrie. +type StackTrieOptions struct { + Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes +} + +// NewStackTrieOptions initializes an empty options for stackTrie. +func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } + +// WithWriter configures trie node writer within the options. +func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { + o.Writer = writer + return o +} // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - writeFn NodeWriteFunc // function for committing nodes, can be nil + options *StackTrieOptions root *stNode h *hasher } // NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(writeFn NodeWriteFunc) *StackTrie { +func NewStackTrie(options *StackTrieOptions) *StackTrie { + if options == nil { + options = NewStackTrieOptions() + } return &StackTrie{ - writeFn: writeFn, + options: options, root: stPool.Get().(*stNode), h: newHasher(false), } @@ -59,7 +70,9 @@ func (t *StackTrie) Update(key, value []byte) error { if len(value) == 0 { panic("deletion not supported") } - t.insert(t.root, k[:len(k)-1], value, nil) + k = k[:len(k)-1] // chop the termination flag + + t.insert(t.root, k, value, nil) return nil } @@ -71,8 +84,9 @@ func (t *StackTrie) MustUpdate(key, value []byte) { } } +// Reset resets the stack trie object to empty state. func (t *StackTrie) Reset() { - t.writeFn = nil + t.options = NewStackTrieOptions() t.root = stPool.Get().(*stNode) } @@ -138,7 +152,7 @@ func (n *stNode) getDiffIndex(key []byte) int { // Helper function to that inserts a (key, value) pair into // the trie. -func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { +func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { switch st.typ { case branchNode: /* Branch */ idx := int(key[0]) @@ -147,7 +161,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { for i := idx - 1; i >= 0; i-- { if st.children[i] != nil { if st.children[i].typ != hashedNode { - t.hash(st.children[i], append(prefix, byte(i))) + t.hash(st.children[i], append(path, byte(i))) } break } @@ -157,7 +171,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { if st.children[idx] == nil { st.children[idx] = newLeaf(key[1:], value) } else { - t.insert(st.children[idx], key[1:], value, append(prefix, key[0])) + t.insert(st.children[idx], key[1:], value, append(path, key[0])) } case extNode: /* Ext */ @@ -172,7 +186,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { if diffidx == len(st.key) { // Ext key and key segment are identical, recurse into // the child node. - t.insert(st.children[0], key[diffidx:], value, append(prefix, key[:diffidx]...)) + t.insert(st.children[0], key[diffidx:], value, append(path, key[:diffidx]...)) return } // Save the original part. Depending if the break is @@ -185,14 +199,14 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { // extension. The path prefix of the newly-inserted // extension should also contain the different byte. n = newExt(st.key[diffidx+1:], st.children[0]) - t.hash(n, append(prefix, st.key[:diffidx+1]...)) + t.hash(n, append(path, st.key[:diffidx+1]...)) } else { // Break on the last byte, no need to insert // an extension node: reuse the current node. // The path prefix of the original part should // still be same. n = st.children[0] - t.hash(n, append(prefix, st.key...)) + t.hash(n, append(path, st.key...)) } var p *stNode if diffidx == 0 { @@ -257,7 +271,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { // is hashed directly in order to free up some memory. origIdx := st.key[diffidx] p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val) - t.hash(p.children[origIdx], append(prefix, st.key[:diffidx+1]...)) + t.hash(p.children[origIdx], append(path, st.key[:diffidx+1]...)) newIdx := key[diffidx] p.children[newIdx] = newLeaf(key[diffidx+1:], value) @@ -292,8 +306,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, prefix []byte) { // // This method also sets 'st.type' to hashedNode, and clears 'st.key'. func (t *StackTrie) hash(st *stNode, path []byte) { - // The switch below sets this to the RLP-encoding of this node. - var encodedNode []byte + var blob []byte // RLP-encoded node blob switch st.typ { case hashedNode: @@ -323,11 +336,13 @@ func (t *StackTrie) hash(st *stNode, path []byte) { stPool.Put(child.reset()) // Release child back to pool. } nodes.encode(t.h.encbuf) - encodedNode = t.h.encodedBytes() + blob = t.h.encodedBytes() case extNode: + // recursively hash and commit child as the first step t.hash(st.children[0], append(path, st.key...)) + // encode the extension node n := shortNode{Key: hexToCompactInPlace(st.key)} if len(st.children[0].val) < 32 { n.Val = rawNode(st.children[0].val) @@ -335,7 +350,7 @@ func (t *StackTrie) hash(st *stNode, path []byte) { n.Val = hashNode(st.children[0].val) } n.encode(t.h.encbuf) - encodedNode = t.h.encodedBytes() + blob = t.h.encodedBytes() stPool.Put(st.children[0].reset()) // Release child back to pool. st.children[0] = nil @@ -345,7 +360,7 @@ func (t *StackTrie) hash(st *stNode, path []byte) { n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)} n.encode(t.h.encbuf) - encodedNode = t.h.encodedBytes() + blob = t.h.encodedBytes() default: panic("invalid node type") @@ -353,60 +368,38 @@ func (t *StackTrie) hash(st *stNode, path []byte) { st.typ = hashedNode st.key = st.key[:0] - if len(encodedNode) < 32 { - st.val = common.CopyBytes(encodedNode) + + // Skip committing the non-root node if the size is smaller than 32 bytes. + if len(blob) < 32 && len(path) > 0 { + st.val = common.CopyBytes(blob) return } - // Write the hash to the 'val'. We allocate a new val here to not mutate - // input values - st.val = t.h.hashData(encodedNode) - if t.writeFn != nil { - t.writeFn(path, common.BytesToHash(st.val), encodedNode) + // input values. + st.val = t.h.hashData(blob) + + // Commit the trie node if the writer is configured. + if t.options.Writer != nil { + t.options.Writer(path, common.BytesToHash(st.val), blob) } } -// Hash returns the hash of the current node. -func (t *StackTrie) Hash() (h common.Hash) { - st := t.root - t.hash(st, nil) - if len(st.val) == 32 { - copy(h[:], st.val) - return h - } - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed, and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing. - t.h.sha.Reset() - t.h.sha.Write(st.val) - t.h.sha.Read(h[:]) - return h +// Hash will firstly hash the entire trie if it's still not hashed and then commit +// all nodes to the associated database. Actually most of the trie nodes have been +// committed already. The main purpose here is to commit the nodes on right boundary. +// +// For stack trie, Hash and Commit are functionally identical. +func (t *StackTrie) Hash() common.Hash { + n := t.root + t.hash(n, nil) + return common.BytesToHash(n.val) } -// Commit will firstly hash the entire trie if it's still not hashed -// and then commit all nodes to the associated database. Actually most -// of the trie nodes MAY have been committed already. The main purpose -// here is to commit the root node. +// Commit will firstly hash the entire trie if it's still not hashed and then commit +// all nodes to the associated database. Actually most of the trie nodes have been +// committed already. The main purpose here is to commit the nodes on right boundary. // -// The associated database is expected, otherwise the whole commit -// functionality should be disabled. -func (t *StackTrie) Commit() (h common.Hash, err error) { - if t.writeFn == nil { - return common.Hash{}, ErrCommitDisabled - } - st := t.root - t.hash(st, nil) - if len(st.val) == 32 { - copy(h[:], st.val) - return h, nil - } - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed (and committed), and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing+commit. - t.h.sha.Reset() - t.h.sha.Write(st.val) - t.h.sha.Read(h[:]) - - t.writeFn(nil, h, st.val) - return h, nil +// For stack trie, Hash and Commit are functionally identical. +func (t *StackTrie) Commit() common.Hash { + return t.Hash() } diff --git a/trie/trie_test.go b/trie/trie_test.go index 2dfe81ef81..4315753548 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -912,9 +912,12 @@ func TestCommitSequenceStackTrie(t *testing.T) { trie := NewEmpty(db) // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + + options := NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) + stTrie := NewStackTrie(options) // Fill the trie with elements for i := 0; i < count; i++ { // For the stack trie, we need to do inserts in proper order @@ -937,10 +940,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) db.Commit(root, false) // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } + stRoot := stTrie.Commit() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } @@ -971,9 +971,12 @@ func TestCommitSequenceSmallRoot(t *testing.T) { trie := NewEmpty(db) // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + + options := NewStackTrieOptions() + options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) + stTrie := NewStackTrie(options) // Add a single small-element to the trie(s) key := make([]byte, 5) key[0] = 1 @@ -985,10 +988,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil) db.Commit(root, false) // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } + stRoot := stTrie.Commit() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) }