diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index 681be7ebc0..8a0fd1989a 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou } func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) { - options := trie.NewStackTrieOptions() + var onTrieNode trie.OnTrieNode if db != nil { - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + onTrieNode = func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme) - }) + } } - t := trie.NewStackTrie(options) + t := trie.NewStackTrie(onTrieNode) for leaf := range in { t.Update(leaf.key[:], leaf.value) } - out <- t.Commit() + out <- t.Hash() } diff --git a/core/state/statedb.go b/core/state/statedb.go index a4b8cf93e2..ebd2143882 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -961,12 +961,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (boo nodes = trienode.NewNodeSet(addrHash) slots = make(map[common.Hash][]byte) ) - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { nodes.AddNode(path, trienode.NewDeleted()) size += common.StorageSize(len(path)) }) - stack := trie.NewStackTrie(options) for iter.Next() { if size > storageDeleteLimit { return true, size, nil, nil, nil diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go new file mode 100644 index 0000000000..8ef1a00753 --- /dev/null +++ b/eth/protocols/snap/gentrie.go @@ -0,0 +1,287 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" +) + +// genTrie interface is used by the snap syncer to generate merkle tree nodes +// based on a received batch of states. +type genTrie interface { + // update inserts the state item into generator trie. + update(key, value []byte) error + + // commit flushes the right boundary nodes if complete flag is true. This + // function must be called before flushing the associated database batch. + commit(complete bool) common.Hash +} + +// pathTrie is a wrapper over the stackTrie, incorporating numerous additional +// logics to handle the semi-completed trie and potential leftover dangling +// nodes in the database. It is utilized for constructing the merkle tree nodes +// in path mode during the snap sync process. +type pathTrie struct { + owner common.Hash // identifier of trie owner, empty for account trie + tr *trie.StackTrie // underlying raw stack trie + first []byte // the path of first committed node by stackTrie + last []byte // the path of last committed node by stackTrie + + // This flag indicates whether nodes on the left boundary are skipped for + // committing. If set, the left boundary nodes are considered incomplete + // due to potentially missing left children. + skipLeftBoundary bool + db ethdb.KeyValueReader + batch ethdb.Batch +} + +// newPathTrie initializes the path trie. +func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { + tr := &pathTrie{ + owner: owner, + skipLeftBoundary: skipLeftBoundary, + db: db, + batch: batch, + } + tr.tr = trie.NewStackTrie(tr.onTrieNode) + return tr +} + +// onTrieNode is invoked whenever a new node is committed by the stackTrie. +// +// As the committed nodes might be incomplete if they are on the boundaries +// (left or right), this function has the ability to detect the incomplete +// ones and filter them out for committing. +// +// Additionally, the assumption is made that there may exist leftover dangling +// nodes in the database. This function has the ability to detect the dangling +// nodes that fall within the path space of committed nodes (specifically on +// the path covered by internal extension nodes) and remove them from the +// database. This property ensures that the entire path space is uniquely +// occupied by committed nodes. +// +// Furthermore, all leftover dangling nodes along the path from committed nodes +// to the trie root (left and right boundaries) should be removed as well; +// otherwise, they might potentially disrupt the state healing process. +func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { + // Filter out the nodes on the left boundary if skipLeftBoundary is + // configured. Nodes are considered to be on the left boundary if + // it's the first one to be committed, or the parent/ancestor of the + // first committed node. + if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { + if t.first == nil { + // Memorize the path of first committed node, which is regarded + // as left boundary. Deep-copy is necessary as the path given + // is volatile. + t.first = append([]byte{}, path...) + + // The left boundary can be uniquely determined by the first committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the first + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the first committed node is considered the left + // boundary. The potential leftover dangling nodes on left boundary should + // be cleaned out. + // + // +-----+ + // | N_3 | shared path prefix of state_1 and state_2 + // +-----+ + // /- -\ + // +-----+ +-----+ + // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) + // +-----+ +-----+ + // + // The node with the path of the first committed one (e.g, N_1) is not + // removed because it's a sibling of the nodes we want to commit, not + // the parent or ancestor. + for i := 0; i < len(path); i++ { + t.delete(path[:i], false) + } + } + return + } + // If boundary filtering is not configured, or the node is not on the left + // boundary, commit it to database. + // + // Note: If the current committed node is an extension node, then the nodes + // falling within the path between itself and its standalone (not embedded + // in parent) child should be cleaned out for exclusively occupy the inner + // path. + // + // This is essential in snap sync to avoid leaving dangling nodes within + // this range covered by extension node which could potentially break the + // state healing. + // + // The extension node is detected if its path is the prefix of last committed + // one and path gap is larger than one. If the path gap is only one byte, + // the current node could either be a full node, or a extension with single + // byte key. In either case, no gaps will be left in the path. + if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { + for i := len(path) + 1; i < len(t.last); i++ { + t.delete(t.last[:i], true) + } + } + t.write(path, blob) + + // Update the last flag. Deep-copy is necessary as the provided path is volatile. + if t.last == nil { + t.last = append([]byte{}, path...) + } else { + t.last = append(t.last[:0], path...) + } +} + +// write commits the node write to provided database batch in path mode. +func (t *pathTrie) write(path []byte, blob []byte) { + if t.owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(t.batch, path, blob) + } else { + rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) + } +} + +func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { + if inner { + accountInnerLookupGauge.Inc(1) + } else { + accountOuterLookupGauge.Inc(1) + } + if !rawdb.ExistsAccountTrieNode(t.db, path) { + return + } + if inner { + accountInnerDeleteGauge.Inc(1) + } else { + accountOuterDeleteGauge.Inc(1) + } + rawdb.DeleteAccountTrieNode(t.batch, path) +} + +func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { + if inner { + storageInnerLookupGauge.Inc(1) + } else { + storageOuterLookupGauge.Inc(1) + } + if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { + return + } + if inner { + storageInnerDeleteGauge.Inc(1) + } else { + storageOuterDeleteGauge.Inc(1) + } + rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) +} + +// delete commits the node deletion to provided database batch in path mode. +func (t *pathTrie) delete(path []byte, inner bool) { + if t.owner == (common.Hash{}) { + t.deleteAccountNode(path, inner) + } else { + t.deleteStorageNode(path, inner) + } +} + +// update implements genTrie interface, inserting a (key, value) pair into the +// stack trie. +func (t *pathTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, flushing the right boundary if it's +// considered as complete. Otherwise, the nodes on the right boundary are +// discarded and cleaned up. +// +// Note, this function must be called before flushing database batch, otherwise, +// dangling nodes might be left in database. +func (t *pathTrie) commit(complete bool) common.Hash { + // If the right boundary is claimed as complete, flush them out. + // The nodes on both left and right boundary will still be filtered + // out if left boundary filtering is configured. + if complete { + // Commit all inserted but not yet committed nodes(on the right + // boundary) in the stackTrie. + hash := t.tr.Hash() + if t.skipLeftBoundary { + return common.Hash{} // hash is meaningless if left side is incomplete + } + return hash + } + // Discard nodes on the right boundary as it's claimed as incomplete. These + // nodes might be incomplete due to missing children on the right side. + // Furthermore, the potential leftover nodes on right boundary should also + // be cleaned out. + // + // The right boundary can be uniquely determined by the last committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the last + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the last committed node is considered the right + // boundary (root to N_3). + // + // +-----+ + // | N_3 | shared path prefix of last two states + // +-----+ + // /- -\ + // +-----+ +-----+ + // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) + // +-----+ +-----+ + // + // Another interesting scenario occurs when the trie is committed due to + // too many items being accumulated in the batch. To flush them out to + // the database, the path of the last inserted node (N_2) is temporarily + // treated as an incomplete right boundary, and nodes on this path are + // removed (e.g. from root to N_3). + // However, this path will be reclaimed as an internal path by inserting + // more items after the batch flush. New nodes on this path can be committed + // with no issues as they are actually complete. Also, from a database + // perspective, first deleting and then rewriting is a valid data update. + for i := 0; i < len(t.last); i++ { + t.delete(t.last[:i], false) + } + return common.Hash{} // the hash is meaningless for incomplete commit +} + +// hashTrie is a wrapper over the stackTrie for implementing genTrie interface. +type hashTrie struct { + tr *trie.StackTrie +} + +// newHashTrie initializes the hash trie. +func newHashTrie(batch ethdb.Batch) *hashTrie { + return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteLegacyTrieNode(batch, hash, blob) + })} +} + +// update implements genTrie interface, inserting a (key, value) pair into +// the stack trie. +func (t *hashTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, committing the nodes on right boundary. +func (t *hashTrie) commit(complete bool) common.Hash { + if !complete { + return common.Hash{} // the hash is meaningless for incomplete commit + } + return t.tr.Hash() // return hash only if it's claimed as complete +} diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go new file mode 100644 index 0000000000..1fb2dbce75 --- /dev/null +++ b/eth/protocols/snap/gentrie_test.go @@ -0,0 +1,553 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "math/rand" + "slices" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/trie" +) + +type replayer struct { + paths []string // sort in fifo order + hashes []common.Hash // empty for deletion + unknowns int // counter for unknown write +} + +func newBatchReplay() *replayer { + return &replayer{} +} + +func (r *replayer) decode(key []byte, value []byte) { + account := rawdb.IsAccountTrieNode(key) + storage := rawdb.IsStorageTrieNode(key) + if !account && !storage { + r.unknowns += 1 + return + } + var path []byte + if account { + _, path = rawdb.ResolveAccountTrieNodeKey(key) + } else { + _, owner, inner := rawdb.ResolveStorageTrieNode(key) + path = append(owner.Bytes(), inner...) + } + r.paths = append(r.paths, string(path)) + + if len(value) == 0 { + r.hashes = append(r.hashes, common.Hash{}) + } else { + r.hashes = append(r.hashes, crypto.Keccak256Hash(value)) + } +} + +// updates returns a set of effective mutations. Multiple mutations targeting +// the same node path will be merged in FIFO order. +func (r *replayer) modifies() map[string]common.Hash { + set := make(map[string]common.Hash) + for i, path := range r.paths { + set[path] = r.hashes[i] + } + return set +} + +// updates returns the number of updates. +func (r *replayer) updates() int { + var count int + for _, hash := range r.modifies() { + if hash == (common.Hash{}) { + continue + } + count++ + } + return count +} + +// Put inserts the given value into the key-value data store. +func (r *replayer) Put(key []byte, value []byte) error { + r.decode(key, value) + return nil +} + +// Delete removes the key from the key-value data store. +func (r *replayer) Delete(key []byte) error { + r.decode(key, nil) + return nil +} + +func byteToHex(str []byte) []byte { + l := len(str) * 2 + var nibbles = make([]byte, l) + for i, b := range str { + nibbles[i*2] = b / 16 + nibbles[i*2+1] = b % 16 + } + return nibbles +} + +// innerNodes returns the internal nodes narrowed by two boundaries along with +// the leftmost and rightmost sub-trie roots. +func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) { + var ( + leftRoot []byte + rightRoot []byte + firstHex = byteToHex(first) + lastHex = byteToHex(last) + inner = make(map[string]common.Hash) + ) + for path, hash := range nodes { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion, %v", []byte(path)) + } + // Filter out the siblings on the left side or the left boundary nodes. + if !includeLeft && (bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path))) { + continue + } + // Filter out the siblings on the right side or the right boundary nodes. + if !includeRight && (bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path))) { + continue + } + inner[path] = hash + + // Track the path of the leftmost sub trie root + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + // Track the path of the rightmost sub trie root + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + return inner, leftRoot, rightRoot +} + +func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer { + tr := newPathTrie(owner, first != 0, db, batch) + for i := first; i <= last; i++ { + tr.update(entries[i].k, entries[i].v) + } + tr.commit(last == len(entries)-1) + + replay := newBatchReplay() + batch.Replay(replay) + + return replay +} + +// TestPartialGentree verifies if the trie constructed with partial states can +// generate consistent trie nodes that match those of the full trie. +func TestPartialGentree(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) + // Build the partial tree with specific boundaries + r := buildPartial(common.Hash{}, db, batch, entries, first, last) + if r.unknowns > 0 { + t.Fatalf("Unknown database write: %d", r.unknowns) + } + + // Ensure all the internal nodes are produced + var ( + set = r.modifies() + inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) + ) + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestGentreeDanglingClearing tests if the dangling nodes falling within the +// path space of constructed tree can be correctly removed. +func TestGentreeDanglingClearing(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) + // Write the junk nodes as the dangling + var injects []string + for path := range nodes { + for i := 0; i < len(path); i++ { + _, ok := nodes[path[:i]] + if ok { + continue + } + injects = append(injects, path[:i]) + } + } + if len(injects) == 0 { + return + } + for _, path := range injects { + rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32)) + } + + // Build the partial tree with specific range + replay := buildPartial(common.Hash{}, db, batch, entries, first, last) + if replay.unknowns > 0 { + t.Fatalf("Unknown database write: %d", replay.unknowns) + } + set := replay.modifies() + + // Make sure the injected junks falling within the path space of + // committed trie nodes are correctly deleted. + _, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) + for _, path := range injects { + if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) { + continue + } + if bytes.Compare([]byte(path), rightRoot) > 0 { + continue + } + if hash, ok := set[path]; !ok || hash != (common.Hash{}) { + t.Fatalf("Missing delete, %v", []byte(path)) + } + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes +// even with lots of batch flushes. +func TestFlushPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {10, len(entries) - 1}, // no left + {10, len(entries) - 2}, // no left and right + {10, len(entries) - 10}, // no left and right + {11, 11}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + combined = db.NewBatch() + ) + inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t) + + tr := newPathTrie(common.Hash{}, c.first != 0, db, batch) + for i := c.first; i <= c.last; i++ { + tr.update(entries[i].k, entries[i].v) + if rand.Intn(2) == 0 { + tr.commit(false) + + batch.Replay(combined) + batch.Write() + batch.Reset() + } + } + tr.commit(c.last == len(entries)-1) + + batch.Replay(combined) + batch.Write() + batch.Reset() + + r := newBatchReplay() + combined.Replay(r) + + // Ensure all the internal nodes are produced + set := r.modifies() + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } + } +} + +// TestBoundSplit ensures two consecutive trie chunks are not overlapped with +// each other. +func TestBoundSplit(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for j := 0; j < 100; j++ { + var ( + next int + last int + db = rawdb.NewMemoryDatabase() + + lastRightRoot []byte + ) + for { + if next == len(entries) { + break + } + last = rand.Intn(len(entries)-next) + next + + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + set := r.modifies() + + // Skip if the chunk is zero-size + if r.updates() == 0 { + next = last + 1 + continue + } + + // Ensure the updates in two consecutive chunks are not overlapped. + // The only overlapping part should be deletion. + if lastRightRoot != nil && len(set) > 0 { + // Derive the path of left-most node in this chunk + var leftRoot []byte + for path, hash := range r.modifies() { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion %v", []byte(path)) + } + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + } + if bytes.HasPrefix(lastRightRoot, leftRoot) || bytes.HasPrefix(leftRoot, lastRightRoot) { + t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", lastRightRoot, leftRoot) + } + } + + // Track the updates as the last chunk + var rightRoot []byte + for path := range set { + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + lastRightRoot = rightRoot + next = last + 1 + } + } +} + +// TestTinyPartialTree tests if the partial tree is too tiny(has less than two +// states), then nothing should be committed. +func TestTinyPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for i := 0; i < len(entries); i++ { + next := i + last := i + 1 + if last >= len(entries) { + last = len(entries) - 1 + } + db := rawdb.NewMemoryDatabase() + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + + if next != 0 && last != len(entries)-1 { + if r.updates() != 0 { + t.Fatalf("Unexpected data writes, got: %d", r.updates()) + } + } + } +} diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go index ffaf5f3f9d..25dbcc6386 100644 --- a/eth/protocols/snap/metrics.go +++ b/eth/protocols/snap/metrics.go @@ -27,21 +27,28 @@ var ( IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil) EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil) - // deletionGauge is the metric to track how many trie node deletions - // are performed in total during the sync process. - deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil) + // accountInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in account trie are deleted during the sync. + accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil) - // lookupGauge is the metric to track how many trie node lookups are - // performed to determine if node needs to be deleted. - lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil) + // storageInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in storage trie are deleted during the sync. + storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil) + + // accountOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in account trie are deleted during the sync. + accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in account trie are met. - boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil) + // storageOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in storage trie are deleted during the sync. + storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in storage tries are met. - boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil) + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil) + accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil) + storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil) + storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil) // smallStorageGauge is the metric to track how many storages are small enough // to retrieved in one or two request. diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 3a4fecb9fb..208d3ba3bc 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -94,6 +94,9 @@ const ( // trienodeHealThrottleDecrease is the divisor for the throttle when the // rate of arriving data is lower than the rate of processing it. trienodeHealThrottleDecrease = 1.25 + + // batchSizeThreshold is the maximum size allowed for gentrie batch. + batchSizeThreshold = 8 * 1024 * 1024 ) var ( @@ -321,8 +324,8 @@ type accountTask struct { stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } @@ -360,8 +363,8 @@ type storageTask struct { root common.Hash // Storage root hash for this instance req *storageRequest // Pending request to fill this task - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } @@ -749,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { } } -// cleanPath is used to remove the dangling nodes in the stackTrie. -func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) { - if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) { - rawdb.DeleteAccountTrieNode(batch, path) - deletionGauge.Inc(1) - } - if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) { - rawdb.DeleteStorageTrieNode(batch, owner, path) - deletionGauge.Inc(1) - } - lookupGauge.Inc(1) -} - // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. func (s *Syncer) loadSyncStatus() { @@ -792,23 +782,12 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + task.genTrie = newHashTrie(task.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(task.genBatch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge) + task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch) } - task.genTrie = trie.NewStackTrie(options) - // Restore leftover storage tasks for accountHash, subtasks := range task.SubTasks { for _, subtask := range subtasks { @@ -820,23 +799,12 @@ func (s *Syncer) loadSyncStatus() { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := accountHash // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + subtask.genTrie = newHashTrie(subtask.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(subtask.genBatch, owner, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge) + subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch) } - subtask.genTrie = trie.NewStackTrie(options) } } } @@ -888,20 +856,12 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge) + tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch) } s.tasks = append(s.tasks, &accountTask{ Next: next, @@ -909,7 +869,7 @@ func (s *Syncer) loadSyncStatus() { SubTasks: make(map[common.Hash][]*storageTask), genBatch: batch, stateCompleted: make(map[common.Hash]struct{}), - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -920,11 +880,18 @@ func (s *Syncer) loadSyncStatus() { func (s *Syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { + // Claim the right boundary as incomplete before flushing the + // accumulated nodes in batch, the nodes on right boundary + // will be discarded and cleaned up by this call. + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist account slots", "err", err) } for _, subtasks := range task.SubTasks { for _, subtask := range subtasks { + // Same for account trie, discard and cleanup the + // incomplete right boundary. + subtask.genTrie.commit(false) if err := subtask.genBatch.Write(); err != nil { log.Error("Failed to persist storage slots", "err", err) } @@ -2155,25 +2122,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := account // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) // Keep the left boundary as it's the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, false, s.db, batch) } tasks = append(tasks, &storageTask{ Next: common.Hash{}, Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) for r.Next() { batch := ethdb.HookedBatch{ @@ -2182,27 +2144,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) - // Skip the left boundary as it's not the first range - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, true, s.db, batch) } tasks = append(tasks, &storageTask{ Next: r.Start(), Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) } for _, task := range tasks { @@ -2248,26 +2202,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if i < len(res.hashes)-1 || res.subTask == nil { // no need to make local reassignment of account: this closure does not outlive the loop - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner only in the context of the - // path scheme. Deletion is forbidden in the hash scheme, as it can - // disrupt state completeness. - // - // Notably, boundary nodes can be also kept because the whole storage - // trie is complete. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, account, path) - }) + // Keep the left boundary as it's complete + tr = newPathTrie(account, false, s.db, batch) } - tr := trie.NewStackTrie(options) for j := 0; j < len(res.hashes[i]); j++ { - tr.Update(res.hashes[i][j][:], res.slots[i][j]) + tr.update(res.hashes[i][j][:], res.slots[i][j]) } - tr.Commit() + tr.commit(true) } // Persist the received storage segments. These flat state maybe // outdated during the sync, but it can be fixed later during the @@ -2278,14 +2224,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // If we're storing large contracts, generate the trie nodes // on the fly to not trash the gluing points if i == len(res.hashes)-1 && res.subTask != nil { - res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j]) + res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j]) } } } // Large contracts could have generated new trie nodes, flush them to disk if res.subTask != nil { if res.subTask.done { - root := res.subTask.genTrie.Commit() + root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2302,8 +2248,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { } } } - } - if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize { + } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold { + res.subTask.genTrie.commit(false) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2486,7 +2432,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { if err != nil { panic(err) // Really shouldn't ever happen } - task.genTrie.Update(hash[:], full) + task.genTrie.update(hash[:], full) } } // Flush anything written just now and update the stats @@ -2519,9 +2465,13 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // flush after finalizing task.done. It's fine even if we crash and lose this // write as it will only cause more data to be downloaded during heal. if task.done { - task.genTrie.Commit() - } - if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { + task.genTrie.commit(task.Last == common.MaxHash) + if err := task.genBatch.Write(); err != nil { + log.Error("Failed to persist stack account", "err", err) + } + task.genBatch.Reset() + } else if task.genBatch.ValueSize() > batchSizeThreshold { + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist stack account", "err", err) } diff --git a/internal/testrand/rand.go b/internal/testrand/rand.go new file mode 100644 index 0000000000..690993de05 --- /dev/null +++ b/internal/testrand/rand.go @@ -0,0 +1,53 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package testrand + +import ( + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + + "github.com/ethereum/go-ethereum/common" +) + +// prng is a pseudo random number generator seeded by strong randomness. +// The randomness is printed on startup in order to make failures reproducible. +var prng = initRand() + +func initRand() *mrand.Rand { + var seed [8]byte + crand.Read(seed[:]) + rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:])))) + return rnd +} + +// Bytes generates a random byte slice with specified length. +func Bytes(n int) []byte { + r := make([]byte, n) + prng.Read(r) + return r +} + +// Hash generates a random hash. +func Hash() common.Hash { + return common.BytesToHash(Bytes(common.HashLength)) +} + +// Address generates a random address. +func Address() common.Address { + return common.BytesToAddress(Bytes(common.AddressLength)) +} diff --git a/trie/stacktrie.go b/trie/stacktrie.go index f2f5355c49..9c574db0bf 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -23,8 +23,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) var ( @@ -32,62 +30,32 @@ var ( _ = types.TrieHasher((*StackTrie)(nil)) ) -// StackTrieOptions contains the configured options for manipulating the stackTrie. -type StackTrieOptions struct { - Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes - Cleaner func(path []byte) // The function to clean up dangling nodes - - SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing - SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing - boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met -} - -// NewStackTrieOptions initializes an empty options for stackTrie. -func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } - -// WithWriter configures trie node writer within the options. -func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { - o.Writer = writer - return o -} - -// WithCleaner configures the cleaner in the option for removing dangling nodes. -func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions { - o.Cleaner = cleaner - return o -} - -// WithSkipBoundary configures whether the left and right boundary nodes are -// filtered for committing, along with a gauge metrics to track how many -// boundary nodes are met. -func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions { - o.SkipLeftBoundary = skipLeft - o.SkipRightBoundary = skipRight - o.boundaryGauge = gauge - return o -} +// OnTrieNode is a callback method invoked when a trie node is committed +// by the stack trie. The node is only committed if it's considered complete. +// +// The caller should not modify the contents of the returned path and blob +// slice, and their contents may be changed after the call. It is up to the +// `onTrieNode` receiver function to deep-copy the data if it wants to retain +// it after the call ends. +type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - options *StackTrieOptions - root *stNode - h *hasher - - first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary. - last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary. + root *stNode + h *hasher + last []byte + onTrieNode OnTrieNode } -// NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(options *StackTrieOptions) *StackTrie { - if options == nil { - options = NewStackTrieOptions() - } +// NewStackTrie allocates and initializes an empty trie. The committed nodes +// will be discarded immediately if no callback is configured. +func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { return &StackTrie{ - options: options, - root: stPool.Get().(*stNode), - h: newHasher(false), + root: stPool.Get().(*stNode), + h: newHasher(false), + onTrieNode: onTrieNode, } } @@ -101,10 +69,6 @@ func (t *StackTrie) Update(key, value []byte) error { if bytes.Compare(t.last, k) >= 0 { return errors.New("non-ascending key order") } - // track the first and last inserted entries. - if t.first == nil { - t.first = append([]byte{}, k...) - } if t.last == nil { t.last = append([]byte{}, k...) // allocate key slice } else { @@ -114,19 +78,9 @@ func (t *StackTrie) Update(key, value []byte) error { return nil } -// MustUpdate is a wrapper of Update and will omit any encountered error but -// just print out an error message. -func (t *StackTrie) MustUpdate(key, value []byte) { - if err := t.Update(key, value); err != nil { - log.Error("Unhandled trie error in StackTrie.Update", "err", err) - } -} - // Reset resets the stack trie object to empty state. func (t *StackTrie) Reset() { - t.options = NewStackTrieOptions() t.root = stPool.Get().(*stNode) - t.first = nil t.last = nil } @@ -346,10 +300,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { // // This method also sets 'st.type' to hashedNode, and clears 'st.key'. func (t *StackTrie) hash(st *stNode, path []byte) { - var ( - blob []byte // RLP-encoded node blob - internal [][]byte // List of node paths covered by the extension node - ) + var blob []byte // RLP-encoded node blob switch st.typ { case hashedNode: return @@ -384,15 +335,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // recursively hash and commit child as the first step t.hash(st.children[0], append(path, st.key...)) - // Collect the path of internal nodes between shortNode and its **in disk** - // child. This is essential in the case of path mode scheme to avoid leaving - // danging nodes within the range of this internal path on disk, which would - // break the guarantee for state healing. - if len(st.children[0].val) >= 32 && t.options.Cleaner != nil { - for i := 1; i < len(st.key); i++ { - internal = append(internal, append(path, st.key[:i]...)) - } - } // encode the extension node n := shortNode{Key: hexToCompactInPlace(st.key)} if len(st.children[0].val) < 32 { @@ -416,11 +358,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) { default: panic("invalid node type") } - + // Convert the node type to hashNode and reset the key slice. st.typ = hashedNode st.key = st.key[:0] - // Skip committing the non-root node if the size is smaller than 32 bytes. + // Skip committing the non-root node if the size is smaller than 32 bytes + // as tiny nodes are always embedded in their parent except root node. if len(blob) < 32 && len(path) > 0 { st.val = common.CopyBytes(blob) return @@ -429,51 +372,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // input values. st.val = t.h.hashData(blob) - // Short circuit if the stack trie is not configured for writing. - if t.options.Writer == nil { - return + // Invoke the callback it's provided. Notably, the path and blob slices are + // volatile, please deep-copy the slices in callback if the contents need + // to be retained. + if t.onTrieNode != nil { + t.onTrieNode(path, common.BytesToHash(st.val), blob) } - // Skip committing if the node is on the left boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Skip committing if the node is on the right boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Clean up the internal dangling nodes covered by the extension node. - // This should be done before writing the node to adhere to the committing - // order from bottom to top. - for _, path := range internal { - t.options.Cleaner(path) - } - t.options.Writer(path, common.BytesToHash(st.val), blob) } // Hash will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. +// all leftover nodes to the associated database. Actually most of the trie nodes +// have been committed already. The main purpose here is to commit the nodes on +// right boundary. func (t *StackTrie) Hash() common.Hash { n := t.root t.hash(n, nil) return common.BytesToHash(n.val) } - -// Commit will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. -func (t *StackTrie) Commit() common.Hash { - return t.Hash() -} diff --git a/trie/stacktrie_fuzzer_test.go b/trie/stacktrie_fuzzer_test.go index 50b5c4de52..c8e568355c 100644 --- a/trie/stacktrie_fuzzer_test.go +++ b/trie/stacktrie_fuzzer_test.go @@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) { trieA = NewEmpty(dbA) spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme) - - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + trieB = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) }) - trieB = NewStackTrie(options) vals []*kv maxElements = 10000 // operate on unique keys only @@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) { if debugging { fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v) } - trieB.MustUpdate(kv.k, kv.v) + trieB.Update(kv.k, kv.v) } rootB := trieB.Hash() - trieB.Commit() if rootA != rootB { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) } @@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) { // Ensure all the nodes are persisted correctly var ( - nodeset = make(map[string][]byte) // path -> blob - optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodeset = make(map[string][]byte) // path -> blob + trieC = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { if crypto.Keccak256Hash(blob) != hash { panic("invalid node blob") } nodeset[string(path)] = common.CopyBytes(blob) }) - trieC = NewStackTrie(optionsC) checked int ) for _, kv := range vals { - trieC.MustUpdate(kv.k, kv.v) + trieC.Update(kv.k, kv.v) } - rootC := trieC.Commit() + rootC := trieC.Hash() if rootA != rootC { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index 3a0e1cb260..f053b5112d 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -19,15 +19,12 @@ package trie import ( "bytes" "math/big" - "math/rand" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/trie/testutil" "github.com/stretchr/testify/assert" - "golang.org/x/exp/slices" ) func TestStackTrieInsertAndHash(t *testing.T) { @@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) { } } -func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash { - var ( - options = NewStackTrieOptions() - nodes = make(map[string]common.Hash) - ) - var ( - first int - last = len(entries) - 1 - - noLeft bool - noRight bool - ) - // Enter split mode if there are at least two elements - if rand.Intn(5) != 0 { - for { - first = rand.Intn(len(entries)) - last = rand.Intn(len(entries)) - if first <= last { - break - } - } - if first != 0 { - noLeft = true - } - if last != len(entries)-1 { - noRight = true - } - } - options = options.WithSkipBoundary(noLeft, noRight, nil) - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - tr := NewStackTrie(options) - - for i := first; i <= last; i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - return nodes -} - -func TestPartialStackTrie(t *testing.T) { - for round := 0; round < 100; round++ { - var ( - n = rand.Intn(100) + 1 - entries []*kv - ) - for i := 0; i < n; i++ { - var val []byte - if rand.Intn(3) == 0 { - val = testutil.RandBytes(3) - } else { - val = testutil.RandBytes(32) - } - entries = append(entries, &kv{ - k: testutil.RandBytes(32), - v: val, - }) - } - slices.SortFunc(entries, (*kv).cmp) - - var ( - nodes = make(map[string]common.Hash) - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - ) - tr := NewStackTrie(options) - - for i := 0; i < len(entries); i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - - for j := 0; j < 100; j++ { - for path, hash := range buildPartialTree(entries, t) { - if nodes[path] != hash { - t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash) - } - } - } - } -} - func TestStackTrieErrors(t *testing.T) { s := NewStackTrie(nil) // Deletion diff --git a/trie/trie_test.go b/trie/trie_test.go index 379a866f7e..c141c52078 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) // Fill the trie with elements for i := 0; i < count; i++ { @@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { s.Flush() // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } @@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) - // Add a single small-element to the trie(s) key := make([]byte, 5) key[0] = 1 @@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { db.Commit(root) // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) }