core, eth/protocols/snap, trie: fix cause for snap-sync corruption, implement gentrie (#29313)

This pull request defines a gentrie for snap sync purposes. The stackTrie is used to generate the merkle tree nodes upon receiving a state batch. Several additional options had been added to stackTrie to handle incomplete states (missing states either before or after the received range). In this pull request, these options have been relocated from stackTrie to genTrie, which serves as a wrapper around stackTrie specifically for snap sync purposes.

Further, the logic for managing incomplete state has been enhanced in this change. Originally, two cases were handled:

- boundary node filtering
- internal (covered by extension node) node clearing

This change adds one more:

- clearing leftover nodes on the boundaries

This feature is necessary if there are leftover trie nodes in the database; otherwise, node inconsistency may break the state healing.
7 months ago · 35e0525bf4
parent 7bcb5532a5
commit 35e0525bf4
11 changed files with 1018 additions and 354 deletions
--- a/core/state/snapshot/conversion.go
+++ b/core/state/snapshot/conversion.go
@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou
 }

 func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) {
-	options := trie.NewStackTrieOptions()
+	var onTrieNode trie.OnTrieNode
 	if db != nil {
-		options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+		onTrieNode = func(path []byte, hash common.Hash, blob []byte) {
 			rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme)
-		})
+		}
 	}
-	t := trie.NewStackTrie(options)
+	t := trie.NewStackTrie(onTrieNode)
 	for leaf := range in {
 		t.Update(leaf.key[:], leaf.value)
 	}
-	out <- t.Commit()
+	out <- t.Hash()
 }
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@ -961,12 +961,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (boo
 		nodes = trienode.NewNodeSet(addrHash)
 		slots = make(map[common.Hash][]byte)
 	)
-	options := trie.NewStackTrieOptions()
-	options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+	stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
 		nodes.AddNode(path, trienode.NewDeleted())
 		size += common.StorageSize(len(path))
 	})
-	stack := trie.NewStackTrie(options)
 	for iter.Next() {
 		if size > storageDeleteLimit {
 			return true, size, nil, nil, nil
--- a/eth/protocols/snap/gentrie.go
+++ b/eth/protocols/snap/gentrie.go
@ -0,0 +1,287 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snap
+
+import (
+	"bytes"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// genTrie is the interface used by the snap syncer to generate merkle tree
+// nodes from a received batch of states. The implementations in this file
+// are pathTrie and hashTrie.
+type genTrie interface {
+	// update inserts the state item (key, value) into the generator trie.
+	update(key, value []byte) error
+
+	// commit flushes the right boundary nodes if the complete flag is true.
+	// This function must be called before flushing the associated database
+	// batch. Both implementations in this file return the empty hash when
+	// complete is false.
+	commit(complete bool) common.Hash
+}
+
+// pathTrie is a wrapper over the stackTrie, incorporating additional logic
+// to handle the semi-completed trie and potential leftover dangling nodes
+// in the database. It is utilized for constructing the merkle tree nodes
+// in path mode during the snap sync process.
+type pathTrie struct {
+	owner common.Hash     // identifier of trie owner, empty for account trie
+	tr    *trie.StackTrie // underlying raw stack trie
+	first []byte          // the path of first committed node by stackTrie
+	last  []byte          // the path of last committed node by stackTrie
+
+	// This flag indicates whether nodes on the left boundary are skipped for
+	// committing. If set, the left boundary nodes are considered incomplete
+	// due to potentially missing left children.
+	skipLeftBoundary bool
+	db               ethdb.KeyValueReader // read-only view used to look up leftover nodes before deleting them
+	batch            ethdb.Batch          // batch collecting the node writes and deletions
+}
+
+// newPathTrie constructs a pathTrie for the given owner and wires its
+// onTrieNode hook into a fresh stackTrie, so that every node committed by
+// the stackTrie is routed through the wrapper.
+func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie {
+	t := new(pathTrie)
+	t.owner = owner
+	t.skipLeftBoundary = skipLeftBoundary
+	t.db = db
+	t.batch = batch
+
+	// The callback is bound to the wrapper itself, hence the stackTrie can
+	// only be created once the wrapper exists.
+	t.tr = trie.NewStackTrie(t.onTrieNode)
+	return t
+}
+
+// onTrieNode is invoked whenever a new node is committed by the stackTrie.
+//
+// As the committed nodes might be incomplete if they are on the boundaries
+// (left or right), this function has the ability to detect the incomplete
+// ones and filter them out for committing.
+//
+// Additionally, the assumption is made that there may exist leftover dangling
+// nodes in the database. This function has the ability to detect the dangling
+// nodes that fall within the path space of committed nodes (specifically on
+// the path covered by internal extension nodes) and remove them from the
+// database. This property ensures that the entire path space is uniquely
+// occupied by committed nodes.
+//
+// Furthermore, all leftover dangling nodes along the path from committed nodes
+// to the trie root (left and right boundaries) should be removed as well;
+// otherwise, they might potentially disrupt the state healing process.
+//
+// The path is volatile and is deep-copied before being retained, the hash is
+// not used by this callback, and the blob is the node content staged into the
+// database batch.
+func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
+	// Filter out the nodes on the left boundary if skipLeftBoundary is
+	// configured. Nodes are considered to be on the left boundary if
+	// it's the first one to be committed, or the parent/ancestor of the
+	// first committed node.
+	if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) {
+		if t.first == nil {
+			// Memorize the path of first committed node, which is regarded
+			// as left boundary. Deep-copy is necessary as the path given
+			// is volatile.
+			t.first = append([]byte{}, path...)
+
+			// The left boundary can be uniquely determined by the first committed node
+			// from stackTrie (e.g., N_1), as the shared path prefix between the first
+			// two inserted state items is deterministic (the path of N_3). The path
+			// from trie root towards the first committed node is considered the left
+			// boundary. The potential leftover dangling nodes on left boundary should
+			// be cleaned out.
+			//
+			//                            +-----+
+			//                            | N_3 | shared path prefix of state_1 and state_2
+			//                            +-----+
+			//                            /-   -\
+			//                       +-----+   +-----+
+			// First committed node  | N_1 |   | N_2 | latest inserted node (contain state_2)
+			//                       +-----+   +-----+
+			//
+			// The node with the path of the first committed one (e.g, N_1) is not
+			// removed because it's a sibling of the nodes we want to commit, not
+			// the parent or ancestor.
+			for i := 0; i < len(path); i++ {
+				t.delete(path[:i], false)
+			}
+		}
+		return
+	}
+	// If boundary filtering is not configured, or the node is not on the left
+	// boundary, commit it to database.
+	//
+	// Note: If the current committed node is an extension node, then the nodes
+	// falling within the path between itself and its standalone (not embedded
+	// in parent) child should be cleaned out, so that the inner path is
+	// exclusively occupied by the committed nodes.
+	//
+	// This is essential in snap sync to avoid leaving dangling nodes within
+	// this range covered by extension node which could potentially break the
+	// state healing.
+	//
+	// The extension node is detected if its path is the prefix of last committed
+	// one and path gap is larger than one. If the path gap is only one byte,
+	// the current node could either be a full node, or an extension with single
+	// byte key. In either case, no gaps will be left in the path.
+	if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
+		for i := len(path) + 1; i < len(t.last); i++ {
+			t.delete(t.last[:i], true)
+		}
+	}
+	t.write(path, blob)
+
+	// Update the last flag. Deep-copy is necessary as the provided path is volatile.
+	if t.last == nil {
+		t.last = append([]byte{}, path...)
+	} else {
+		t.last = append(t.last[:0], path...)
+	}
+}
+
+// write stages the node blob at the given path into the database batch in
+// path mode. The storage key space is used if an owner is set, otherwise
+// the node belongs to the account trie.
+func (t *pathTrie) write(path []byte, blob []byte) {
+	if t.owner != (common.Hash{}) {
+		rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob)
+		return
+	}
+	rawdb.WriteAccountTrieNode(t.batch, path, blob)
+}
+
+// deleteAccountNode stages the deletion of the account trie node at the given
+// path if such a node exists in the database. The lookup is always counted,
+// the deletion only if it is performed; the inner flag selects which pair of
+// gauges (inner or outer) is bumped.
+func (t *pathTrie) deleteAccountNode(path []byte, inner bool) {
+	lookups, deletes := accountOuterLookupGauge, accountOuterDeleteGauge
+	if inner {
+		lookups, deletes = accountInnerLookupGauge, accountInnerDeleteGauge
+	}
+	lookups.Inc(1)
+
+	if rawdb.ExistsAccountTrieNode(t.db, path) {
+		deletes.Inc(1)
+		rawdb.DeleteAccountTrieNode(t.batch, path)
+	}
+}
+
+// deleteStorageNode stages the deletion of the storage trie node at the given
+// path (within the trie of t.owner) if such a node exists in the database.
+// The lookup is always counted, the deletion only if it is performed; the
+// inner flag selects which pair of gauges (inner or outer) is bumped.
+func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
+	lookups, deletes := storageOuterLookupGauge, storageOuterDeleteGauge
+	if inner {
+		lookups, deletes = storageInnerLookupGauge, storageInnerDeleteGauge
+	}
+	lookups.Inc(1)
+
+	if rawdb.ExistsStorageTrieNode(t.db, t.owner, path) {
+		deletes.Inc(1)
+		rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
+	}
+}
+
+// delete stages the node deletion into the database batch in path mode,
+// dispatching to the storage variant if an owner is set and to the account
+// variant otherwise.
+func (t *pathTrie) delete(path []byte, inner bool) {
+	if t.owner != (common.Hash{}) {
+		t.deleteStorageNode(path, inner)
+		return
+	}
+	t.deleteAccountNode(path, inner)
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into the
+// stack trie. The error reported by the underlying stack trie is returned
+// as is.
+func (t *pathTrie) update(key, value []byte) error {
+	return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface. If the right boundary is claimed as
+// complete, the pending nodes on it are flushed; otherwise they are discarded
+// and the leftovers on the right boundary are cleaned up.
+//
+// Note, this function must be called before flushing database batch, otherwise,
+// dangling nodes might be left in database.
+func (t *pathTrie) commit(complete bool) common.Hash {
+	if !complete {
+		// Discard nodes on the right boundary as it's claimed as incomplete. These
+		// nodes might be incomplete due to missing children on the right side.
+		// Furthermore, the potential leftover nodes on right boundary should also
+		// be cleaned out.
+		//
+		// The right boundary can be uniquely determined by the last committed node
+		// from stackTrie (e.g., N_1), as the shared path prefix between the last
+		// two inserted state items is deterministic (the path of N_3). The path
+		// from trie root towards the last committed node is considered the right
+		// boundary (root to N_3).
+		//
+		//                           +-----+
+		//                           | N_3 | shared path prefix of last two states
+		//                           +-----+
+		//                           /-   -\
+		//                      +-----+   +-----+
+		// Last committed node  | N_1 |   | N_2 | latest inserted node  (contain last state)
+		//                      +-----+   +-----+
+		//
+		// Another interesting scenario occurs when the trie is committed due to
+		// too many items being accumulated in the batch. To flush them out to
+		// the database, the path of the last inserted node (N_2) is temporarily
+		// treated as an incomplete right boundary, and nodes on this path are
+		// removed (e.g. from root to N_3).
+		// However, this path will be reclaimed as an internal path by inserting
+		// more items after the batch flush. New nodes on this path can be committed
+		// with no issues as they are actually complete. Also, from a database
+		// perspective, first deleting and then rewriting is a valid data update.
+		for i := range t.last {
+			t.delete(t.last[:i], false)
+		}
+		return common.Hash{} // the hash is meaningless for incomplete commit
+	}
+	// The right boundary is claimed as complete: hashing the stackTrie commits
+	// all inserted but not yet committed nodes (on the right boundary). Nodes
+	// on the left boundary are still filtered out by the node callback if left
+	// boundary filtering is configured.
+	root := t.tr.Hash()
+	if !t.skipLeftBoundary {
+		return root
+	}
+	return common.Hash{} // hash is meaningless if left side is incomplete
+}
+
+// hashTrie is a thin wrapper over the stackTrie for implementing the genTrie
+// interface. It carries no boundary handling of its own.
+type hashTrie struct {
+	tr *trie.StackTrie
+}
+
+// newHashTrie constructs a hashTrie whose committed nodes are staged into the
+// given batch as legacy trie nodes keyed by node hash; the node path is not
+// used.
+func newHashTrie(batch ethdb.Batch) *hashTrie {
+	onNode := func(path []byte, hash common.Hash, blob []byte) {
+		rawdb.WriteLegacyTrieNode(batch, hash, blob)
+	}
+	return &hashTrie{tr: trie.NewStackTrie(onNode)}
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into
+// the stack trie. The error reported by the underlying stack trie is
+// returned as is.
+func (t *hashTrie) update(key, value []byte) error {
+	return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface. The stack trie is hashed (committing
+// the nodes on the right boundary) only if the commit is claimed as complete;
+// otherwise nothing is done and the empty hash is returned.
+func (t *hashTrie) commit(complete bool) common.Hash {
+	if complete {
+		return t.tr.Hash() // return hash only if it's claimed as complete
+	}
+	return common.Hash{} // the hash is meaningless for incomplete commit
+}
--- a/eth/protocols/snap/gentrie_test.go
+++ b/eth/protocols/snap/gentrie_test.go
@ -0,0 +1,553 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package snap
+
+import (
+	"bytes"
+	"math/rand"
+	"slices"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/internal/testrand"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// replayer records the trie node mutations replayed from a database batch.
+// The paths and hashes slices are parallel: entry i of hashes belongs to
+// entry i of paths.
+type replayer struct {
+	paths    []string      // node paths, recorded in FIFO order
+	hashes   []common.Hash // hash of the written value, empty for deletion
+	unknowns int           // counter for writes that are not trie nodes
+}
+
+// newBatchReplay returns an empty replayer.
+func newBatchReplay() *replayer {
+	return &replayer{}
+}
+
+// decode records a single database mutation. Keys which are neither account
+// nor storage trie nodes are only counted as unknown. For storage trie nodes
+// the recorded path is the owner hash followed by the node path. An empty
+// value is recorded with the empty hash, any other value with its Keccak256
+// hash.
+func (r *replayer) decode(key []byte, value []byte) {
+	var path []byte
+	switch {
+	case rawdb.IsAccountTrieNode(key):
+		_, path = rawdb.ResolveAccountTrieNodeKey(key)
+	case rawdb.IsStorageTrieNode(key):
+		_, owner, inner := rawdb.ResolveStorageTrieNode(key)
+		path = append(owner.Bytes(), inner...)
+	default:
+		r.unknowns += 1
+		return
+	}
+	var hash common.Hash
+	if len(value) != 0 {
+		hash = crypto.Keccak256Hash(value)
+	}
+	r.paths = append(r.paths, string(path))
+	r.hashes = append(r.hashes, hash)
+}
+
+// modifies returns the set of effective mutations keyed by node path. Multiple
+// mutations targeting the same node path are merged in FIFO order, so that
+// the latest recorded one wins.
+func (r *replayer) modifies() map[string]common.Hash {
+	merged := make(map[string]common.Hash)
+	for i := range r.paths {
+		merged[r.paths[i]] = r.hashes[i]
+	}
+	return merged
+}
+
+// updates returns the number of effective mutations which carry a non-empty
+// hash, i.e. the entries of modifies that are not recorded as deletions.
+func (r *replayer) updates() int {
+	total := 0
+	for _, hash := range r.modifies() {
+		if hash != (common.Hash{}) {
+			total++
+		}
+	}
+	return total
+}
+
+// Put records the write of the given value under the given key. Nothing is
+// stored besides the decoded record, and the returned error is always nil.
+func (r *replayer) Put(key []byte, value []byte) error {
+	r.decode(key, value)
+	return nil
+}
+
+// Delete records the removal of the given key, which is decoded with a nil
+// value. The returned error is always nil.
+func (r *replayer) Delete(key []byte) error {
+	r.decode(key, nil)
+	return nil
+}
+
+// byteToHex expands every byte of str into two nibbles, the high one first.
+// The returned slice has twice the length of the input.
+func byteToHex(str []byte) []byte {
+	nibbles := make([]byte, 0, 2*len(str))
+	for _, b := range str {
+		nibbles = append(nibbles, b/16, b%16)
+	}
+	return nibbles
+}
+
+// innerNodes returns the internal nodes narrowed by two boundaries along with
+// the leftmost and rightmost sub-trie roots. The boundaries are given as keys
+// and compared in nibble form; a side is only filtered if the respective
+// include flag is false. The test fails if any of the given nodes carries the
+// empty hash.
+func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) {
+	var (
+		lower     = byteToHex(first)
+		upper     = byteToHex(last)
+		kept      = make(map[string]common.Hash)
+		leftRoot  []byte
+		rightRoot []byte
+	)
+	for key, hash := range nodes {
+		path := []byte(key)
+		if hash == (common.Hash{}) {
+			t.Fatalf("Unexpected deletion, %v", path)
+		}
+		// Drop the siblings on the left side and the left boundary nodes.
+		if !includeLeft {
+			if bytes.Compare(lower, path) > 0 || bytes.HasPrefix(lower, path) {
+				continue
+			}
+		}
+		// Drop the siblings on the right side and the right boundary nodes.
+		if !includeRight {
+			if bytes.Compare(upper, path) < 0 || bytes.HasPrefix(upper, path) {
+				continue
+			}
+		}
+		kept[key] = hash
+
+		// The leftmost sub trie root is the smallest retained path.
+		if leftRoot == nil || bytes.Compare(leftRoot, path) > 0 {
+			leftRoot = path
+		}
+		// The rightmost sub trie root is the largest retained path, unless
+		// one of its own prefixes is retained too, in which case the prefix
+		// takes over.
+		if rightRoot == nil {
+			rightRoot = path
+			continue
+		}
+		order := bytes.Compare(rightRoot, path)
+		if order < 0 || (order > 0 && bytes.HasPrefix(rightRoot, path)) {
+			rightRoot = path
+		}
+	}
+	return kept, leftRoot, rightRoot
+}
+
+// buildPartial feeds entries[first..last] (both inclusive) into a fresh path
+// trie and commits it. The left boundary is skipped unless the range starts
+// at the very first entry, and the commit is claimed as complete only if the
+// range ends at the very last entry. The mutations staged in the batch are
+// replayed into the returned replayer; the batch itself is not written.
+func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer {
+	skipLeft := first != 0
+	complete := last == len(entries)-1
+
+	gen := newPathTrie(owner, skipLeft, db, batch)
+	for i := first; i <= last; i++ {
+		entry := entries[i]
+		gen.update(entry.k, entry.v)
+	}
+	gen.commit(complete)
+
+	r := newBatchReplay()
+	batch.Replay(r)
+	return r
+}
+
+// TestPartialGentree verifies if the trie constructed with partial states can
+// generate consistent trie nodes that match those of the full trie.
+//
+// In every round a random set of sorted entries is generated and the nodes of
+// the full trie are collected as the reference. Partial tries are then built
+// over random and hand-picked ranges, and the node writes staged by each of
+// them must be exactly the reference nodes lying strictly inside the range.
+func TestPartialGentree(t *testing.T) {
+	for round := 0; round < 100; round++ {
+		var (
+			n       = rand.Intn(1024) + 10
+			entries []*kv
+		)
+		for i := 0; i < n; i++ {
+			var val []byte
+			if rand.Intn(3) == 0 {
+				val = testrand.Bytes(3)
+			} else {
+				val = testrand.Bytes(32)
+			}
+			entries = append(entries, &kv{
+				k: testrand.Bytes(32),
+				v: val,
+			})
+		}
+		slices.SortFunc(entries, (*kv).cmp)
+
+		// Collect the nodes of the full trie as the reference
+		nodes := make(map[string]common.Hash)
+		tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+			nodes[string(path)] = hash
+		})
+		for i := 0; i < len(entries); i++ {
+			tr.Update(entries[i].k, entries[i].v)
+		}
+		tr.Hash()
+
+		check := func(first, last int) {
+			var (
+				db    = rawdb.NewMemoryDatabase()
+				batch = db.NewBatch()
+			)
+			// Build the partial tree with specific boundaries
+			r := buildPartial(common.Hash{}, db, batch, entries, first, last)
+			if r.unknowns > 0 {
+				t.Fatalf("Unknown database write: %d", r.unknowns)
+			}
+
+			// Ensure all the internal nodes are produced
+			var (
+				set         = r.modifies()
+				inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+			)
+			for path, hash := range inner {
+				got, ok := set[path]
+				if !ok {
+					t.Fatalf("Missing nodes %v", []byte(path))
+				}
+				if hash != got {
+					t.Fatalf("Inconsistent node, want %x, got: %x", hash, got)
+				}
+			}
+			if r.updates() != len(inner) {
+				t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+			}
+		}
+		for j := 0; j < 100; j++ {
+			var (
+				first int
+				last  int
+			)
+			for {
+				first = rand.Intn(len(entries))
+				last = rand.Intn(len(entries))
+				if first <= last {
+					break
+				}
+			}
+			check(first, last)
+		}
+		// The hand-picked ranges below are all valid as at least ten entries
+		// are generated in every round.
+		var cases = []struct {
+			first int
+			last  int
+		}{
+			{0, len(entries) - 1},                // full
+			{1, len(entries) - 1},                // no left
+			{2, len(entries) - 1},                // no left
+			{0, len(entries) - 2},                // no right
+			{2, len(entries) - 2},                // no left and right
+			{len(entries) / 2, len(entries) / 2}, // single
+			{0, 0},                               // single first
+			{len(entries) - 1, len(entries) - 1}, // single last
+		}
+		for _, c := range cases {
+			check(c.first, c.last)
+		}
+	}
+}
+
+// TestGentreeDanglingClearing tests if the dangling nodes falling within the
+// path space of constructed tree can be correctly removed.
+//
+// In every round the nodes of the full trie are collected as the reference,
+// and junk nodes are written into the database at every strict prefix of a
+// reference path which is not a reference node itself. After building a
+// partial trie, every junk node between the leftmost and rightmost committed
+// sub-trie roots must have a deletion staged in the batch.
+func TestGentreeDanglingClearing(t *testing.T) {
+	for round := 0; round < 100; round++ {
+		var (
+			n       = rand.Intn(1024) + 10
+			entries []*kv
+		)
+		for i := 0; i < n; i++ {
+			var val []byte
+			if rand.Intn(3) == 0 {
+				val = testrand.Bytes(3)
+			} else {
+				val = testrand.Bytes(32)
+			}
+			entries = append(entries, &kv{
+				k: testrand.Bytes(32),
+				v: val,
+			})
+		}
+		slices.SortFunc(entries, (*kv).cmp)
+
+		// Collect the nodes of the full trie as the reference
+		nodes := make(map[string]common.Hash)
+		tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+			nodes[string(path)] = hash
+		})
+		for i := 0; i < len(entries); i++ {
+			tr.Update(entries[i].k, entries[i].v)
+		}
+		tr.Hash()
+
+		// Derive the paths of the junk nodes. They depend on the reference
+		// nodes only, thus they are computed once per round instead of once
+		// per check.
+		var injects []string
+		for path := range nodes {
+			for i := 0; i < len(path); i++ {
+				if _, ok := nodes[path[:i]]; !ok {
+					injects = append(injects, path[:i])
+				}
+			}
+		}
+
+		check := func(first, last int) {
+			if len(injects) == 0 {
+				return
+			}
+			var (
+				db    = rawdb.NewMemoryDatabase()
+				batch = db.NewBatch()
+			)
+			// Write the junk nodes as the dangling
+			for _, path := range injects {
+				rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32))
+			}
+
+			// Build the partial tree with specific range
+			replay := buildPartial(common.Hash{}, db, batch, entries, first, last)
+			if replay.unknowns > 0 {
+				t.Fatalf("Unknown database write: %d", replay.unknowns)
+			}
+			set := replay.modifies()
+
+			// Make sure the injected junks falling within the path space of
+			// committed trie nodes are correctly deleted.
+			_, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+			for _, path := range injects {
+				if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) {
+					continue
+				}
+				if bytes.Compare([]byte(path), rightRoot) > 0 {
+					continue
+				}
+				if hash, ok := set[path]; !ok || hash != (common.Hash{}) {
+					t.Fatalf("Missing delete, %v", []byte(path))
+				}
+			}
+		}
+		for j := 0; j < 100; j++ {
+			var (
+				first int
+				last  int
+			)
+			for {
+				first = rand.Intn(len(entries))
+				last = rand.Intn(len(entries))
+				if first <= last {
+					break
+				}
+			}
+			check(first, last)
+		}
+		// The hand-picked ranges below are all valid as at least ten entries
+		// are generated in every round.
+		var cases = []struct {
+			first int
+			last  int
+		}{
+			{0, len(entries) - 1},                // full
+			{1, len(entries) - 1},                // no left
+			{2, len(entries) - 1},                // no left
+			{0, len(entries) - 2},                // no right
+			{2, len(entries) - 2},                // no left and right
+			{len(entries) / 2, len(entries) / 2}, // single
+			{0, 0},                               // single first
+			{len(entries) - 1, len(entries) - 1}, // single last
+		}
+		for _, c := range cases {
+			check(c.first, c.last)
+		}
+	}
+}
+
+// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes
+// even with lots of batch flushes.
+//
+// While a range is being fed, the trie is randomly committed as incomplete and
+// the batch is flushed into the database. All batches are also replayed into
+// a combined batch, whose effective writes must be exactly the inner nodes of
+// the range in the reference trie.
+func TestFlushPartialTree(t *testing.T) {
+	var entries []*kv
+	for i := 0; i < 1024; i++ {
+		var val []byte
+		if rand.Intn(3) == 0 {
+			val = testrand.Bytes(3)
+		} else {
+			val = testrand.Bytes(32)
+		}
+		entries = append(entries, &kv{
+			k: testrand.Bytes(32),
+			v: val,
+		})
+	}
+	slices.SortFunc(entries, (*kv).cmp)
+
+	// Collect the nodes of the full trie as the reference
+	nodes := make(map[string]common.Hash)
+	tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+		nodes[string(path)] = hash
+	})
+	for i := 0; i < len(entries); i++ {
+		tr.Update(entries[i].k, entries[i].v)
+	}
+	tr.Hash()
+
+	var cases = []struct {
+		first int
+		last  int
+	}{
+		{0, len(entries) - 1},                // full
+		{1, len(entries) - 1},                // no left
+		{10, len(entries) - 1},               // no left
+		{10, len(entries) - 2},               // no left and right
+		{10, len(entries) - 10},              // no left and right
+		{11, 11},                             // single
+		{0, 0},                               // single first
+		{len(entries) - 1, len(entries) - 1}, // single last
+	}
+	for _, c := range cases {
+		var (
+			db       = rawdb.NewMemoryDatabase()
+			batch    = db.NewBatch()
+			combined = db.NewBatch()
+		)
+		inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t)
+
+		tr := newPathTrie(common.Hash{}, c.first != 0, db, batch)
+		for i := c.first; i <= c.last; i++ {
+			tr.update(entries[i].k, entries[i].v)
+			if rand.Intn(2) == 0 {
+				// Randomly flush in the middle of the range. The commit must
+				// come first, then the batch is recorded into the combined
+				// one before being written and reset.
+				tr.commit(false)
+
+				batch.Replay(combined)
+				batch.Write()
+				batch.Reset()
+			}
+		}
+		tr.commit(c.last == len(entries)-1)
+
+		batch.Replay(combined)
+		batch.Write()
+		batch.Reset()
+
+		// Merge the mutations of all the flushed batches
+		r := newBatchReplay()
+		combined.Replay(r)
+
+		// Ensure all the internal nodes are produced
+		set := r.modifies()
+		for path, hash := range inner {
+			if _, ok := set[path]; !ok {
+				t.Fatalf("Missing nodes %v", []byte(path))
+			}
+			if hash != set[path] {
+				t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path])
+			}
+		}
+		if r.updates() != len(inner) {
+			t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+		}
+	}
+}
+
+// TestBoundSplit ensures two consecutive trie chunks are not overlapped with
+// each other: the leftmost node written by a chunk must be neither a prefix
+// of, nor prefixed by, the rightmost sub-trie root of the previous chunk.
+func TestBoundSplit(t *testing.T) {
+	var entries []*kv
+	for i := 0; i < 1024; i++ {
+		size := 32
+		if rand.Intn(3) == 0 {
+			size = 3
+		}
+		val := testrand.Bytes(size)
+		entries = append(entries, &kv{k: testrand.Bytes(32), v: val})
+	}
+	slices.SortFunc(entries, (*kv).cmp)
+
+	for j := 0; j < 100; j++ {
+		var (
+			db        = rawdb.NewMemoryDatabase()
+			prevRight []byte
+		)
+		// Split the entries into consecutive chunks of random size
+		for next := 0; next < len(entries); {
+			var (
+				last = rand.Intn(len(entries)-next) + next
+				r    = buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last)
+			)
+			next = last + 1
+
+			// Skip if the chunk is zero-size
+			if r.updates() == 0 {
+				continue
+			}
+			set := r.modifies()
+
+			// Ensure the updates in two consecutive chunks are not overlapped.
+			// The only overlapping part should be deletion.
+			if prevRight != nil && len(set) > 0 {
+				// Derive the path of left-most node in this chunk
+				var leftRoot []byte
+				for path, hash := range set {
+					if hash == (common.Hash{}) {
+						t.Fatalf("Unexpected deletion %v", []byte(path))
+					}
+					if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 {
+						leftRoot = []byte(path)
+					}
+				}
+				if bytes.HasPrefix(prevRight, leftRoot) || bytes.HasPrefix(leftRoot, prevRight) {
+					t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", prevRight, leftRoot)
+				}
+			}
+
+			// Track the rightmost sub-trie root of this chunk for the next one
+			var rightRoot []byte
+			for path := range set {
+				if rightRoot == nil {
+					rightRoot = []byte(path)
+					continue
+				}
+				order := bytes.Compare(rightRoot, []byte(path))
+				if order < 0 || (order > 0 && bytes.HasPrefix(rightRoot, []byte(path))) {
+					rightRoot = []byte(path)
+				}
+			}
+			prevRight = rightRoot
+		}
+	}
+}
+
+// TestTinyPartialTree tests that a tiny partial tree, built from two adjacent
+// states which touch neither the first nor the last entry, commits nothing.
+func TestTinyPartialTree(t *testing.T) {
+	var entries []*kv
+	for i := 0; i < 1024; i++ {
+		size := 32
+		if rand.Intn(3) == 0 {
+			size = 3
+		}
+		val := testrand.Bytes(size)
+		entries = append(entries, &kv{k: testrand.Bytes(32), v: val})
+	}
+	slices.SortFunc(entries, (*kv).cmp)
+
+	tail := len(entries) - 1
+	for i := range entries {
+		// Pair the entry with its successor, clamped to the last entry
+		last := tail
+		if i+1 < tail {
+			last = i + 1
+		}
+		db := rawdb.NewMemoryDatabase()
+		r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, i, last)
+
+		// Only the chunks with both boundaries incomplete are checked
+		if i == 0 || last == tail {
+			continue
+		}
+		if r.updates() != 0 {
+			t.Fatalf("Unexpected data writes, got: %d", r.updates())
+		}
+	}
+}
--- a/eth/protocols/snap/metrics.go
+++ b/eth/protocols/snap/metrics.go
@ -27,21 +27,28 @@ var (
 	IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil)
 	EgressRegistrationErrorMeter  = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil)

-	// deletionGauge is the metric to track how many trie node deletions
-	// are performed in total during the sync process.
-	deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil)
+	// accountInnerDeleteGauge is the metric to track how many dangling trie nodes
+	// covered by extension node in account trie are deleted during the sync.
+	accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil)

-	// lookupGauge is the metric to track how many trie node lookups are
-	// performed to determine if node needs to be deleted.
-	lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil)
+	// storageInnerDeleteGauge is the metric to track how many dangling trie nodes
+	// covered by extension node in storage trie are deleted during the sync.
+	storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil)
+
+	// accountOuterDeleteGauge is the metric to track how many dangling trie nodes
+	// above the committed nodes in account trie are deleted during the sync.
+	accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil)

-	// boundaryAccountNodesGauge is the metric to track how many boundary trie
-	// nodes in account trie are met.
-	boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil)
+	// storageOuterDeleteGauge is the metric to track how many dangling trie nodes
+	// above the committed nodes in storage trie are deleted during the sync.
+	storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil)

-	// boundaryAccountNodesGauge is the metric to track how many boundary trie
-	// nodes in storage tries are met.
-	boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil)
+	// The lookup gauges below track how many trie node lookups are performed
+	// to determine if a node needs to be deleted, split by trie type (account
+	// or storage) and by node position (inner or outer).
+	accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil)
+	accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil)
+	storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil)
+	storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil)

 	// smallStorageGauge is the metric to track how many storages are small enough
 	// to retrieved in one or two request.
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@ -94,6 +94,9 @@ const (
 	// trienodeHealThrottleDecrease is the divisor for the throttle when the
 	// rate of arriving data is lower than the rate of processing it.
 	trienodeHealThrottleDecrease = 1.25
+
+	// batchSizeThreshold is the maximum size allowed for gentrie batch.
+	batchSizeThreshold = 8 * 1024 * 1024
 )

 var (
@ -321,8 +324,8 @@ type accountTask struct {
 	stateTasks     map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
 	stateCompleted map[common.Hash]struct{}    // Account hashes whose storage have been completed

-	genBatch ethdb.Batch     // Batch used by the node generator
-	genTrie  *trie.StackTrie // Node generator from storage slots
+	genBatch ethdb.Batch // Batch used by the node generator
+	genTrie  genTrie     // Node generator from storage slots

 	done bool // Flag whether the task can be removed
 }
@ -360,8 +363,8 @@ type storageTask struct {
 	root common.Hash     // Storage root hash for this instance
 	req  *storageRequest // Pending request to fill this task

-	genBatch ethdb.Batch     // Batch used by the node generator
-	genTrie  *trie.StackTrie // Node generator from storage slots
+	genBatch ethdb.Batch // Batch used by the node generator
+	genTrie  genTrie     // Node generator from storage slots

 	done bool // Flag whether the task can be removed
 }
@ -749,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 	}
 }

-// cleanPath is used to remove the dangling nodes in the stackTrie.
-func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) {
-	if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) {
-		rawdb.DeleteAccountTrieNode(batch, path)
-		deletionGauge.Inc(1)
-	}
-	if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) {
-		rawdb.DeleteStorageTrieNode(batch, owner, path)
-		deletionGauge.Inc(1)
-	}
-	lookupGauge.Inc(1)
-}
-
 // loadSyncStatus retrieves a previously aborted sync status from the database,
 // or generates a fresh one if none is available.
 func (s *Syncer) loadSyncStatus() {
@ -792,23 +782,12 @@ func (s *Syncer) loadSyncStatus() {
 						s.accountBytes += common.StorageSize(len(key) + len(value))
 					},
 				}
-				options := trie.NewStackTrieOptions()
-				options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-					rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme)
-				})
+				if s.scheme == rawdb.HashScheme {
+					task.genTrie = newHashTrie(task.genBatch)
+				}
 				if s.scheme == rawdb.PathScheme {
-					// Configure the dangling node cleaner and also filter out boundary nodes
-					// only in the context of the path scheme. Deletion is forbidden in the
-					// hash scheme, as it can disrupt state completeness.
-					options = options.WithCleaner(func(path []byte) {
-						s.cleanPath(task.genBatch, common.Hash{}, path)
-					})
-					// Skip the left boundary if it's not the first range.
-					// Skip the right boundary if it's not the last range.
-					options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge)
+					task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch)
 				}
-				task.genTrie = trie.NewStackTrie(options)
-
 				// Restore leftover storage tasks
 				for accountHash, subtasks := range task.SubTasks {
 					for _, subtask := range subtasks {
@ -820,23 +799,12 @@ func (s *Syncer) loadSyncStatus() {
 								s.storageBytes += common.StorageSize(len(key) + len(value))
 							},
 						}
-						owner := accountHash // local assignment for stacktrie writer closure
-						options := trie.NewStackTrieOptions()
-						options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-							rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme)
-						})
+						if s.scheme == rawdb.HashScheme {
+							subtask.genTrie = newHashTrie(subtask.genBatch)
+						}
 						if s.scheme == rawdb.PathScheme {
-							// Configure the dangling node cleaner and also filter out boundary nodes
-							// only in the context of the path scheme. Deletion is forbidden in the
-							// hash scheme, as it can disrupt state completeness.
-							options = options.WithCleaner(func(path []byte) {
-								s.cleanPath(subtask.genBatch, owner, path)
-							})
-							// Skip the left boundary if it's not the first range.
-							// Skip the right boundary if it's not the last range.
-							options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge)
+							subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch)
 						}
-						subtask.genTrie = trie.NewStackTrie(options)
 					}
 				}
 			}
@ -888,20 +856,12 @@ func (s *Syncer) loadSyncStatus() {
 				s.accountBytes += common.StorageSize(len(key) + len(value))
 			},
 		}
-		options := trie.NewStackTrieOptions()
-		options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-			rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme)
-		})
+		var tr genTrie
+		if s.scheme == rawdb.HashScheme {
+			tr = newHashTrie(batch)
+		}
 		if s.scheme == rawdb.PathScheme {
-			// Configure the dangling node cleaner and also filter out boundary nodes
-			// only in the context of the path scheme. Deletion is forbidden in the
-			// hash scheme, as it can disrupt state completeness.
-			options = options.WithCleaner(func(path []byte) {
-				s.cleanPath(batch, common.Hash{}, path)
-			})
-			// Skip the left boundary if it's not the first range.
-			// Skip the right boundary if it's not the last range.
-			options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge)
+			tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch)
 		}
 		s.tasks = append(s.tasks, &accountTask{
 			Next:           next,
@ -909,7 +869,7 @@ func (s *Syncer) loadSyncStatus() {
 			SubTasks:       make(map[common.Hash][]*storageTask),
 			genBatch:       batch,
 			stateCompleted: make(map[common.Hash]struct{}),
-			genTrie:        trie.NewStackTrie(options),
+			genTrie:        tr,
 		})
 		log.Debug("Created account sync task", "from", next, "last", last)
 		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
@ -920,11 +880,18 @@ func (s *Syncer) loadSyncStatus() {
 func (s *Syncer) saveSyncStatus() {
 	// Serialize any partial progress to disk before spinning down
 	for _, task := range s.tasks {
+		// Claim the right boundary as incomplete before flushing the
+		// accumulated nodes in batch; the nodes on the right boundary
+		// will be discarded and cleaned up by this call.
+		task.genTrie.commit(false)
 		if err := task.genBatch.Write(); err != nil {
 			log.Error("Failed to persist account slots", "err", err)
 		}
 		for _, subtasks := range task.SubTasks {
 			for _, subtask := range subtasks {
+				// Same as for the account trie: discard and clean up the
+				// incomplete right boundary.
+				subtask.genTrie.commit(false)
 				if err := subtask.genBatch.Write(); err != nil {
 					log.Error("Failed to persist storage slots", "err", err)
 				}
@ -2155,25 +2122,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 							s.storageBytes += common.StorageSize(len(key) + len(value))
 						},
 					}
-					owner := account // local assignment for stacktrie writer closure
-					options := trie.NewStackTrieOptions()
-					options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-						rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
-					})
+					var tr genTrie
+					if s.scheme == rawdb.HashScheme {
+						tr = newHashTrie(batch)
+					}
 					if s.scheme == rawdb.PathScheme {
-						options = options.WithCleaner(func(path []byte) {
-							s.cleanPath(batch, owner, path)
-						})
 						// Keep the left boundary as it's the first range.
-						// Skip the right boundary if it's not the last range.
-						options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+						tr = newPathTrie(account, false, s.db, batch)
 					}
 					tasks = append(tasks, &storageTask{
 						Next:     common.Hash{},
 						Last:     r.End(),
 						root:     acc.Root,
 						genBatch: batch,
-						genTrie:  trie.NewStackTrie(options),
+						genTrie:  tr,
 					})
 					for r.Next() {
 						batch := ethdb.HookedBatch{
@ -2182,27 +2144,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 								s.storageBytes += common.StorageSize(len(key) + len(value))
 							},
 						}
-						options := trie.NewStackTrieOptions()
-						options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-							rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
-						})
+						var tr genTrie
+						if s.scheme == rawdb.HashScheme {
+							tr = newHashTrie(batch)
+						}
 						if s.scheme == rawdb.PathScheme {
-							// Configure the dangling node cleaner and also filter out boundary nodes
-							// only in the context of the path scheme. Deletion is forbidden in the
-							// hash scheme, as it can disrupt state completeness.
-							options = options.WithCleaner(func(path []byte) {
-								s.cleanPath(batch, owner, path)
-							})
-							// Skip the left boundary as it's not the first range
-							// Skip the right boundary if it's not the last range.
-							options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+							tr = newPathTrie(account, true, s.db, batch)
 						}
 						tasks = append(tasks, &storageTask{
 							Next:     r.Start(),
 							Last:     r.End(),
 							root:     acc.Root,
 							genBatch: batch,
-							genTrie:  trie.NewStackTrie(options),
+							genTrie:  tr,
 						})
 					}
 					for _, task := range tasks {
@ -2248,26 +2202,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {

 		if i < len(res.hashes)-1 || res.subTask == nil {
 			// no need to make local reassignment of account: this closure does not outlive the loop
-			options := trie.NewStackTrieOptions()
-			options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-				rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme)
-			})
+			var tr genTrie
+			if s.scheme == rawdb.HashScheme {
+				tr = newHashTrie(batch)
+			}
 			if s.scheme == rawdb.PathScheme {
-				// Configure the dangling node cleaner only in the context of the
-				// path scheme. Deletion is forbidden in the hash scheme, as it can
-				// disrupt state completeness.
-				//
-				// Notably, boundary nodes can be also kept because the whole storage
-				// trie is complete.
-				options = options.WithCleaner(func(path []byte) {
-					s.cleanPath(batch, account, path)
-				})
+				// Keep the left boundary as it's complete
+				tr = newPathTrie(account, false, s.db, batch)
 			}
-			tr := trie.NewStackTrie(options)
 			for j := 0; j < len(res.hashes[i]); j++ {
-				tr.Update(res.hashes[i][j][:], res.slots[i][j])
+				tr.update(res.hashes[i][j][:], res.slots[i][j])
 			}
-			tr.Commit()
+			tr.commit(true)
 		}
 		// Persist the received storage segments. These flat state maybe
 		// outdated during the sync, but it can be fixed later during the
@ -2278,14 +2224,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 			// If we're storing large contracts, generate the trie nodes
 			// on the fly to not trash the gluing points
 			if i == len(res.hashes)-1 && res.subTask != nil {
-				res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
+				res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j])
 			}
 		}
 	}
 	// Large contracts could have generated new trie nodes, flush them to disk
 	if res.subTask != nil {
 		if res.subTask.done {
-			root := res.subTask.genTrie.Commit()
+			root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash)
 			if err := res.subTask.genBatch.Write(); err != nil {
 				log.Error("Failed to persist stack slots", "err", err)
 			}
@ -2302,8 +2248,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 					}
 				}
 			}
-		}
-		if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize {
+		} else if res.subTask.genBatch.ValueSize() > batchSizeThreshold {
+			res.subTask.genTrie.commit(false)
 			if err := res.subTask.genBatch.Write(); err != nil {
 				log.Error("Failed to persist stack slots", "err", err)
 			}
@ -2486,7 +2432,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
 			if err != nil {
 				panic(err) // Really shouldn't ever happen
 			}
-			task.genTrie.Update(hash[:], full)
+			task.genTrie.update(hash[:], full)
 		}
 	}
 	// Flush anything written just now and update the stats
@ -2519,9 +2465,13 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
 	// flush after finalizing task.done. It's fine even if we crash and lose this
 	// write as it will only cause more data to be downloaded during heal.
 	if task.done {
-		task.genTrie.Commit()
-	}
-	if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
+		task.genTrie.commit(task.Last == common.MaxHash)
+		if err := task.genBatch.Write(); err != nil {
+			log.Error("Failed to persist stack account", "err", err)
+		}
+		task.genBatch.Reset()
+	} else if task.genBatch.ValueSize() > batchSizeThreshold {
+		task.genTrie.commit(false)
 		if err := task.genBatch.Write(); err != nil {
 			log.Error("Failed to persist stack account", "err", err)
 		}
--- a/internal/testrand/rand.go
+++ b/internal/testrand/rand.go
@ -0,0 +1,53 @@
+// Copyright 2023 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package testrand
+
+import (
+	crand "crypto/rand"
+	"encoding/binary"
+	mrand "math/rand"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// prng is a pseudo random number generator seeded by strong randomness.
+// Note the seed is not printed or stored, so failures cannot be replayed.
+var prng = initRand()
+
+func initRand() *mrand.Rand {
+	var seed [8]byte
+	crand.Read(seed[:])
+	rnd := mrand.New(mrand.NewSource(int64(binary.LittleEndian.Uint64(seed[:]))))
+	return rnd
+}
+
+// Bytes generates a random byte slice with specified length.
+func Bytes(n int) []byte {
+	r := make([]byte, n)
+	prng.Read(r)
+	return r
+}
+
+// Hash generates a random hash.
+func Hash() common.Hash {
+	return common.BytesToHash(Bytes(common.HashLength))
+}
+
+// Address generates a random address.
+func Address() common.Address {
+	return common.BytesToAddress(Bytes(common.AddressLength))
+}
--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@ -23,8 +23,6 @@ import (

 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/types"
-	"github.com/ethereum/go-ethereum/log"
-	"github.com/ethereum/go-ethereum/metrics"
 )

 var (
@ -32,62 +30,32 @@ var (
 	_      = types.TrieHasher((*StackTrie)(nil))
 )

-// StackTrieOptions contains the configured options for manipulating the stackTrie.
-type StackTrieOptions struct {
-	Writer  func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes
-	Cleaner func(path []byte)                                // The function to clean up dangling nodes
-
-	SkipLeftBoundary  bool          // Flag whether the nodes on the left boundary are skipped for committing
-	SkipRightBoundary bool          // Flag whether the nodes on the right boundary are skipped for committing
-	boundaryGauge     metrics.Gauge // Gauge to track how many boundary nodes are met
-}
-
-// NewStackTrieOptions initializes an empty options for stackTrie.
-func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} }
-
-// WithWriter configures trie node writer within the options.
-func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions {
-	o.Writer = writer
-	return o
-}
-
-// WithCleaner configures the cleaner in the option for removing dangling nodes.
-func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions {
-	o.Cleaner = cleaner
-	return o
-}
-
-// WithSkipBoundary configures whether the left and right boundary nodes are
-// filtered for committing, along with a gauge metrics to track how many
-// boundary nodes are met.
-func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions {
-	o.SkipLeftBoundary = skipLeft
-	o.SkipRightBoundary = skipRight
-	o.boundaryGauge = gauge
-	return o
-}
+// OnTrieNode is a callback method invoked when a trie node is committed
+// by the stack trie. The node is only committed if it's considered complete.
+//
+// The callback should not modify the contents of the supplied path and blob
+// slices, and their contents may be changed after the call. It is up to the
+// `onTrieNode` receiver function to deep-copy the data if it wants to retain
+// it after the call ends.
+type OnTrieNode func(path []byte, hash common.Hash, blob []byte)

 // StackTrie is a trie implementation that expects keys to be inserted
 // in order. Once it determines that a subtree will no longer be inserted
 // into, it will hash it and free up the memory it uses.
 type StackTrie struct {
-	options *StackTrieOptions
-	root    *stNode
-	h       *hasher
-
-	first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary.
-	last  []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary.
+	root       *stNode
+	h          *hasher
+	last       []byte
+	onTrieNode OnTrieNode
 }

-// NewStackTrie allocates and initializes an empty trie.
-func NewStackTrie(options *StackTrieOptions) *StackTrie {
-	if options == nil {
-		options = NewStackTrieOptions()
-	}
+// NewStackTrie allocates and initializes an empty trie. The committed nodes
+// will be discarded immediately if no callback is configured.
+func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
 	return &StackTrie{
-		options: options,
-		root:    stPool.Get().(*stNode),
-		h:       newHasher(false),
+		root:       stPool.Get().(*stNode),
+		h:          newHasher(false),
+		onTrieNode: onTrieNode,
 	}
 }

@ -101,10 +69,6 @@ func (t *StackTrie) Update(key, value []byte) error {
 	if bytes.Compare(t.last, k) >= 0 {
 		return errors.New("non-ascending key order")
 	}
-	// track the first and last inserted entries.
-	if t.first == nil {
-		t.first = append([]byte{}, k...)
-	}
 	if t.last == nil {
 		t.last = append([]byte{}, k...) // allocate key slice
 	} else {
@ -114,19 +78,9 @@ func (t *StackTrie) Update(key, value []byte) error {
 	return nil
 }

-// MustUpdate is a wrapper of Update and will omit any encountered error but
-// just print out an error message.
-func (t *StackTrie) MustUpdate(key, value []byte) {
-	if err := t.Update(key, value); err != nil {
-		log.Error("Unhandled trie error in StackTrie.Update", "err", err)
-	}
-}
-
 // Reset resets the stack trie object to empty state.
 func (t *StackTrie) Reset() {
-	t.options = NewStackTrieOptions()
 	t.root = stPool.Get().(*stNode)
-	t.first = nil
 	t.last = nil
 }

@ -346,10 +300,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
 //
 // This method also sets 'st.type' to hashedNode, and clears 'st.key'.
 func (t *StackTrie) hash(st *stNode, path []byte) {
-	var (
-		blob     []byte   // RLP-encoded node blob
-		internal [][]byte // List of node paths covered by the extension node
-	)
+	var blob []byte // RLP-encoded node blob
 	switch st.typ {
 	case hashedNode:
 		return
@ -384,15 +335,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 		// recursively hash and commit child as the first step
 		t.hash(st.children[0], append(path, st.key...))

-		// Collect the path of internal nodes between shortNode and its **in disk**
-		// child. This is essential in the case of path mode scheme to avoid leaving
-		// danging nodes within the range of this internal path on disk, which would
-		// break the guarantee for state healing.
-		if len(st.children[0].val) >= 32 && t.options.Cleaner != nil {
-			for i := 1; i < len(st.key); i++ {
-				internal = append(internal, append(path, st.key[:i]...))
-			}
-		}
 		// encode the extension node
 		n := shortNode{Key: hexToCompactInPlace(st.key)}
 		if len(st.children[0].val) < 32 {
@ -416,11 +358,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 	default:
 		panic("invalid node type")
 	}
-
+	// Convert the node type to hashedNode and reset the key slice.
 	st.typ = hashedNode
 	st.key = st.key[:0]

-	// Skip committing the non-root node if the size is smaller than 32 bytes.
+	// Skip committing the non-root node if the size is smaller than 32 bytes
+	// as tiny nodes are always embedded in their parent except root node.
 	if len(blob) < 32 && len(path) > 0 {
 		st.val = common.CopyBytes(blob)
 		return
@ -429,51 +372,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 	// input values.
 	st.val = t.h.hashData(blob)

-	// Short circuit if the stack trie is not configured for writing.
-	if t.options.Writer == nil {
-		return
+	// Invoke the callback if it's provided. Notably, the path and blob slices
+	// are volatile; deep-copy the slices in the callback if the contents need
+	// to be retained.
+	if t.onTrieNode != nil {
+		t.onTrieNode(path, common.BytesToHash(st.val), blob)
 	}
-	// Skip committing if the node is on the left boundary and stackTrie is
-	// configured to filter the boundary.
-	if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) {
-		if t.options.boundaryGauge != nil {
-			t.options.boundaryGauge.Inc(1)
-		}
-		return
-	}
-	// Skip committing if the node is on the right boundary and stackTrie is
-	// configured to filter the boundary.
-	if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) {
-		if t.options.boundaryGauge != nil {
-			t.options.boundaryGauge.Inc(1)
-		}
-		return
-	}
-	// Clean up the internal dangling nodes covered by the extension node.
-	// This should be done before writing the node to adhere to the committing
-	// order from bottom to top.
-	for _, path := range internal {
-		t.options.Cleaner(path)
-	}
-	t.options.Writer(path, common.BytesToHash(st.val), blob)
 }

 // Hash will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
+// all leftover nodes to the associated database. Actually most of the trie nodes
+// have been committed already. The main purpose here is to commit the nodes on
+// right boundary.
 func (t *StackTrie) Hash() common.Hash {
 	n := t.root
 	t.hash(n, nil)
 	return common.BytesToHash(n.val)
 }
-
-// Commit will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
-func (t *StackTrie) Commit() common.Hash {
-	return t.Hash()
-}
--- a/trie/stacktrie_fuzzer_test.go
+++ b/trie/stacktrie_fuzzer_test.go
@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) {
 		trieA   = NewEmpty(dbA)
 		spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
 		dbB     = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme)
-
-		options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+		trieB   = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
 			rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme())
 		})
-		trieB       = NewStackTrie(options)
 		vals        []*kv
 		maxElements = 10000
 		// operate on unique keys only
@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) {
 		if debugging {
 			fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v)
 		}
-		trieB.MustUpdate(kv.k, kv.v)
+		trieB.Update(kv.k, kv.v)
 	}
 	rootB := trieB.Hash()
-	trieB.Commit()
 	if rootA != rootB {
 		panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
 	}
@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) {

 	// Ensure all the nodes are persisted correctly
 	var (
-		nodeset  = make(map[string][]byte) // path -> blob
-		optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+		nodeset = make(map[string][]byte) // path -> blob
+		trieC   = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
 			if crypto.Keccak256Hash(blob) != hash {
 				panic("invalid node blob")
 			}
 			nodeset[string(path)] = common.CopyBytes(blob)
 		})
-		trieC   = NewStackTrie(optionsC)
 		checked int
 	)
 	for _, kv := range vals {
-		trieC.MustUpdate(kv.k, kv.v)
+		trieC.Update(kv.k, kv.v)
 	}
-	rootC := trieC.Commit()
+	rootC := trieC.Hash()
 	if rootA != rootC {
 		panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC))
 	}
--- a/trie/stacktrie_test.go
+++ b/trie/stacktrie_test.go
@ -19,15 +19,12 @@ package trie
 import (
 	"bytes"
 	"math/big"
-	"math/rand"
 	"testing"

 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/crypto"
-	"github.com/ethereum/go-ethereum/trie/testutil"
 	"github.com/stretchr/testify/assert"
-	"golang.org/x/exp/slices"
 )

 func TestStackTrieInsertAndHash(t *testing.T) {
@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) {
 	}
 }

-func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash {
-	var (
-		options = NewStackTrieOptions()
-		nodes   = make(map[string]common.Hash)
-	)
-	var (
-		first int
-		last  = len(entries) - 1
-
-		noLeft  bool
-		noRight bool
-	)
-	// Enter split mode if there are at least two elements
-	if rand.Intn(5) != 0 {
-		for {
-			first = rand.Intn(len(entries))
-			last = rand.Intn(len(entries))
-			if first <= last {
-				break
-			}
-		}
-		if first != 0 {
-			noLeft = true
-		}
-		if last != len(entries)-1 {
-			noRight = true
-		}
-	}
-	options = options.WithSkipBoundary(noLeft, noRight, nil)
-	options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-		nodes[string(path)] = hash
-	})
-	tr := NewStackTrie(options)
-
-	for i := first; i <= last; i++ {
-		tr.MustUpdate(entries[i].k, entries[i].v)
-	}
-	tr.Commit()
-	return nodes
-}
-
-func TestPartialStackTrie(t *testing.T) {
-	for round := 0; round < 100; round++ {
-		var (
-			n       = rand.Intn(100) + 1
-			entries []*kv
-		)
-		for i := 0; i < n; i++ {
-			var val []byte
-			if rand.Intn(3) == 0 {
-				val = testutil.RandBytes(3)
-			} else {
-				val = testutil.RandBytes(32)
-			}
-			entries = append(entries, &kv{
-				k: testutil.RandBytes(32),
-				v: val,
-			})
-		}
-		slices.SortFunc(entries, (*kv).cmp)
-
-		var (
-			nodes   = make(map[string]common.Hash)
-			options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
-				nodes[string(path)] = hash
-			})
-		)
-		tr := NewStackTrie(options)
-
-		for i := 0; i < len(entries); i++ {
-			tr.MustUpdate(entries[i].k, entries[i].v)
-		}
-		tr.Commit()
-
-		for j := 0; j < 100; j++ {
-			for path, hash := range buildPartialTree(entries, t) {
-				if nodes[path] != hash {
-					t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash)
-				}
-			}
-		}
-	}
-}
-
 func TestStackTrieErrors(t *testing.T) {
 	s := NewStackTrie(nil)
 	// Deletion
--- a/trie/trie_test.go
+++ b/trie/trie_test.go
@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) {
 			id:     "b",
 			values: make(map[string]string),
 		}
-		options := NewStackTrieOptions()
-		options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+		stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
 			rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
 		})
-		stTrie := NewStackTrie(options)

 		// Fill the trie with elements
 		for i := 0; i < count; i++ {
@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) {
 		s.Flush()

 		// And flush stacktrie -> disk
-		stRoot := stTrie.Commit()
+		stRoot := stTrie.Hash()
 		if stRoot != root {
 			t.Fatalf("root wrong, got %x exp %x", stRoot, root)
 		}
@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
 		id:     "b",
 		values: make(map[string]string),
 	}
-	options := NewStackTrieOptions()
-	options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+	stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
 		rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
 	})
-	stTrie := NewStackTrie(options)
-
 	// Add a single small-element to the trie(s)
 	key := make([]byte, 5)
 	key[0] = 1
@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
 	db.Commit(root)

 	// And flush stacktrie -> disk
-	stRoot := stTrie.Commit()
+	stRoot := stTrie.Hash()
 	if stRoot != root {
 		t.Fatalf("root wrong, got %x exp %x", stRoot, root)
 	}