diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index 6255fb221d..5126d26777 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -31,6 +31,9 @@ type genTrie interface { // update inserts the state item into generator trie. update(key, value []byte) error + // delete removes the state item from the generator trie. + delete(key []byte) error + // commit flushes the right boundary nodes if complete flag is true. This // function must be called before flushing the associated database batch. commit(complete bool) common.Hash @@ -113,7 +116,7 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // removed because it's a sibling of the nodes we want to commit, not // the parent or ancestor. for i := 0; i < len(path); i++ { - t.delete(path[:i], false) + t.deleteNode(path[:i], false) } } return @@ -136,7 +139,7 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // byte key. In either case, no gaps will be left in the path. if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { for i := len(path) + 1; i < len(t.last); i++ { - t.delete(t.last[:i], true) + t.deleteNode(t.last[:i], true) } } t.write(path, blob) @@ -192,8 +195,8 @@ func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) } -// delete commits the node deletion to provided database batch in path mode. -func (t *pathTrie) delete(path []byte, inner bool) { +// deleteNode commits the node deletion to provided database batch in path mode. +func (t *pathTrie) deleteNode(path []byte, inner bool) { if t.owner == (common.Hash{}) { t.deleteAccountNode(path, inner) } else { @@ -207,6 +210,34 @@ func (t *pathTrie) update(key, value []byte) error { return t.tr.Update(key, value) } +// delete implements genTrie interface, deleting the item from the stack trie. +func (t *pathTrie) delete(key []byte) error { + // Commit the trie since the right boundary is incomplete because + // of the deleted item. This will implicitly discard the last inserted + // item and clean some ancestor trie nodes of the last committed + // item in the database. + t.commit(false) + + // Reset the trie and all the internal trackers + t.first = nil + t.last = nil + t.tr.Reset() + + // Explicitly mark the left boundary as incomplete, as the left-side + // item of the next one has been deleted. Be aware that the next item + // to be inserted will be ignored from committing as well as it's on + // the left boundary. + t.skipLeftBoundary = true + + // Explicitly delete the potential leftover nodes on the specific + // path from the database. + tkey := t.tr.TrieKey(key) + for i := 0; i <= len(tkey); i++ { + t.deleteNode(tkey[:i], false) + } + return nil +} + // commit implements genTrie interface, flushing the right boundary if it's // considered as complete. Otherwise, the nodes on the right boundary are // discarded and cleaned up. @@ -255,7 +286,7 @@ func (t *pathTrie) commit(complete bool) common.Hash { // with no issues as they are actually complete. Also, from a database // perspective, first deleting and then rewriting is a valid data update. for i := 0; i < len(t.last); i++ { - t.delete(t.last[:i], false) + t.deleteNode(t.last[:i], false) } return common.Hash{} // the hash is meaningless for incomplete commit } @@ -278,6 +309,9 @@ func (t *hashTrie) update(key, value []byte) error { return t.tr.Update(key, value) } +// delete implements genTrie interface, ignoring the state item for deleting. +func (t *hashTrie) delete(key []byte) error { return nil } + // commit implements genTrie interface, committing the nodes on right boundary. func (t *hashTrie) commit(complete bool) common.Hash { if !complete { diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go index 1fb2dbce75..2da4f3c866 100644 --- a/eth/protocols/snap/gentrie_test.go +++ b/eth/protocols/snap/gentrie_test.go @@ -551,3 +551,145 @@ func TestTinyPartialTree(t *testing.T) { } } } + +func TestTrieDelete(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: testrand.Bytes(32), + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(index []int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + marks = map[int]struct{}{} + neighbors = map[int]struct{}{} + ) + for _, n := range index { + marks[n] = struct{}{} + } + for _, n := range index { + if n != 0 { + if _, ok := marks[n-1]; !ok { + neighbors[n-1] = struct{}{} + } + } + if n != len(entries)-1 { + if _, ok := neighbors[n+1]; !ok { + neighbors[n+1] = struct{}{} + } + } + } + // Write the junk nodes as the dangling + var injects []string + for _, n := range index { + nibbles := byteToHex(entries[n].k) + for i := 0; i <= len(nibbles); i++ { + injects = append(injects, string(nibbles[:i])) + } + } + for _, path := range injects { + rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32)) + } + tr := newPathTrie(common.Hash{}, false, db, batch) + for i := 0; i < len(entries); i++ { + if _, ok := marks[i]; ok { + tr.delete(entries[i].k) + } else { + tr.update(entries[i].k, entries[i].v) + } + } + tr.commit(true) + + r := newBatchReplay() + batch.Replay(r) + batch.Write() + + for _, path := range injects { + if rawdb.HasAccountTrieNode(db, []byte(path)) { + t.Fatalf("Unexpected leftover node %v", []byte(path)) + } + } + + // ensure all the written nodes match with the complete tree + set := make(map[string]common.Hash) + for path, hash := range r.modifies() { + if hash == (common.Hash{}) { + continue + } + n, ok := nodes[path] + if !ok { + t.Fatalf("Unexpected trie node: %v", []byte(path)) + } + if n != hash { + t.Fatalf("Unexpected trie node content: %v, want: %x, got: %x", []byte(path), n, hash) + } + set[path] = hash + } + + // ensure all the missing nodes either on the deleted path, or + // on the neighbor paths. + isMissing := func(path []byte) bool { + for n := range marks { + key := byteToHex(entries[n].k) + if bytes.HasPrefix(key, path) { + return true + } + } + for n := range neighbors { + key := byteToHex(entries[n].k) + if bytes.HasPrefix(key, path) { + return true + } + } + return false + } + for path := range nodes { + if _, ok := set[path]; ok { + continue + } + if !isMissing([]byte(path)) { + t.Fatalf("Missing node %v", []byte(path)) + } + } + } + var cases = []struct { + index []int + }{ + // delete the first + {[]int{0}}, + + // delete the last + {[]int{len(entries) - 1}}, + + // delete the first two + {[]int{0, 1}}, + + // delete the last two + {[]int{len(entries) - 2, len(entries) - 1}}, + + {[]int{ + 0, 2, 4, 6, + len(entries) - 1, + len(entries) - 3, + len(entries) - 5, + len(entries) - 7, + }}, + } + for _, c := range cases { + check(c.index) + } +} diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 88d7d34dcc..cdd03e6a0c 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2424,14 +2424,21 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { slim := types.SlimAccountRLP(*res.accounts[i]) rawdb.WriteAccountSnapshot(batch, hash, slim) - // If the task is complete, drop it into the stack trie to generate - // account trie nodes for it if !task.needHeal[i] { + // If the storage task is complete, drop it into the stack trie + // to generate account trie nodes for it full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted if err != nil { panic(err) // Really shouldn't ever happen } task.genTrie.update(hash[:], full) + } else { + // If the storage task is incomplete, explicitly delete the corresponding + // account item from the account trie to ensure that all nodes along the + // path to the incomplete storage trie are cleaned up. + if err := task.genTrie.delete(hash[:]); err != nil { + panic(err) // Really shouldn't ever happen + } } } // Flush anything written just now and update the stats diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 9c574db0bf..d194cbf0ae 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -64,8 +64,7 @@ func (t *StackTrie) Update(key, value []byte) error { if len(value) == 0 { return errors.New("trying to insert empty (deletion)") } - k := keybytesToHex(key) - k = k[:len(k)-1] // chop the termination flag + k := t.TrieKey(key) if bytes.Compare(t.last, k) >= 0 { return errors.New("non-ascending key order") } @@ -84,6 +83,13 @@ func (t *StackTrie) Reset() { t.last = nil } +// TrieKey returns the internal key representation for the given user key. +func (t *StackTrie) TrieKey(key []byte) []byte { + k := keybytesToHex(key) + k = k[:len(k)-1] // chop the termination flag + return k +} + // stNode represents a node within a StackTrie type stNode struct { typ uint8 // node type (as in branch, ext, leaf)