From 5d3f5805d50c0630b0f68af21b82635880fe03ec Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 26 Apr 2023 14:01:54 +0800 Subject: [PATCH] trie: add node type common package (#27160) * trie: add node type common package In the trie/types package, a few node wrappers are defined, which will be used in the trie package, the trie/snap package, etc. Therefore, a standalone common package is created to hold them. * trie: rename trie/types to trie/trienode --- trie/committer.go | 25 +++++---- trie/database.go | 7 +-- trie/nodeset.go | 117 +++++++++++------------------------------- trie/tracer.go | 10 ++-- trie/trie.go | 4 +- trie/trie_test.go | 19 +++---- trie/trienode/node.go | 67 ++++++++++++++++++++++++ 7 files changed, 133 insertions(+), 116 deletions(-) create mode 100644 trie/trienode/node.go diff --git a/trie/committer.go b/trie/committer.go index af5f5f1a17..805d4a3143 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/trie/trienode" ) // leaf represents a trie leaf node @@ -33,13 +34,15 @@ type leaf struct { // insertion order. type committer struct { nodes *NodeSet + tracer *tracer collectLeaf bool } // newCommitter creates a new committer or picks one from the pool. -func newCommitter(nodeset *NodeSet, collectLeaf bool) *committer { +func newCommitter(nodeset *NodeSet, tracer *tracer, collectLeaf bool) *committer { return &committer{ nodes: nodeset, + tracer: tracer, collectLeaf: collectLeaf, } } @@ -134,22 +137,22 @@ func (c *committer) store(path []byte, n node) node { // The node is embedded in its parent, in other words, this node // will not be stored in the database independently, mark it as // deleted only if the node was existent in database before. 
- if _, ok := c.nodes.accessList[string(path)]; ok { - c.nodes.markDeleted(path) + prev, ok := c.tracer.accessList[string(path)] + if ok { + c.nodes.addNode(path, trienode.NewWithPrev(common.Hash{}, nil, prev)) } return n } - // We have the hash already, estimate the RLP encoding-size of the node. - // The size is used for mem tracking, does not need to be exact + // Collect the dirty node to nodeset for return. var ( nhash = common.BytesToHash(hash) - mnode = &memoryNode{ - hash: nhash, - node: nodeToBytes(n), - } + node = trienode.NewWithPrev( + nhash, + nodeToBytes(n), + c.tracer.accessList[string(path)], + ) ) - // Collect the dirty node to nodeset for return. - c.nodes.markUpdated(path, mnode) + c.nodes.addNode(path, node) // Collect the corresponding leaf node if it's required. We don't check // full node since it's impossible to store value in fullNode. The key diff --git a/trie/database.go b/trie/database.go index c1f7ddd756..c105c730a2 100644 --- a/trie/database.go +++ b/trie/database.go @@ -31,6 +31,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" ) var ( @@ -607,11 +608,11 @@ func (db *Database) Update(nodes *MergedNodeSet) error { } for _, owner := range order { subset := nodes.sets[owner] - subset.forEachWithOrder(func(path string, n *memoryNode) { - if n.isDeleted() { + subset.forEachWithOrder(func(path string, n *trienode.Node) { + if n.IsDeleted() { return // ignore deletion } - db.insert(n.hash, n.node) + db.insert(n.Hash, n.Blob) }) } // Link up the account trie and storage trie if the node points diff --git a/trie/nodeset.go b/trie/nodeset.go index fc2111375f..4ebbb3329c 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -22,89 +22,37 @@ import ( "strings" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/trie/trienode" ) -// memoryNode is all the information we know about a single 
cached trie node -// in the memory. -type memoryNode struct { - hash common.Hash // Node hash by hashing node blob, empty for deleted nodes - node []byte // Encoded node blob, nil for deleted nodes -} - -// memorySize returns the total memory size used by this node. -// nolint:unused -func (n *memoryNode) memorySize(pathlen int) int { - return len(n.node) + common.HashLength + pathlen -} - -// rlp returns the raw rlp encoded blob of the cached trie node, either directly -// from the cache, or by regenerating it from the collapsed node. -// nolint:unused -func (n *memoryNode) rlp() []byte { - return n.node -} - -// obj returns the decoded and expanded trie node, either directly from the cache, -// or by regenerating it from the rlp encoded blob. -// nolint:unused -func (n *memoryNode) obj() node { - return mustDecodeNode(n.hash[:], n.node) -} - -// isDeleted returns the indicator if the node is marked as deleted. -func (n *memoryNode) isDeleted() bool { - return n.hash == (common.Hash{}) -} - -// nodeWithPrev wraps the memoryNode with the previous node value. -// nolint: unused -type nodeWithPrev struct { - *memoryNode - prev []byte // RLP-encoded previous value, nil means it's non-existent -} - -// unwrap returns the internal memoryNode object. -// nolint:unused -func (n *nodeWithPrev) unwrap() *memoryNode { - return n.memoryNode -} - -// memorySize returns the total memory size used by this node. It overloads -// the function in memoryNode by counting the size of previous value as well. -// nolint: unused -func (n *nodeWithPrev) memorySize(pathlen int) int { - return n.memoryNode.memorySize(pathlen) + len(n.prev) -} - // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. 
type NodeSet struct { - owner common.Hash // the identifier of the trie - nodes map[string]*memoryNode // the set of dirty nodes(inserted, updated, deleted) - leaves []*leaf // the list of dirty leaves - updates int // the count of updated and inserted nodes - deletes int // the count of deleted nodes - - // The list of accessed nodes, which records the original node value. - // The origin value is expected to be nil for newly inserted node - // and is expected to be non-nil for other types(updated, deleted). - accessList map[string][]byte + owner common.Hash // the identifier of the trie + leaves []*leaf // the list of dirty leaves + updates int // the count of updated and inserted nodes + deletes int // the count of deleted nodes + + // The set of all dirty nodes. Dirty nodes include newly inserted nodes, + // deleted nodes and updated nodes. The original value of the newly + // inserted node must be nil, and the original value of the other two + // types must be non-nil. + nodes map[string]*trienode.WithPrev } // NewNodeSet initializes an empty node set to be used for tracking dirty nodes // from a specific account or storage trie. The owner is zero for the account // trie and the owning account address hash for storage tries. -func NewNodeSet(owner common.Hash, accessList map[string][]byte) *NodeSet { +func NewNodeSet(owner common.Hash) *NodeSet { return &NodeSet{ - owner: owner, - nodes: make(map[string]*memoryNode), - accessList: accessList, + owner: owner, + nodes: make(map[string]*trienode.WithPrev), } } // forEachWithOrder iterates the dirty nodes with the order from bottom to top, // right to left, nodes with the longest path will be iterated first. 
-func (set *NodeSet) forEachWithOrder(callback func(path string, n *memoryNode)) { +func (set *NodeSet) forEachWithOrder(callback func(path string, n *trienode.Node)) { var paths sort.StringSlice for path := range set.nodes { paths = append(paths, path) @@ -112,23 +60,21 @@ func (set *NodeSet) forEachWithOrder(callback func(path string, n *memoryNode)) // Bottom-up, longest path first sort.Sort(sort.Reverse(paths)) for _, path := range paths { - callback(path, set.nodes[path]) + callback(path, set.nodes[path].Unwrap()) } } -// markUpdated marks the node as dirty(newly-inserted or updated). -func (set *NodeSet) markUpdated(path []byte, node *memoryNode) { - set.nodes[string(path)] = node - set.updates += 1 -} - -// markDeleted marks the node as deleted. -func (set *NodeSet) markDeleted(path []byte) { - set.nodes[string(path)] = &memoryNode{} - set.deletes += 1 +// addNode adds the provided dirty node into set. +func (set *NodeSet) addNode(path []byte, n *trienode.WithPrev) { + if n.IsDeleted() { + set.deletes += 1 + } else { + set.updates += 1 + } + set.nodes[string(path)] = n } -// addLeaf collects the provided leaf node into set. +// addLeaf adds the provided leaf node into set. 
func (set *NodeSet) addLeaf(node *leaf) { set.leaves = append(set.leaves, node) } @@ -143,7 +89,7 @@ func (set *NodeSet) Size() (int, int) { func (set *NodeSet) Hashes() []common.Hash { var ret []common.Hash for _, node := range set.nodes { - ret = append(ret, node.hash) + ret = append(ret, node.Hash) } return ret } @@ -155,18 +101,17 @@ func (set *NodeSet) Summary() string { if set.nodes != nil { for path, n := range set.nodes { // Deletion - if n.isDeleted() { - fmt.Fprintf(out, " [-]: %x prev: %x\n", path, set.accessList[path]) + if n.IsDeleted() { + fmt.Fprintf(out, " [-]: %x prev: %x\n", path, n.Prev) continue } // Insertion - origin, ok := set.accessList[path] - if !ok { - fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.hash) + if len(n.Prev) == 0 { + fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.Hash) continue } // Update - fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.hash, origin) + fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.Hash, n.Prev) } } for _, n := range set.leaves { diff --git a/trie/tracer.go b/trie/tracer.go index a27e371c7a..04b2f12bf6 100644 --- a/trie/tracer.go +++ b/trie/tracer.go @@ -16,7 +16,10 @@ package trie -import "github.com/ethereum/go-ethereum/common" +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/trie/trienode" +) // tracer tracks the changes of trie nodes. During the trie operations, // some nodes can be deleted from the trie, while these deleted nodes @@ -117,9 +120,10 @@ func (t *tracer) markDeletions(set *NodeSet) { // It's possible a few deleted nodes were embedded // in their parent before, the deletions can be no // effect by deleting nothing, filter them out. 
- if _, ok := set.accessList[path]; !ok { + prev, ok := t.accessList[path] + if !ok { continue } - set.markDeleted([]byte(path)) + set.addNode([]byte(path), trienode.NewWithPrev(common.Hash{}, nil, prev)) } } diff --git a/trie/trie.go b/trie/trie.go index 18504dc5bc..14685c3df8 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -574,7 +574,7 @@ func (t *Trie) Hash() common.Hash { func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { defer t.tracer.reset() - nodes := NewNodeSet(t.owner, t.tracer.accessList) + nodes := NewNodeSet(t.owner) t.tracer.markDeletions(nodes) // Trie is empty and can be classified into two types of situations: @@ -595,7 +595,7 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { t.root = hashedNode return rootHash, nil } - t.root = newCommitter(nodes, collectLeaf).Commit(t.root) + t.root = newCommitter(nodes, t.tracer, collectLeaf).Commit(t.root) return rootHash, nodes } diff --git a/trie/trie_test.go b/trie/trie_test.go index 82ead8b441..a03a68283d 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -408,39 +408,36 @@ func verifyAccessList(old *Trie, new *Trie, set *NodeSet) error { // Check insertion set for path := range inserts { n, ok := set.nodes[path] - if !ok || n.isDeleted() { + if !ok || n.IsDeleted() { return errors.New("expect new node") } - _, ok = set.accessList[path] - if ok { + if len(n.Prev) > 0 { return errors.New("unexpected origin value") } } // Check deletion set for path, blob := range deletes { n, ok := set.nodes[path] - if !ok || !n.isDeleted() { + if !ok || !n.IsDeleted() { return errors.New("expect deleted node") } - v, ok := set.accessList[path] - if !ok { + if len(n.Prev) == 0 { return errors.New("expect origin value") } - if !bytes.Equal(v, blob) { + if !bytes.Equal(n.Prev, blob) { return errors.New("invalid origin value") } } // Check update set for path, blob := range updates { n, ok := set.nodes[path] - if !ok || n.isDeleted() { + if !ok || n.IsDeleted() { return 
errors.New("expect updated node") } - v, ok := set.accessList[path] - if !ok { + if len(n.Prev) == 0 { return errors.New("expect origin value") } - if !bytes.Equal(v, blob) { + if !bytes.Equal(n.Prev, blob) { return errors.New("invalid origin value") } } diff --git a/trie/trienode/node.go b/trie/trienode/node.go new file mode 100644 index 0000000000..5ebfea8896 --- /dev/null +++ b/trie/trienode/node.go @@ -0,0 +1,67 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trienode + +import "github.com/ethereum/go-ethereum/common" + +// Node is a wrapper which contains the encoded blob of the trie node and its +// unique hash identifier. It is general enough that can be used to represent +// trie nodes corresponding to different trie implementations. +type Node struct { + Hash common.Hash // Node hash, empty for deleted node + Blob []byte // Encoded node blob, nil for the deleted node +} + +// Size returns the total memory size used by this node. +func (n *Node) Size() int { + return len(n.Blob) + common.HashLength +} + +// IsDeleted returns the indicator if the node is marked as deleted. +func (n *Node) IsDeleted() bool { + return n.Hash == (common.Hash{}) +} + +// WithPrev wraps the Node with the previous node value attached. 
+type WithPrev struct { + *Node + Prev []byte // Encoded original value, nil means it's non-existent +} + +// Unwrap returns the internal Node object. +func (n *WithPrev) Unwrap() *Node { + return n.Node +} + +// Size returns the total memory size used by this node. It overloads +// the function in Node by counting the size of previous value as well. +func (n *WithPrev) Size() int { + return n.Node.Size() + len(n.Prev) +} + +// New constructs a node with provided node information. +func New(hash common.Hash, blob []byte) *Node { + return &Node{Hash: hash, Blob: blob} +} + +// NewWithPrev constructs a node with provided node information. +func NewWithPrev(hash common.Hash, blob []byte, prev []byte) *WithPrev { + return &WithPrev{ + Node: New(hash, blob), + Prev: prev, + } +}