trie: polishes to trie committer (#21351)

* trie: update tests to check commit integrity

* trie: polish committer

* trie: fix typo

* trie: remove hasvalue notion

According to the benchmarks, type assertion between the pointer and
interface is extremely fast.

BenchmarkIntmethod-12           1000000000               1.91 ns/op
BenchmarkInterface-12           1000000000               2.13 ns/op
BenchmarkTypeSwitch-12          1000000000               1.81 ns/op
BenchmarkTypeAssertion-12       2000000000               1.78 ns/op

So the overhead for asserting whether the shortnode has "valuenode"
child is super tiny. No necessary to have another field.

* trie: linter nitpicks

Co-authored-by: Martin Holst Swende <martin@swende.se>
pull/21643/head
gary rong 4 years ago committed by GitHub
parent dad26582b6
commit 053ed9cc84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 96
      trie/committer.go
  2. 4
      trie/hasher.go
  3. 5
      trie/trie.go
  4. 134
      trie/trie_test.go

@ -23,7 +23,6 @@ import (
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"golang.org/x/crypto/sha3" "golang.org/x/crypto/sha3"
) )
@ -36,7 +35,6 @@ type leaf struct {
size int // size of the rlp data (estimate) size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data hash common.Hash // hash of rlp data
node node // the node to commit node node // the node to commit
vnodes bool // set to true if the node (possibly) contains a valueNode
} }
// committer is a type used for the trie Commit operation. A committer has some // committer is a type used for the trie Commit operation. A committer has some
@ -74,18 +72,12 @@ func returnCommitterToPool(h *committer) {
committerPool.Put(h) committerPool.Put(h)
} }
// commitNeeded returns 'false' if the given node is already in sync with db
func (c *committer) commitNeeded(n node) bool {
hash, dirty := n.cache()
return hash == nil || dirty
}
// commit collapses a node down into a hash node and inserts it into the database // commit collapses a node down into a hash node and inserts it into the database
func (c *committer) Commit(n node, db *Database) (hashNode, error) { func (c *committer) Commit(n node, db *Database) (hashNode, error) {
if db == nil { if db == nil {
return nil, errors.New("no db provided") return nil, errors.New("no db provided")
} }
h, err := c.commit(n, db, true) h, err := c.commit(n, db)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -93,7 +85,7 @@ func (c *committer) Commit(n node, db *Database) (hashNode, error) {
} }
// commit collapses a node down into a hash node and inserts it into the database // commit collapses a node down into a hash node and inserts it into the database
func (c *committer) commit(n node, db *Database, force bool) (node, error) { func (c *committer) commit(n node, db *Database) (node, error) {
// if this path is clean, use available cached data // if this path is clean, use available cached data
hash, dirty := n.cache() hash, dirty := n.cache()
if hash != nil && !dirty { if hash != nil && !dirty {
@ -104,8 +96,11 @@ func (c *committer) commit(n node, db *Database, force bool) (node, error) {
case *shortNode: case *shortNode:
// Commit child // Commit child
collapsed := cn.copy() collapsed := cn.copy()
if _, ok := cn.Val.(valueNode); !ok {
childV, err := c.commit(cn.Val, db, false) // If the child is fullnode, recursively commit.
// Otherwise it can only be hashNode or valueNode.
if _, ok := cn.Val.(*fullNode); ok {
childV, err := c.commit(cn.Val, db)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -113,78 +108,78 @@ func (c *committer) commit(n node, db *Database, force bool) (node, error) {
} }
// The key needs to be copied, since we're delivering it to database // The key needs to be copied, since we're delivering it to database
collapsed.Key = hexToCompact(cn.Key) collapsed.Key = hexToCompact(cn.Key)
hashedNode := c.store(collapsed, db, force, true) hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok { if hn, ok := hashedNode.(hashNode); ok {
return hn, nil return hn, nil
} }
return collapsed, nil return collapsed, nil
case *fullNode: case *fullNode:
hashedKids, hasVnodes, err := c.commitChildren(cn, db, force) hashedKids, err := c.commitChildren(cn, db)
if err != nil { if err != nil {
return nil, err return nil, err
} }
collapsed := cn.copy() collapsed := cn.copy()
collapsed.Children = hashedKids collapsed.Children = hashedKids
hashedNode := c.store(collapsed, db, force, hasVnodes) hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok { if hn, ok := hashedNode.(hashNode); ok {
return hn, nil return hn, nil
} }
return collapsed, nil return collapsed, nil
case valueNode:
return c.store(cn, db, force, false), nil
// hashnodes aren't stored
case hashNode: case hashNode:
return cn, nil return cn, nil
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
} }
return hash, nil
} }
// commitChildren commits the children of the given fullnode // commitChildren commits the children of the given fullnode
func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) { func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, error) {
var children [17]node var children [17]node
var hasValueNodeChildren = false for i := 0; i < 16; i++ {
for i, child := range n.Children { child := n.Children[i]
if child == nil { if child == nil {
continue continue
} }
hnode, err := c.commit(child, db, false) // If it's the hashed child, save the hash value directly.
// Note: it's impossible that the child in range [0, 15]
// is a valuenode.
if hn, ok := child.(hashNode); ok {
children[i] = hn
continue
}
// Commit the child recursively and store the "hashed" value.
// Note the returned node can be some embedded nodes, so it's
// possible the type is not hashnode.
hashed, err := c.commit(child, db)
if err != nil { if err != nil {
return children, false, err return children, err
} }
children[i] = hnode children[i] = hashed
if _, ok := hnode.(valueNode); ok {
hasValueNodeChildren = true
} }
// For the 17th child, it's possible the type is valuenode.
if n.Children[16] != nil {
children[16] = n.Children[16]
} }
return children, hasValueNodeChildren, nil return children, nil
} }
// store hashes the node n and if we have a storage layer specified, it writes // store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any // the key/value pair to it and tracks any node->child references as well as any
// node->external trie references. // node->external trie references.
func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node { func (c *committer) store(n node, db *Database) node {
// Larger nodes are replaced by their hash and stored in the database. // Larger nodes are replaced by their hash and stored in the database.
var ( var (
hash, _ = n.cache() hash, _ = n.cache()
size int size int
) )
if hash == nil { if hash == nil {
if vn, ok := n.(valueNode); ok { // This was not generated - must be a small node stored in the parent.
c.tmp.Reset() // In theory we should apply the leafCall here if it's not nil(embedded
if err := rlp.Encode(&c.tmp, vn); err != nil { // node usually contains value). But small value(less than 32bytes) is
panic("encode error: " + err.Error()) // not our target.
}
size = len(c.tmp)
if size < 32 && !force {
return n // Nodes smaller than 32 bytes are stored inside their parent
}
hash = c.makeHashNode(c.tmp)
} else {
// This was not generated - must be a small node stored in the parent
// No need to do anything here
return n return n
}
} else { } else {
// We have the hash already, estimate the RLP encoding-size of the node. // We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact // The size is used for mem tracking, does not need to be exact
@ -197,7 +192,6 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
size: size, size: size,
hash: common.BytesToHash(hash), hash: common.BytesToHash(hash),
node: n, node: n,
vnodes: hasVnodeChildren,
} }
} else if db != nil { } else if db != nil {
// No leaf-callback used, but there's still a database. Do serial // No leaf-callback used, but there's still a database. Do serial
@ -209,30 +203,30 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
return hash return hash
} }
// commitLoop does the actual insert + leaf callback for nodes // commitLoop does the actual insert + leaf callback for nodes.
func (c *committer) commitLoop(db *Database) { func (c *committer) commitLoop(db *Database) {
for item := range c.leafCh { for item := range c.leafCh {
var ( var (
hash = item.hash hash = item.hash
size = item.size size = item.size
n = item.node n = item.node
hasVnodes = item.vnodes
) )
// We are pooling the trie nodes into an intermediate memory cache // We are pooling the trie nodes into an intermediate memory cache
db.lock.Lock() db.lock.Lock()
db.insert(hash, size, n) db.insert(hash, size, n)
db.lock.Unlock() db.lock.Unlock()
if c.onleaf != nil && hasVnodes {
if c.onleaf != nil {
switch n := n.(type) { switch n := n.(type) {
case *shortNode: case *shortNode:
if child, ok := n.Val.(valueNode); ok { if child, ok := n.Val.(valueNode); ok {
c.onleaf(nil, child, hash) c.onleaf(nil, child, hash)
} }
case *fullNode: case *fullNode:
for i := 0; i < 16; i++ { // For children in range [0, 15], it's impossible
if child, ok := n.Children[i].(valueNode); ok { // to contain valuenode. Only check the 17th child.
c.onleaf(nil, child, hash) if n.Children[16] != nil {
} c.onleaf(nil, n.Children[16].(valueNode), hash)
} }
} }
} }

@ -66,11 +66,11 @@ func returnHasherToPool(h *hasher) {
// hash collapses a node down into a hash node, also returning a copy of the // hash collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one. // original node initialized with the computed hash to replace the original one.
func (h *hasher) hash(n node, force bool) (hashed node, cached node) { func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
// We're not storing the node, just hashing, use available cached data // Return the cached hash if it's available
if hash, _ := n.cache(); hash != nil { if hash, _ := n.cache(); hash != nil {
return hash, n return hash, n
} }
// Trie not processed yet or needs storage, walk the children // Trie not processed yet, walk the children
switch n := n.(type) { switch n := n.(type) {
case *shortNode: case *shortNode:
collapsed, cached := h.hashShortNodeChildren(n) collapsed, cached := h.hashShortNodeChildren(n)

@ -505,13 +505,16 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
if t.root == nil { if t.root == nil {
return emptyRoot, nil return emptyRoot, nil
} }
// Derive the hash for all dirty nodes first. We hold the assumption
// in the following procedure that all nodes are hashed.
rootHash := t.Hash() rootHash := t.Hash()
h := newCommitter() h := newCommitter()
defer returnCommitterToPool(h) defer returnCommitterToPool(h)
// Do a quick check if we really need to commit, before we spin // Do a quick check if we really need to commit, before we spin
// up goroutines. This can happen e.g. if we load a trie for reading storage // up goroutines. This can happen e.g. if we load a trie for reading storage
// values, but don't write to it. // values, but don't write to it.
if !h.commitNeeded(t.root) { if _, dirty := t.root.cache(); !dirty {
return rootHash, nil return rootHash, nil
} }
var wg sync.WaitGroup var wg sync.WaitGroup

@ -19,7 +19,9 @@ package trie
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"errors"
"fmt" "fmt"
"hash"
"io/ioutil" "io/ioutil"
"math/big" "math/big"
"math/rand" "math/rand"
@ -31,9 +33,11 @@ import (
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/ethdb/leveldb" "github.com/ethereum/go-ethereum/ethdb/leveldb"
"github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rlp"
"golang.org/x/crypto/sha3"
) )
func init() { func init() {
@ -659,6 +663,136 @@ func makeAccounts(size int) (addresses [][20]byte, accounts [][]byte) {
return addresses, accounts return addresses, accounts
} }
// spongeDb is a dummy db backend which accumulates writes in a sponge
type spongeDb struct {
sponge hash.Hash
}
func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") }
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
func (s *spongeDb) Close() error { return nil }
func (s *spongeDb) Put(key []byte, value []byte) error {
s.sponge.Write(key)
s.sponge.Write(value)
return nil
}
func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }
// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
type spongeBatch struct {
db *spongeDb
}
func (b *spongeBatch) Put(key, value []byte) error {
b.db.Put(key, value)
return nil
}
func (b *spongeBatch) Delete(key []byte) error { panic("implement me") }
func (b *spongeBatch) ValueSize() int { return 100 }
func (b *spongeBatch) Write() error { return nil }
func (b *spongeBatch) Reset() {}
func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }
// TestCommitSequence tests that the trie.Commit operation writes the elements of the trie
// in the expected order, and calls the callbacks in the expected order.
// The test data was based on the 'master' code, and is basically random. It can be used
// to check whether changes to the trie modifies the write order or data in any way.
func TestCommitSequence(t *testing.T) {
for i, tc := range []struct {
count int
expWriteSeqHash []byte
expCallbackSeqHash []byte
}{
{20, common.FromHex("68c495e45209e243eb7e4f4e8ca8f9f7be71003bd9cafb8061b4534373740193"),
common.FromHex("01783213033d6b7781a641ab499e680d959336d025ac16f44d02f4f0c021bbf5")},
{200, common.FromHex("3b20d16c13c4bc3eb3b8d0ad7a169fef3b1600e056c0665895d03d3d2b2ff236"),
common.FromHex("fb8db0ec82e8f02729f11228940885b181c3047ab0d654ed0110291ca57111a8")},
{2000, common.FromHex("34eff3d1048bebdf77e9ae8bd939f2e7c742edc3dcd1173cff1aad9dbd20451a"),
common.FromHex("1c981604b1a9f8ffa40e0ae66b14830a87f5a4ed8345146a3912e6b2dcb05e63")},
} {
addresses, accounts := makeAccounts(tc.count)
// This spongeDb is used to check the sequence of disk-db-writes
s := &spongeDb{sponge: sha3.NewLegacyKeccak256()}
db := NewDatabase(s)
trie, _ := New(common.Hash{}, db)
// Another sponge is used to check the callback-sequence
callbackSponge := sha3.NewLegacyKeccak256()
// Fill the trie with elements
for i := 0; i < tc.count; i++ {
trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
}
// Flush trie -> database
root, _ := trie.Commit(nil)
// Flush memdb -> disk (sponge)
db.Commit(root, false, func(c common.Hash) {
// And spongify the callback-order
callbackSponge.Write(c[:])
})
if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) {
t.Fatalf("test %d, disk write sequence wrong:\ngot %x exp %x\n", i, got, exp)
}
if got, exp := callbackSponge.Sum(nil), tc.expCallbackSeqHash; !bytes.Equal(got, exp) {
t.Fatalf("test %d, call back sequence wrong:\ngot: %x exp %x\n", i, got, exp)
}
}
}
// TestCommitSequenceRandomBlobs is identical to TestCommitSequence
// but uses random blobs instead of 'accounts'
func TestCommitSequenceRandomBlobs(t *testing.T) {
for i, tc := range []struct {
count int
expWriteSeqHash []byte
expCallbackSeqHash []byte
}{
{20, common.FromHex("8e4a01548551d139fa9e833ebc4e66fc1ba40a4b9b7259d80db32cff7b64ebbc"),
common.FromHex("450238d73bc36dc6cc6f926987e5428535e64be403877c4560e238a52749ba24")},
{200, common.FromHex("6869b4e7b95f3097a19ddb30ff735f922b915314047e041614df06958fc50554"),
common.FromHex("0ace0b03d6cb8c0b82f6289ef5b1a1838306b455a62dafc63cada8e2924f2550")},
{2000, common.FromHex("444200e6f4e2df49f77752f629a96ccf7445d4698c164f962bbd85a0526ef424"),
common.FromHex("117d30dafaa62a1eed498c3dfd70982b377ba2b46dd3e725ed6120c80829e518")},
} {
prng := rand.New(rand.NewSource(int64(i)))
// This spongeDb is used to check the sequence of disk-db-writes
s := &spongeDb{sponge: sha3.NewLegacyKeccak256()}
db := NewDatabase(s)
trie, _ := New(common.Hash{}, db)
// Another sponge is used to check the callback-sequence
callbackSponge := sha3.NewLegacyKeccak256()
// Fill the trie with elements
for i := 0; i < tc.count; i++ {
key := make([]byte, 32)
var val []byte
// 50% short elements, 50% large elements
if prng.Intn(2) == 0 {
val = make([]byte, 1+prng.Intn(32))
} else {
val = make([]byte, 1+prng.Intn(4096))
}
prng.Read(key)
prng.Read(val)
trie.Update(key, val)
}
// Flush trie -> database
root, _ := trie.Commit(nil)
// Flush memdb -> disk (sponge)
db.Commit(root, false, func(c common.Hash) {
// And spongify the callback-order
callbackSponge.Write(c[:])
})
if got, exp := s.sponge.Sum(nil), tc.expWriteSeqHash; !bytes.Equal(got, exp) {
t.Fatalf("test %d, disk write sequence wrong:\ngot %x exp %x\n", i, got, exp)
}
if got, exp := callbackSponge.Sum(nil), tc.expCallbackSeqHash; !bytes.Equal(got, exp) {
t.Fatalf("test %d, call back sequence wrong:\ngot: %x exp %x\n", i, got, exp)
}
}
}
// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie. // BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically, // This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple // storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple

Loading…
Cancel
Save