trie: reduce allocations in stacktrie (#30743)

This PR uses various tweaks and tricks to make the stacktrie near alloc-free. ``` [user@work go-ethereum]$ benchstat stacktrie.1 stacktrie.7 goos: linux goarch: amd64 pkg: github.com/ethereum/go-ethereum/trie cpu: 12th Gen Intel(R) Core(TM) i7-1270P │ stacktrie.1 │ stacktrie.7 │ │ sec/op │ sec/op vs base │ Insert100K-8 106.97m ± 8% 88.21m ± 34% -17.54% (p=0.000 n=10) │ stacktrie.1 │ stacktrie.7 │ │ B/op │ B/op vs base │ Insert100K-8 13199.608Ki ± 0% 3.424Ki ± 3% -99.97% (p=0.000 n=10) │ stacktrie.1 │ stacktrie.7 │ │ allocs/op │ allocs/op vs base │ Insert100K-8 553428.50 ± 0% 22.00 ± 5% -100.00% (p=0.000 n=10) ``` Also improves derivesha: ``` goos: linux goarch: amd64 pkg: github.com/ethereum/go-ethereum/core/types cpu: 12th Gen Intel(R) Core(TM) i7-1270P │ derivesha.1 │ derivesha.2 │ │ sec/op │ sec/op vs base │ DeriveSha200/stack_trie-8 477.8µ ± 2% 430.0µ ± 12% -10.00% (p=0.000 n=10) │ derivesha.1 │ derivesha.2 │ │ B/op │ B/op vs base │ DeriveSha200/stack_trie-8 45.17Ki ± 0% 25.65Ki ± 0% -43.21% (p=0.000 n=10) │ derivesha.1 │ derivesha.2 │ │ allocs/op │ allocs/op vs base │ DeriveSha200/stack_trie-8 1259.0 ± 0% 232.0 ± 0% -81.57% (p=0.000 n=10) ``` --------- Co-authored-by: Gary Rong <garyrong0905@gmail.com>
1 week ago · d3cc618951
parent a840e9b59f
commit d3cc618951
7 changed files with 238 additions and 39 deletions
--- a/trie/bytepool.go
+++ b/trie/bytepool.go
@ -0,0 +1,64 @@
 // Copyright 2024 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 package trie
 // bytesPool is a bounded free list of byte slices backed by a buffered
 // channel. All operations are non-blocking channel sends or receives, which
 // makes the pool safe for concurrent use.
 type bytesPool struct {
 	c chan []byte // pooled slices; the channel capacity bounds the pool size
 	w int         // capacity given to slices allocated by the pool
 }
 // newBytesPool creates a new bytesPool. The sliceCap sets the capacity of
 // newly allocated slices, and the nitems determines how many items the pool
 // will hold, at maximum.
 func newBytesPool(sliceCap, nitems int) *bytesPool {
 	return &bytesPool{
 		c: make(chan []byte, nitems),
 		w: sliceCap,
 	}
 }
 // Get returns a slice. Safe for concurrent use.
 //
 // A pooled slice is handed out exactly as it was Put: neither its length nor
 // its contents are reset. When the pool is empty a fresh zero-length slice
 // with the configured capacity is allocated instead.
 func (bp *bytesPool) Get() []byte {
 	select {
 	case b := <-bp.c:
 		return b
 	default:
 		return make([]byte, 0, bp.w)
 	}
 }
 // GetWithSize returns a slice of length s. Safe for concurrent use.
 //
 // The slice is taken from the pool when the pooled slice has enough capacity;
 // in that case the bytes are NOT zeroed and may hold stale data, so callers
 // are expected to overwrite them. Otherwise a new slice is allocated.
 func (bp *bytesPool) GetWithSize(s int) []byte {
 	b := bp.Get()
 	if cap(b) < s {
 		// The slice we grabbed cannot satisfy the request. Hand it back
 		// instead of dropping it, so that one oversized request does not
 		// drain the pool.
 		bp.Put(b)
 		return make([]byte, s)
 	}
 	return b[:s]
 }
 // Put returns a slice to the pool. Safe for concurrent use. This method
 // will ignore slices that are too small or too large (>3x the cap), and it
 // silently drops the slice when the pool is already full.
 func (bp *bytesPool) Put(b []byte) {
 	if c := cap(b); c < bp.w || c > 3*bp.w {
 		return
 	}
 	select {
 	case bp.c <- b:
 	default:
 	}
 }
--- a/trie/encoding.go
+++ b/trie/encoding.go
@ -104,6 +104,18 @@ func keybytesToHex(str []byte) []byte {
 	return nibbles
 }
 // writeHexKey writes the hex-nibble expansion of key into dst and returns the
 // written part of dst: every key byte becomes two bytes, high nibble first.
 // OBS! This method omits the termination flag.
 // OBS! The dst slice must have a capacity of at least 2x the key length,
 // otherwise the method panics. An empty key yields an empty slice.
 func writeHexKey(dst []byte, key []byte) []byte {
 	// Reslice once up front: this validates the destination size in a single
 	// check and, unlike indexing dst[2*len(key)-1], is well defined for an
 	// empty key (which would otherwise index dst[-1] and panic).
 	dst = dst[:2*len(key)]
 	for i, b := range key {
 		dst[i*2] = b / 16
 		dst[i*2+1] = b % 16
 	}
 	return dst
 }
 // hexToKeybytes turns hex nibbles into key bytes.
 // This can only be used for keys of even length.
 func hexToKeybytes(hex []byte) []byte {
--- a/trie/hasher.go
+++ b/trie/hasher.go
@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode {
 	return n
 }
 // hashDataTo hashes the provided data to the given destination buffer. The caller
 // must ensure that the dst buffer is of appropriate size.
 //
 // The hasher's internal state is reset first, so earlier writes do not leak
 // into the digest. The return values of Write and Read are discarded.
 func (h *hasher) hashDataTo(dst, data []byte) {
 	h.sha.Reset()
 	h.sha.Write(data)
 	// NOTE(review): presumably Read fills dst with len(dst) digest bytes, so
 	// callers pass a 32-byte slice -- confirm against the type of h.sha.
 	h.sha.Read(dst)
 }
 // proofHash is used to construct trie proofs, and returns the 'collapsed'
 // node (for later RLP encoding) as well as the hashed node -- unless the
 // node is smaller than 32 bytes, in which case it will be returned as is.
--- a/trie/node.go
+++ b/trie/node.go
@ -45,6 +45,27 @@ type (
 	}
 	hashNode  []byte
 	valueNode []byte
 	// fullnodeEncoder is a type used exclusively for encoding fullNode.
 	// Briefly instantiating a fullnodeEncoder and initializing it with
 	// existing slices is less memory-intensive than using the fullNode type.
 	//
 	// Each of the 17 slots holds the already-encoded child as a plain byte
 	// slice; a nil slot denotes a missing child.
 	fullnodeEncoder struct {
 		Children [17][]byte
 	}
 	// extNodeEncoder is a type used exclusively for encoding extension nodes.
 	// Briefly instantiating an extNodeEncoder and initializing it with existing
 	// slices is less memory-intensive than using the shortNode type.
 	extNodeEncoder struct {
 		Key []byte // written as the first list item
 		Val []byte // child reference; nil is encoded as the empty string
 	}
 	// leafNodeEncoder is a type used exclusively for encoding leaf nodes.
 	leafNodeEncoder struct {
 		Key []byte // compact format key
 		Val []byte // value of the leaf, must be non-nil
 	}
 )
 // nilValueNode is used when collapsing internal trie nodes for hashing, since
@ -89,6 +110,7 @@ func (n *fullNode) fstring(ind string) string {
 	}
 	return resp + fmt.Sprintf("\n%s] ", ind)
 }
 // fstring renders the short node as "{key: child} ", printing the key in hex
 // and the child with an indentation that is two spaces deeper.
 func (n *shortNode) fstring(ind string) string {
 	child := n.Val.fstring(ind + "  ")
 	return fmt.Sprintf("{%x: %v} ", n.Key, child)
 }
@ -99,19 +121,6 @@ func (n valueNode) fstring(ind string) string {
 	return fmt.Sprintf("%x ", []byte(n))
 }
 // rawNode is a simple binary blob used to differentiate between collapsed trie
 // nodes and already encoded RLP binary blobs (while at the same time storing
 // them in the same cache fields).
 type rawNode []byte
 // cache and fstring are stubs: both always panic, because a rawNode is not
 // expected to ever be part of a live trie.
 func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
 func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }
 // EncodeRLP writes the blob to w unchanged and returns the writer's error.
 func (n rawNode) EncodeRLP(w io.Writer) error {
 	_, err := w.Write(n)
 	return err
 }
 // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
 func mustDecodeNode(hash, buf []byte) node {
 	n, err := decodeNode(hash, buf)
--- a/trie/node_enc.go
+++ b/trie/node_enc.go
@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
 	w.ListEnd(offset)
 }
 // encode emits the node as a single list holding one item per child slot.
 // A nil child is written as rlp.EmptyString, a child shorter than 32 bytes
 // is passed to Write, and anything longer is passed to WriteBytes.
 func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
 	list := w.List()
 	for i := range n.Children {
 		switch child := n.Children[i]; {
 		case child == nil:
 			w.Write(rlp.EmptyString)
 		case len(child) < 32:
 			w.Write(child) // rawNode
 		default:
 			w.WriteBytes(child) // hashNode
 		}
 	}
 	w.ListEnd(list)
 }
 func (n *shortNode) encode(w rlp.EncoderBuffer) {
 	offset := w.List()
 	w.WriteBytes(n.Key)
@ -51,6 +65,27 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) {
 	w.ListEnd(offset)
 }
 // encode emits the extension node as a two-item list: the key, followed by
 // the child. A nil child is written as rlp.EmptyString, a child shorter than
 // 32 bytes is passed to Write, and anything longer is passed to WriteBytes.
 func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
 	list := w.List()
 	w.WriteBytes(n.Key)
 	switch child := n.Val; {
 	case child == nil:
 		w.Write(rlp.EmptyString)
 	case len(child) < 32:
 		w.Write(child) // rawNode
 	default:
 		w.WriteBytes(child) // hashNode
 	}
 	w.ListEnd(list)
 }
 // encode emits the leaf node as a two-item list: the compact format key
 // followed by the value. The value must be non-nil.
 func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) {
 	list := w.List()
 	// Compact format key first, then the value node.
 	w.WriteBytes(n.Key)
 	w.WriteBytes(n.Val)
 	w.ListEnd(list)
 }
 // encode hands the hash bytes to the encoder buffer through WriteBytes.
 func (n hashNode) encode(w rlp.EncoderBuffer) {
 	w.WriteBytes(n)
 }
@ -58,7 +93,3 @@ func (n hashNode) encode(w rlp.EncoderBuffer) {
 // encode hands the value bytes to the encoder buffer through WriteBytes.
 func (n valueNode) encode(w rlp.EncoderBuffer) {
 	w.WriteBytes(n)
 }
 // encode hands the blob to the encoder buffer through Write rather than
 // WriteBytes.
 // NOTE(review): presumably Write appends the bytes without adding a string
 // header, since a rawNode is already encoded -- confirm in the rlp package.
 func (n rawNode) encode(w rlp.EncoderBuffer) {
 	w.Write(n)
 }
--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@ -27,6 +27,7 @@ import (
 var (
 	// stPool recycles stNode instances; an empty pool allocates a new stNode.
 	stPool = sync.Pool{New: func() any { return new(stNode) }}
 	// bPool recycles the byte slices that hold the val of hashed nodes. It
 	// allocates slices with capacity 32 and keeps at most 100 of them.
 	bPool  = newBytesPool(32, 100)
 	// This conversion only compiles while *StackTrie can be converted to
 	// types.TrieHasher.
 	_      = types.TrieHasher((*StackTrie)(nil))
 )
@ -47,6 +48,8 @@ type StackTrie struct {
 	h          *hasher
 	last       []byte
 	onTrieNode OnTrieNode
 	kBuf       []byte // buf space used for hex-key during insertions
 	pBuf       []byte // buf space used for path during insertions
 }
 // NewStackTrie allocates and initializes an empty trie. The committed nodes
@ -56,6 +59,17 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
 		root:       stPool.Get().(*stNode),
 		h:          newHasher(false),
 		onTrieNode: onTrieNode,
 		kBuf:       make([]byte, 64),
 		pBuf:       make([]byte, 64),
 	}
 }
 // grow ensures that both scratch buffers can hold the hex expansion of key,
 // i.e. two bytes per key byte. A buffer whose capacity already suffices is
 // left untouched; otherwise it is replaced by a new slice of exactly the
 // required length (the old contents are not carried over).
 func (t *StackTrie) grow(key []byte) {
 	need := 2 * len(key)
 	if need > cap(t.kBuf) {
 		t.kBuf = make([]byte, need)
 	}
 	if need > cap(t.pBuf) {
 		t.pBuf = make([]byte, need)
 	}
 }
@ -64,7 +78,8 @@ func (t *StackTrie) Update(key, value []byte) error {
 	if len(value) == 0 {
 		return errors.New("trying to insert empty (deletion)")
 	}
-	k := t.TrieKey(key)
+	t.grow(key)
 	k := writeHexKey(t.kBuf, key)
 	if bytes.Compare(t.last, k) >= 0 {
 		return errors.New("non-ascending key order")
 	}
@ -73,7 +88,7 @@ func (t *StackTrie) Update(key, value []byte) error {
 	} else {
 		t.last = append(t.last[:0], k...) // reuse key slice
 	}
-	t.insert(t.root, k, value, nil)
+	t.insert(t.root, k, value, t.pBuf[:0])
 	return nil
 }
@ -129,6 +144,12 @@ const (
 )
 func (n *stNode) reset() *stNode {
 	if n.typ == hashedNode {
 		// On hashnodes, we 'own' the val: it is guaranteed to be not held
 		// by external caller. Hence, when we arrive here, we can put it back
 		// into the pool
 		bPool.Put(n.val)
 	}
 	n.key = n.key[:0]
 	n.val = nil
 	for i := range n.children {
@ -150,8 +171,12 @@ func (n *stNode) getDiffIndex(key []byte) int {
 	return len(n.key)
 }
-// Helper function to that inserts a (key, value) pair into
+// Helper function that inserts a (key, value) pair into the trie.
-// the trie.
+//
 //   - The key is not retained by this method, but always copied if needed.
 //   - The value is retained by this method, as long as the leaf that it represents
 //     remains unhashed. However: it is never modified.
 //   - The path is not retained by this method.
 func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
 	switch st.typ {
 	case branchNode: /* Branch */
@ -283,7 +308,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
 	case emptyNode: /* Empty */
 		st.typ = leafNode
-		st.key = key
+		st.key = append(st.key, key...) // deep-copy the key as it's volatile
 		st.val = value
 	case hashedNode:
@ -318,35 +343,33 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 		return
 	case branchNode:
-		var nodes fullNode
+		var nodes fullnodeEncoder
 		for i, child := range st.children {
 			if child == nil {
 				nodes.Children[i] = nilValueNode
 				continue
 			}
 			t.hash(child, append(path, byte(i)))
 			nodes.Children[i] = child.val
 		}
 		nodes.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
-			if len(child.val) < 32 {
+		for i, child := range st.children {
-				nodes.Children[i] = rawNode(child.val)
+			if child == nil {
-			} else {
+				continue
 				nodes.Children[i] = hashNode(child.val)
 			}
 			st.children[i] = nil
 			stPool.Put(child.reset()) // Release child back to pool.
 		}
 		nodes.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
 	case extNode:
 		// recursively hash and commit child as the first step
 		t.hash(st.children[0], append(path, st.key...))
 		// encode the extension node
-		n := shortNode{Key: hexToCompactInPlace(st.key)}
+		n := extNodeEncoder{
-		if len(st.children[0].val) < 32 {
+			Key: hexToCompactInPlace(st.key),
-			n.Val = rawNode(st.children[0].val)
+			Val: st.children[0].val,
 		} else {
 			n.Val = hashNode(st.children[0].val)
 		}
 		n.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
@ -356,8 +379,10 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 	case leafNode:
 		st.key = append(st.key, byte(16))
-		n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}
+		n := leafNodeEncoder{
-
+			Key: hexToCompactInPlace(st.key),
 			Val: st.val,
 		}
 		n.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
@ -368,15 +393,19 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 	st.typ = hashedNode
 	st.key = st.key[:0]
 	st.val = nil // Release reference to potentially externally held slice.
 	// Skip committing the non-root node if the size is smaller than 32 bytes
 	// as tiny nodes are always embedded in their parent except root node.
 	if len(blob) < 32 && len(path) > 0 {
-		st.val = common.CopyBytes(blob)
+		st.val = bPool.GetWithSize(len(blob))
 		copy(st.val, blob)
 		return
 	}
 	// Write the hash to the 'val'. We allocate a new val here to not mutate
 	// input values.
-	st.val = t.h.hashData(blob)
+	st.val = bPool.GetWithSize(32)
 	t.h.hashDataTo(st.val, blob)
 	// Invoke the callback it's provided. Notably, the path and blob slices are
 	// volatile, please deep-copy the slices in callback if the contents need
--- a/trie/stacktrie_test.go
+++ b/trie/stacktrie_test.go
@ -18,6 +18,7 @@ package trie
 import (
 	"bytes"
 	"encoding/binary"
 	"math/big"
 	"testing"
@ -398,3 +399,48 @@ func TestStackTrieErrors(t *testing.T) {
 	assert.NotNil(t, s.Update([]byte{0x10}, []byte{0xb}), "out of order insert")
 	assert.NotNil(t, s.Update([]byte{0xaa}, []byte{0xb}), "repeat insert same key")
 }
 // BenchmarkInsert100K measures building a stack trie out of 100K ascending
 // 8-byte keys (stride 1024) that all share one 20-byte value, followed by a
 // single root hash computation per iteration.
 //
 // Every iteration inserts identical data, so the root hash of the first
 // iteration serves as the reference for all later ones. A mismatch would
 // point at state leaking between iterations, and aborts the benchmark.
 func BenchmarkInsert100K(b *testing.B) {
 	const num = 100_000
 	var (
 		key  = make([]byte, 8)
 		val  = make([]byte, 20)
 		want common.Hash
 	)
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		s := NewStackTrie(nil)
 		var k uint64
 		for j := 0; j < num; j++ {
 			binary.BigEndian.PutUint64(key, k)
 			if err := s.Update(key, val); err != nil {
 				b.Fatal(err)
 			}
 			k += 1024
 		}
 		// Hash exactly once per iteration, so that the measured work is the
 		// same regardless of whether the comparison is performed.
 		have := s.Hash()
 		if want == (common.Hash{}) {
 			want = have
 			continue
 		}
 		if have != want {
 			b.Fatalf("hash wrong, have %x want %x", have, want)
 		}
 	}
 }
 // TestInsert100K inserts 100K ascending 8-byte keys (stride 1024), all with
 // the same 20-byte zero value, and compares the resulting root hash against
 // a fixed expected value.
 func TestInsert100K(t *testing.T) {
 	const count = 100_000
 	var (
 		keyBuf = make([]byte, 8)
 		value  = make([]byte, 20)
 		st     = NewStackTrie(nil)
 	)
 	for i, k := 0, uint64(0); i < count; i, k = i+1, k+1024 {
 		binary.BigEndian.PutUint64(keyBuf, k)
 		if err := st.Update(keyBuf, value); err != nil {
 			t.Fatal(err)
 		}
 	}
 	want := common.HexToHash("0xb0071bd257342925d9d8a9f002b9d2b646a35437aa8b089628ab56e428d29a1a")
 	have := st.Hash()
 	if have != want {
 		t.Fatalf("hash wrong, have %x want %x", have, want)
 	}
 }