From 81678971dbd578751896c71f8724fb61f8f22ad7 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 9 Nov 2020 15:08:12 +0100 Subject: [PATCH] trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes (#21799) * trie: fix error in stacktrie not committing small roots * fuzzers: make trie-fuzzer use correct returnvalues * trie: improved tests * tests/fuzzers: fuzzer for stacktrie vs regular trie * test/fuzzers: make stacktrie fuzzer use 32-byte keys * trie: fix error in stacktrie with small nodes * trie: add (skipped) testcase for stacktrie * tests/fuzzers: address review comments for stacktrie fuzzer * trie: fix docs in stacktrie --- tests/fuzzers/stacktrie/debug/main.go | 23 +++ tests/fuzzers/stacktrie/trie_fuzzer.go | 197 +++++++++++++++++++++++++ tests/fuzzers/trie/trie-fuzzer.go | 11 +- trie/stacktrie.go | 39 +++-- trie/stacktrie_test.go | 49 ++++++ trie/trie_test.go | 36 +++++ 6 files changed, 341 insertions(+), 14 deletions(-) create mode 100644 tests/fuzzers/stacktrie/debug/main.go create mode 100644 tests/fuzzers/stacktrie/trie_fuzzer.go diff --git a/tests/fuzzers/stacktrie/debug/main.go b/tests/fuzzers/stacktrie/debug/main.go new file mode 100644 index 0000000000..1ec28a8ef1 --- /dev/null +++ b/tests/fuzzers/stacktrie/debug/main.go @@ -0,0 +1,23 @@ +package main + +import ( + "fmt" + "io/ioutil" + "os" + + "github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie" +) + +func main() { + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "Usage: debug ") + os.Exit(1) + } + crasher := os.Args[1] + data, err := ioutil.ReadFile(crasher) + if err != nil { + fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err) + os.Exit(1) + } + stacktrie.Debug(data) +} diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go new file mode 100644 index 0000000000..a072ff772d --- /dev/null +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -0,0 +1,197 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package stacktrie + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "hash" + "io" + "sort" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" + "golang.org/x/crypto/sha3" +) + +type fuzzer struct { + input io.Reader + exhausted bool + debugging bool +} + +func (f *fuzzer) read(size int) []byte { + out := make([]byte, size) + if _, err := f.input.Read(out); err != nil { + f.exhausted = true + } + return out +} + +func (f *fuzzer) readSlice(min, max int) []byte { + var a uint16 + binary.Read(f.input, binary.LittleEndian, &a) + size := min + int(a)%(max-min) + out := make([]byte, size) + if _, err := f.input.Read(out); err != nil { + f.exhausted = true + } + return out +} + +// spongeDb is a dummy db backend which accumulates writes in a sponge +type spongeDb struct { + sponge hash.Hash + debug bool +} + +func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") } +func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } +func (s *spongeDb) Delete(key []byte) error { panic("implement me") } +func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} } +func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } +func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } +func (s *spongeDb) Close() error { return nil } + +func (s *spongeDb) Put(key []byte, value []byte) error { + if s.debug { + fmt.Printf("db.Put %x : %x\n", key, value) + } + s.sponge.Write(key) + s.sponge.Write(value) + return nil +} +func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") } + +// spongeBatch is a dummy batch which immediately writes to the underlying spongedb +type spongeBatch struct { + db *spongeDb +} + +func (b *spongeBatch) Put(key, value []byte) error { + b.db.Put(key, value) + return nil +} +func (b *spongeBatch) Delete(key []byte) error { panic("implement me") } +func (b *spongeBatch) ValueSize() int { return 100 } +func (b *spongeBatch) Write() error { return nil } +func (b *spongeBatch) Reset() {} +func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil } + +type kv struct { + k, v []byte +} +type kvs []kv + +func (k kvs) Len() int { + return len(k) +} + +func (k kvs) Less(i, j int) bool { + return bytes.Compare(k[i].k, k[j].k) < 0 +} + +func (k kvs) Swap(i, j int) { + k[j], k[i] = k[i], k[j] +} + +// The function must return +// 1 if the fuzzer should increase priority of the +// given input during subsequent fuzzing (for example, the input is lexically +// correct and was parsed successfully); +// -1 if the input must not be added to corpus even if gives new coverage; and +// 0 otherwise +// other values are reserved for future use. +func Fuzz(data []byte) int { + f := fuzzer{ + input: bytes.NewReader(data), + exhausted: false, + } + return f.fuzz() +} + +func Debug(data []byte) int { + f := fuzzer{ + input: bytes.NewReader(data), + exhausted: false, + debugging: true, + } + return f.fuzz() +} + +func (f *fuzzer) fuzz() int { + + // This spongeDb is used to check the sequence of disk-db-writes + var ( + spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + dbA = trie.NewDatabase(spongeA) + trieA, _ = trie.New(common.Hash{}, dbA) + spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} + trieB = trie.NewStackTrie(spongeB) + vals kvs + useful bool + maxElements = 10000 + ) + // Fill the trie with elements + for i := 0; !f.exhausted && i < maxElements; i++ { + k := f.read(32) + v := f.readSlice(1, 500) + if f.exhausted { + // If it was exhausted while reading, the value may be all zeroes, + // thus 'deletion' which is not supported on stacktrie + break + } + vals = append(vals, kv{k: k, v: v}) + trieA.Update(k, v) + useful = true + } + if !useful { + return 0 + } + // Flush trie -> database + rootA, err := trieA.Commit(nil) + if err != nil { + panic(err) + } + // Flush memdb -> disk (sponge) + dbA.Commit(rootA, false, nil) + + // Stacktrie requires sorted insertion + sort.Sort(vals) + for _, kv := range vals { + if f.debugging { + fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v) + } + trieB.Update(kv.k, kv.v) + } + rootB := trieB.Hash() + if _, err := trieB.Commit(); err != nil { + panic(err) + } + if rootA != rootB { + panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) + } + sumA := spongeA.sponge.Sum(nil) + sumB := spongeB.sponge.Sum(nil) + if !bytes.Equal(sumA, sumB) { + panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB)) + } + return 1 +} diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go index 9818838053..762ab5f347 100644 --- a/tests/fuzzers/trie/trie-fuzzer.go +++ b/tests/fuzzers/trie/trie-fuzzer.go @@ -122,15 +122,22 @@ func Generate(input []byte) randTest { return steps } +// The function must return +// 1 if the fuzzer should increase priority of the +// given input during subsequent fuzzing (for example, the input is lexically +// correct and was parsed successfully); +// -1 if the input must not be added to corpus even if gives new coverage; and +// 0 otherwise +// other values are reserved for future use. func Fuzz(input []byte) int { program := Generate(input) if len(program) == 0 { - return -1 + return 0 } if err := runRandTest(program); err != nil { panic(err) } - return 0 + return 1 } func runRandTest(rt randTest) error { diff --git a/trie/stacktrie.go b/trie/stacktrie.go index ffccbbf4ac..575a04022f 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -314,19 +314,22 @@ func (st *StackTrie) hash() { panic(err) } case extNode: + st.children[0].hash() h = newHasher(false) defer returnHasherToPool(h) h.tmp.Reset() - st.children[0].hash() - // This is also possible: - //sz := hexToCompactInPlace(st.key) - //n := [][]byte{ - // st.key[:sz], - // st.children[0].val, - //} - n := [][]byte{ - hexToCompact(st.key), - st.children[0].val, + var valuenode node + if len(st.children[0].val) < 32 { + valuenode = rawNode(st.children[0].val) + } else { + valuenode = hashNode(st.children[0].val) + } + n := struct { + Key []byte + Val node + }{ + Key: hexToCompact(st.key), + Val: valuenode, } if err := rlp.Encode(&h.tmp, n); err != nil { panic(err) @@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) { return common.Hash{}, ErrCommitDisabled } st.hash() - h := common.BytesToHash(st.val) - return h, nil + if len(st.val) != 32 { + // If the node's RLP isn't 32 bytes long, the node will not + // be hashed (and committed), and instead contain the rlp-encoding of the + // node. For the top level node, we need to force the hashing+commit. + ret := make([]byte, 32) + h := newHasher(false) + defer returnHasherToPool(h) + h.sha.Reset() + h.sha.Write(st.val) + h.sha.Read(ret) + st.db.Put(ret, st.val) + return common.BytesToHash(ret), nil + } + return common.BytesToHash(st.val), nil } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index 26e3bade27..d4488b4029 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) { } } } + +// TestUpdateSmallNodes tests a case where the leaves are small (both key and value), +// which causes a lot of node-within-node. This case was found via fuzzing. +func TestUpdateSmallNodes(t *testing.T) { + st := NewStackTrie(nil) + nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + kvs := []struct { + K string + V string + }{ + {"63303030", "3041"}, // stacktrie.Update + {"65", "3000"}, // stacktrie.Update + } + for _, kv := range kvs { + nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + } + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} + +// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different +// sizes are used, and the second one has the same prefix as the first, then the +// stacktrie fails, since it's unable to 'expand' on an already added leaf. +// For all practical purposes, this is fine, since keys are fixed-size length +// in account and storage tries. +// +// The test is marked as 'skipped', and exists just to have the behaviour documented. +// This case was found via fuzzing. +func TestUpdateVariableKeys(t *testing.T) { + t.SkipNow() + st := NewStackTrie(nil) + nt, _ := New(common.Hash{}, NewDatabase(memorydb.New())) + kvs := []struct { + K string + V string + }{ + {"0x33303534636532393561313031676174", "303030"}, + {"0x3330353463653239356131303167617430", "313131"}, + } + for _, kv := range kvs { + nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + } + if nt.Hash() != st.Hash() { + t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) + } +} diff --git a/trie/trie_test.go b/trie/trie_test.go index 539451fbf4..682dec157c 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) { } } +// TestCommitSequenceSmallRoot tests that a trie which is essentially only a +// small (<32 byte) shortnode with an included value is properly committed to a +// database. +// This case might not matter, since in practice, all keys are 32 bytes, which means +// that even a small trie which contains a leaf will have an extension making it +// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do. +func TestCommitSequenceSmallRoot(t *testing.T) { + s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"} + db := NewDatabase(s) + trie, _ := New(common.Hash{}, db) + // Another sponge is used for the stacktrie commits + stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} + stTrie := NewStackTrie(stackTrieSponge) + // Add a single small-element to the trie(s) + key := make([]byte, 5) + key[0] = 1 + trie.TryUpdate(key, []byte{0x1}) + stTrie.TryUpdate(key, []byte{0x1}) + // Flush trie -> database + root, _ := trie.Commit(nil) + // Flush memdb -> disk (sponge) + db.Commit(root, false, nil) + // And flush stacktrie -> disk + stRoot, err := stTrie.Commit() + if err != nil { + t.Fatalf("Failed to commit stack trie %v", err) + } + if stRoot != root { + t.Fatalf("root wrong, got %x exp %x", stRoot, root) + } + fmt.Printf("root: %x\n", stRoot) + if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) { + t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp) + } +} + // BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie. // This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically, // storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple