From e567675473606cb325c6f51c83b9c5cb0592c8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 5 Dec 2019 15:37:25 +0200 Subject: [PATCH] core/state/snapshot: move iterator out into its own files --- core/state/snapshot/difflayer.go | 289 ------------------ core/state/snapshot/difflayer_test.go | 363 ----------------------- core/state/snapshot/iterator.go | 116 ++++++++ core/state/snapshot/iterator_binary.go | 115 +++++++ core/state/snapshot/iterator_fast.go | 211 +++++++++++++ core/state/snapshot/iterator_test.go | 396 +++++++++++++++++++++++++ 6 files changed, 838 insertions(+), 652 deletions(-) create mode 100644 core/state/snapshot/iterator.go create mode 100644 core/state/snapshot/iterator_binary.go create mode 100644 core/state/snapshot/iterator_fast.go create mode 100644 core/state/snapshot/iterator_test.go diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 0d97fbdc8..05d55a6fa 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -18,7 +18,6 @@ package snapshot import ( "encoding/binary" - "bytes" "fmt" "math" "math/rand" @@ -476,291 +475,3 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { dl.storageList[accountHash] = accountStorageList return accountStorageList } - -type Iterator interface { - // Next steps the iterator forward one element, and returns false if - // the iterator is exhausted - Next() bool - // Key returns the current key - Key() common.Hash - // Seek steps the iterator forward as many elements as needed, so that after - // calling Next(), the iterator will be at a key higher than the given hash - Seek(common.Hash) -} - -func (dl *diffLayer) newIterator() Iterator { - dl.AccountList() - return &dlIterator{dl, -1} -} - -type dlIterator struct { - layer *diffLayer - index int -} - -func (it *dlIterator) Next() bool { - if it.index < len(it.layer.accountList) { - it.index++ - } - return it.index < 
len(it.layer.accountList) -} - -func (it *dlIterator) Key() common.Hash { - if it.index < len(it.layer.accountList) { - return it.layer.accountList[it.index] - } - return common.Hash{} -} - -func (it *dlIterator) Seek(key common.Hash) { - // Search uses binary search to find and return the smallest index i - // in [0, n) at which f(i) is true - size := len(it.layer.accountList) - index := sort.Search(size, - func(i int) bool { - v := it.layer.accountList[i] - return bytes.Compare(key[:], v[:]) < 0 - }) - it.index = index - 1 -} - -type binaryIterator struct { - a Iterator - b Iterator - aDone bool - bDone bool - k common.Hash -} - -func (dl *diffLayer) newBinaryIterator() Iterator { - parent, ok := dl.parent.(*diffLayer) - if !ok { - // parent is the disk layer - return dl.newIterator() - } - l := &binaryIterator{ - a: dl.newIterator(), - b: parent.newBinaryIterator()} - - l.aDone = !l.a.Next() - l.bDone = !l.b.Next() - return l -} - -func (it *binaryIterator) Next() bool { - - if it.aDone && it.bDone { - return false - } - nextB := it.b.Key() -first: - nextA := it.a.Key() - if it.aDone { - it.bDone = !it.b.Next() - it.k = nextB - return true - } - if it.bDone { - it.aDone = !it.a.Next() - it.k = nextA - return true - } - if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { - it.aDone = !it.a.Next() - it.k = nextA - return true - } else if diff == 0 { - // Now we need to advance one of them - it.aDone = !it.a.Next() - goto first - } - it.bDone = !it.b.Next() - it.k = nextB - return true -} - -func (it *binaryIterator) Key() common.Hash { - return it.k -} -func (it *binaryIterator) Seek(key common.Hash) { - panic("todo: implement") -} - -func (dl *diffLayer) iterators() []Iterator { - if parent, ok := dl.parent.(*diffLayer); ok { - iterators := parent.iterators() - return append(iterators, dl.newIterator()) - } - return []Iterator{dl.newIterator()} -} - -// fastIterator is a more optimized multi-layer iterator which maintains a -// direct mapping of all 
iterators leading down to the bottom layer -type fastIterator struct { - iterators []Iterator - initiated bool -} - -// Len returns the number of active iterators -func (fi *fastIterator) Len() int { - return len(fi.iterators) -} - -// Less implements sort.Interface -func (fi *fastIterator) Less(i, j int) bool { - a := fi.iterators[i].Key() - b := fi.iterators[j].Key() - return bytes.Compare(a[:], b[:]) < 0 -} - -// Swap implements sort.Interface -func (fi *fastIterator) Swap(i, j int) { - fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] -} - -// Next implements the Iterator interface. It returns false if no more elemnts -// can be retrieved (false == exhausted) -func (fi *fastIterator) Next() bool { - if len(fi.iterators) == 0 { - return false - } - if !fi.initiated { - // Don't forward first time -- we had to 'Next' once in order to - // do the sorting already - fi.initiated = true - return true - } - return fi.innerNext(0) -} - -// innerNext handles the next operation internally, -// and should be invoked when we know that two elements in the list may have -// the same value. -// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke -// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue -// along the list and apply the same operation if needed -func (fi *fastIterator) innerNext(pos int) bool { - if !fi.iterators[pos].Next() { - //Exhausted, remove this iterator - fi.remove(pos) - if len(fi.iterators) == 0 { - return false - } - return true - } - if pos == len(fi.iterators)-1 { - // Only one iterator left - return true - } - // We next:ed the elem at 'pos'. Now we may have to re-sort that elem - val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff < 0 { - // It is still in correct place - return true - } - if diff == 0 { - // It has same value as the neighbour. 
So still in correct place, but - // we need to iterate on the neighbour - fi.innerNext(pos + 1) - return true - } - // At this point, the elem is in the wrong location, but the - // remaining list is sorted. Find out where to move the elem - iterationNeeded := false - index := sort.Search(len(fi.iterators), func(n int) bool { - if n <= pos { - // No need to search 'behind' us - return false - } - if n == len(fi.iterators)-1 { - // Can always place an elem last - return true - } - neighbour := fi.iterators[n+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff == 0 { - // The elem we're placing it next to has the same value, - // so it's going to need further iteration - iterationNeeded = true - } - return diff < 0 - }) - fi.move(pos, index) - if iterationNeeded { - fi.innerNext(index) - } - return true -} - -// move moves an iterator to another position in the list -func (fi *fastIterator) move(index, newpos int) { - if newpos > len(fi.iterators)-1 { - newpos = len(fi.iterators) - 1 - } - var ( - elem = fi.iterators[index] - middle = fi.iterators[index+1 : newpos+1] - suffix []Iterator - ) - if newpos < len(fi.iterators)-1 { - suffix = fi.iterators[newpos+1:] - } - fi.iterators = append(fi.iterators[:index], middle...) - fi.iterators = append(fi.iterators, elem) - fi.iterators = append(fi.iterators, suffix...) -} - -// remove drops an iterator from the list -func (fi *fastIterator) remove(index int) { - fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) 
-} - -// Key returns the current key -func (fi *fastIterator) Key() common.Hash { - return fi.iterators[0].Key() -} - -func (fi *fastIterator) Seek(key common.Hash) { - // We need to apply this across all iterators - var seen = make(map[common.Hash]struct{}) - - length := len(fi.iterators) - for i, it := range fi.iterators { - it.Seek(key) - for { - if !it.Next() { - // To be removed - // swap it to the last position for now - fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] - length-- - break - } - v := it.Key() - if _, exist := seen[v]; !exist { - seen[v] = struct{}{} - break - } - } - } - // Now remove those that were placed in the end - fi.iterators = fi.iterators[:length] - // The list is now totally unsorted, need to re-sort the entire list - sort.Sort(fi) - fi.initiated = false -} - -// The fast iterator does not query parents as much. -func (dl *diffLayer) newFastIterator() Iterator { - f := &fastIterator{dl.iterators(), false} - f.Seek(common.Hash{}) - return f -} - -// Debug is a convencience helper during testing -func (fi *fastIterator) Debug() { - for _, it := range fi.iterators { - fmt.Printf(" %v ", it.Key()[31]) - } - fmt.Println() -} diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 5f914f626..7d7b21eb0 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -18,7 +18,6 @@ package snapshot import ( "bytes" - "encoding/binary" "math/big" "math/rand" "testing" @@ -348,365 +347,3 @@ func BenchmarkJournal(b *testing.B) { layer.Journal(new(bytes.Buffer)) } } - -// TestIteratorBasics tests some simple single-layer iteration -func TestIteratorBasics(t *testing.T) { - var ( - accounts = make(map[common.Hash][]byte) - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - // Fill up a parent - for i := 0; i < 100; i++ { - h := randomHash() - data := randomAccount() - accounts[h] = data - if rand.Intn(20) < 10 { - accStorage := 
make(map[common.Hash][]byte) - value := make([]byte, 32) - rand.Read(value) - accStorage[randomHash()] = value - storage[h] = accStorage - } - } - // Add some (identical) layers on top - parent := newDiffLayer(emptyLayer{}, common.Hash{}, accounts, storage) - it := parent.newIterator() - verifyIterator(t, 100, it) -} - -type testIterator struct { - values []byte -} - -func newTestIterator(values ...byte) *testIterator { - return &testIterator{values} -} -func (ti *testIterator) Next() bool { - ti.values = ti.values[1:] - if len(ti.values) == 0 { - return false - } - return true -} - -func (ti *testIterator) Key() common.Hash { - return common.BytesToHash([]byte{ti.values[0]}) -} - -func (ti *testIterator) Seek(common.Hash) { - panic("implement me") -} - -func TestFastIteratorBasics(t *testing.T) { - type testCase struct { - lists [][]byte - expKeys []byte - } - for i, tc := range []testCase{ - {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, - {7, 14, 15}, {9, 13, 15, 16}}, - expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, - {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, - {9, 10}, {10, 13, 15, 16}}, - expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, - } { - var iterators []Iterator - for _, data := range tc.lists { - iterators = append(iterators, newTestIterator(data...)) - - } - fi := &fastIterator{ - iterators: iterators, - initiated: false, - } - count := 0 - for fi.Next() { - if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { - t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) - } - count++ - } - } -} - -func verifyIterator(t *testing.T, expCount int, it Iterator) { - var ( - i = 0 - last = common.Hash{} - ) - for it.Next() { - v := it.Key() - if bytes.Compare(last[:], v[:]) >= 0 { - t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) - } - i++ - } - if i != expCount { - t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) - } -} - -// TestIteratorTraversal tests some simple multi-layer iteration -func 
TestIteratorTraversal(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - // single layer iterator - verifyIterator(t, 3, child.newIterator()) - // multi-layered binary iterator - verifyIterator(t, 7, child.newBinaryIterator()) - // multi-layered fast iterator - verifyIterator(t, 7, child.newFastIterator()) -} - -func TestIteratorLargeTraversal(t *testing.T) { - // This testcase is a bit notorious -- all layers contain the exact - // same 200 accounts. - var storage = make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(200), storage) - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) - } - // single layer iterator - verifyIterator(t, 200, child.newIterator()) - // multi-layered binary iterator - verifyIterator(t, 200, child.newBinaryIterator()) - // multi-layered fast iterator - verifyIterator(t, 200, child.newFastIterator()) -} - -// BenchmarkIteratorTraversal is a bit a bit notorious -- all layers contain the exact -// same 200 accounts. 
That means that we need to process 2000 items, but only -// spit out 200 values eventually. -// -//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op -//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op -func BenchmarkIteratorTraversal(b *testing.B) { - - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(200), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(200), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(200), storage) - - } - // We call this once before the benchmark, so the creation of - // sorted accountlists are not included in the results. - child.newBinaryIterator() - b.Run("binary iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newBinaryIterator() - for it.Next() { - got++ - } - if exp := 200; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) - b.Run("fast iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newFastIterator() - for it.Next() { - got++ - } - if exp := 200; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) -} - -// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where -// the baselayer is a lot larger than the upper layer. 
-// -// This is heavy on the binary iterator, which in most cases will have to -// call recursively 100 times for the majority of the values -// -// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op -// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op -func BenchmarkIteratorLargeBaselayer(b *testing.B) { - var storage = make(map[common.Hash]map[common.Hash][]byte) - - mkAccounts := func(num int) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for i := 0; i < num; i++ { - h := common.Hash{} - binary.BigEndian.PutUint64(h[:], uint64(i+1)) - accounts[h] = randomAccount() - } - return accounts - } - - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts(2000), storage) - - child := parent.Update(common.Hash{}, - mkAccounts(20), storage) - - for i := 2; i < 100; i++ { - child = child.Update(common.Hash{}, - mkAccounts(20), storage) - - } - // We call this once before the benchmark, so the creation of - // sorted accountlists are not included in the results. - child.newBinaryIterator() - b.Run("binary iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newBinaryIterator() - for it.Next() { - got++ - } - if exp := 2000; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) - b.Run("fast iterator", func(b *testing.B) { - for i := 0; i < b.N; i++ { - got := 0 - it := child.newFastIterator() - for it.Next() { - got++ - } - if exp := 2000; got != exp { - b.Errorf("iterator len wrong, expected %d, got %d", exp, got) - } - } - }) -} - -// TestIteratorFlatting tests what happens when we -// - have a live iterator on child C (parent C1 -> C2 .. CN) -// - flattens C2 all the way into CN -// - continues iterating -// Right now, this "works" simply because the keys do not change -- the -// iterator is not aware that a layer has become stale. 
This naive -// solution probably won't work in the long run, however -func TestIteratorFlattning(t *testing.T) { - var ( - storage = make(map[common.Hash]map[common.Hash][]byte) - ) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - // entries in multiple layers should only become output once - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it := child.newFastIterator() - child.parent.(*diffLayer).flatten() - // The parent should now be stale - verifyIterator(t, 7, it) -} - -func TestIteratorSeek(t *testing.T) { - storage := make(map[common.Hash]map[common.Hash][]byte) - mkAccounts := func(args ...string) map[common.Hash][]byte { - accounts := make(map[common.Hash][]byte) - for _, h := range args { - accounts[common.HexToHash(h)] = randomAccount() - } - return accounts - } - parent := newDiffLayer(emptyLayer{}, common.Hash{}, - mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) - it := parent.newIterator() - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xdd")) - verifyIterator(t, 3, it) - - it = parent.newIterator().(*dlIterator) - // expected: ee, f0, ff - it.Seek(common.HexToHash("0xaa")) - verifyIterator(t, 3, it) - - it = parent.newIterator().(*dlIterator) - // expected: nothing - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) - - child := parent.Update(common.Hash{}, - mkAccounts("0xbb", "0xdd", "0xf0"), storage) - - child = child.Update(common.Hash{}, - mkAccounts("0xcc", "0xf0", "0xff"), storage) - - it = child.newFastIterator() - // expected: cc, dd, ee, f0, ff - it.Seek(common.HexToHash("0xbb")) - verifyIterator(t, 5, it) - - it = 
child.newFastIterator() - it.Seek(common.HexToHash("0xef")) - // exp: f0, ff - verifyIterator(t, 2, it) - - it = child.newFastIterator() - it.Seek(common.HexToHash("0xf0")) - verifyIterator(t, 1, it) - - it.Seek(common.HexToHash("0xff")) - verifyIterator(t, 0, it) - -} diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go new file mode 100644 index 000000000..6df7b3147 --- /dev/null +++ b/core/state/snapshot/iterator.go @@ -0,0 +1,116 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "sort" + + "github.com/ethereum/go-ethereum/common" +) + +// AccountIterator is an iterator to step over all the accounts in a snapshot, +// which may or may not be composed of multiple layers. +type AccountIterator interface { + // Seek steps the iterator forward as many elements as needed, so that after + // calling Next(), the iterator will be at a key higher than the given hash. + Seek(hash common.Hash) + + // Next steps the iterator forward one element, returning false if exhausted, + // or an error if iteration failed for some reason (e.g. root being iterated + // becomes stale and garbage collected). 
+ Next() bool + + // Error returns any failure that occurred during iteration, which might have + // caused a premature iteration exit (e.g. snapshot stack becoming stale). + Error() error + + // Key returns the hash of the account the iterator is currently at. + Key() common.Hash + + // Value returns the RLP encoded slim account the iterator is currently at. + // A failure (e.g. snapshot stack becoming stale) is reported through Error. + Value() []byte +} + +// diffAccountIterator is an account iterator that steps over the accounts (both +// live and deleted) contained within a single diff layer. +type diffAccountIterator struct { + layer *diffLayer + index int +} + +func (dl *diffLayer) newAccountIterator() *diffAccountIterator { + dl.AccountList() + return &diffAccountIterator{layer: dl, index: -1} +} + +// Seek steps the iterator forward as many elements as needed, so that after +// calling Next(), the iterator will be at a key higher than the given hash. +func (it *diffAccountIterator) Seek(key common.Hash) { + // Search uses binary search to find and return the smallest index i + // in [0, n) at which f(i) is true + index := sort.Search(len(it.layer.accountList), func(i int) bool { + return bytes.Compare(key[:], it.layer.accountList[i][:]) < 0 + }) + it.index = index - 1 +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diffAccountIterator) Next() bool { + if it.index < len(it.layer.accountList) { + it.index++ + } + return it.index < len(it.layer.accountList) +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +// +// A diff layer is immutable after creation content wise and can always be fully +// iterated without error, so this method always returns nil. +func (it *diffAccountIterator) Error() error { + return nil +} + +// Key returns the hash of the account the iterator is currently at. 
+func (it *diffAccountIterator) Key() common.Hash { + if it.index < len(it.layer.accountList) { + return it.layer.accountList[it.index] + } + return common.Hash{} +} + +// Value returns the RLP encoded slim account the iterator is currently at. +func (it *diffAccountIterator) Value() []byte { + it.layer.lock.RLock() + defer it.layer.lock.RUnlock() + + hash := it.layer.accountList[it.index] + if data, ok := it.layer.accountData[hash]; ok { + return data + } + panic("iterator references non-existent layer account") +} + +func (dl *diffLayer) iterators() []AccountIterator { + if parent, ok := dl.parent.(*diffLayer); ok { + iterators := parent.iterators() + return append(iterators, dl.newAccountIterator()) + } + return []AccountIterator{dl.newAccountIterator()} +} diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go new file mode 100644 index 000000000..7ff6e3337 --- /dev/null +++ b/core/state/snapshot/iterator_binary.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 
+ +package snapshot + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// binaryAccountIterator is a simplistic iterator to step over the accounts in +// a snapshot, which may or may not be composed of multiple layers. Performance +// wise this iterator is slow, it's meant for cross validating the fast one. +type binaryAccountIterator struct { + a *diffAccountIterator + b AccountIterator + aDone bool + bDone bool + k common.Hash + fail error +} + +// newBinaryAccountIterator creates a simplistic account iterator to step over +// all the accounts in a slow, but easily verifiable way. +func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { + parent, ok := dl.parent.(*diffLayer) + if !ok { + // parent is the disk layer + return dl.newAccountIterator() + } + l := &binaryAccountIterator{ + a: dl.newAccountIterator(), + b: parent.newBinaryAccountIterator(), + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// Seek steps the iterator forward as many elements as needed, so that after +// calling Next(), the iterator will be at a key higher than the given hash. +func (it *binaryAccountIterator) Seek(key common.Hash) { + panic("todo: implement") +} + +// Next steps the iterator forward one element, returning false if exhausted, +// or an error if iteration failed for some reason (e.g. root being iterated +// becomes stale and garbage collected). 
+func (it *binaryAccountIterator) Next() bool { + if it.aDone && it.bDone { + return false + } + nextB := it.b.Key() +first: + nextA := it.a.Key() + if it.aDone { + it.bDone = !it.b.Next() + it.k = nextB + return true + } + if it.bDone { + it.aDone = !it.a.Next() + it.k = nextA + return true + } + if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { + it.aDone = !it.a.Next() + it.k = nextA + return true + } else if diff == 0 { + // Now we need to advance one of them + it.aDone = !it.a.Next() + goto first + } + it.bDone = !it.b.Next() + it.k = nextB + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (it *binaryAccountIterator) Error() error { + return it.fail +} + +// Key returns the hash of the account the iterator is currently at. +func (it *binaryAccountIterator) Key() common.Hash { + return it.k +} + +// Value returns the RLP encoded slim account the iterator is currently at, or +// nil if the iterated snapshot stack became stale (you can check Error after +// to see if it failed or not). +func (it *binaryAccountIterator) Value() []byte { + blob, err := it.a.layer.AccountRLP(it.k) + if err != nil { + it.fail = err + return nil + } + return blob +} diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go new file mode 100644 index 000000000..d3f315353 --- /dev/null +++ b/core/state/snapshot/iterator_fast.go @@ -0,0 +1,211 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "fmt" + "sort" + + "github.com/ethereum/go-ethereum/common" +) + +// fastAccountIterator is a more optimized multi-layer iterator which maintains a +// direct mapping of all iterators leading down to the bottom layer +type fastAccountIterator struct { + iterators []AccountIterator + initiated bool + fail error +} + +// The fast iterator does not query parents as much. +func (dl *diffLayer) newFastAccountIterator() AccountIterator { + f := &fastAccountIterator{ + iterators: dl.iterators(), + initiated: false, + } + f.Seek(common.Hash{}) + return f +} + +// Len returns the number of active iterators +func (fi *fastAccountIterator) Len() int { + return len(fi.iterators) +} + +// Less implements sort.Interface +func (fi *fastAccountIterator) Less(i, j int) bool { + a := fi.iterators[i].Key() + b := fi.iterators[j].Key() + return bytes.Compare(a[:], b[:]) < 0 +} + +// Swap implements sort.Interface +func (fi *fastAccountIterator) Swap(i, j int) { + fi.iterators[i], fi.iterators[j] = fi.iterators[j], fi.iterators[i] +} + +func (fi *fastAccountIterator) Seek(key common.Hash) { + // We need to apply this across all iterators + var seen = make(map[common.Hash]struct{}) + + length := len(fi.iterators) + for i, it := range fi.iterators { + it.Seek(key) + for { + if !it.Next() { + // To be removed + // swap it to the last position for now + fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] + length-- + break + } + v := it.Key() + if _, exist := seen[v]; !exist { + seen[v] = struct{}{} + break + } + } + } 
+ // Now remove those that were placed in the end + fi.iterators = fi.iterators[:length] + // The list is now totally unsorted, need to re-sort the entire list + sort.Sort(fi) + fi.initiated = false +} + +// Next implements the AccountIterator interface. It returns false if no more elements +// can be retrieved (false == exhausted) +func (fi *fastAccountIterator) Next() bool { + if len(fi.iterators) == 0 { + return false + } + if !fi.initiated { + // Don't forward first time -- we had to 'Next' once in order to + // do the sorting already + fi.initiated = true + return true + } + return fi.innerNext(0) +} + +// innerNext handles the next operation internally, +// and should be invoked when we know that two elements in the list may have +// the same value. +// For example, if the list becomes [2,3,5,5,8,9,10], then we should invoke +// innerNext(3), which will call Next on elem 3 (the second '5'). It will continue +// along the list and apply the same operation if needed +func (fi *fastAccountIterator) innerNext(pos int) bool { + if !fi.iterators[pos].Next() { + // Exhausted, remove this iterator + fi.remove(pos) + if len(fi.iterators) == 0 { + return false + } + return true + } + if pos == len(fi.iterators)-1 { + // Only one iterator left + return true + } + // We next:ed the elem at 'pos'. Now we may have to re-sort that elem + val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff < 0 { + // It is still in correct place + return true + } + if diff == 0 { + // It has same value as the neighbour. So still in correct place, but + // we need to iterate on the neighbour + fi.innerNext(pos + 1) + return true + } + // At this point, the elem is in the wrong location, but the + // remaining list is sorted. 
Find out where to move the elem + iterationNeeded := false + index := sort.Search(len(fi.iterators), func(n int) bool { + if n <= pos { + // No need to search 'behind' us + return false + } + if n == len(fi.iterators)-1 { + // Can always place an elem last + return true + } + neighbour := fi.iterators[n+1].Key() + diff := bytes.Compare(val[:], neighbour[:]) + if diff == 0 { + // The elem we're placing it next to has the same value, + // so it's going to need further iteration + iterationNeeded = true + } + return diff < 0 + }) + fi.move(pos, index) + if iterationNeeded { + fi.innerNext(index) + } + return true +} + +// move moves an iterator to another position in the list +func (fi *fastAccountIterator) move(index, newpos int) { + if newpos > len(fi.iterators)-1 { + newpos = len(fi.iterators) - 1 + } + var ( + elem = fi.iterators[index] + middle = fi.iterators[index+1 : newpos+1] + suffix []AccountIterator + ) + if newpos < len(fi.iterators)-1 { + suffix = fi.iterators[newpos+1:] + } + fi.iterators = append(fi.iterators[:index], middle...) + fi.iterators = append(fi.iterators, elem) + fi.iterators = append(fi.iterators, suffix...) +} + +// remove drops an iterator from the list +func (fi *fastAccountIterator) remove(index int) { + fi.iterators = append(fi.iterators[:index], fi.iterators[index+1:]...) +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). 
+func (fi *fastAccountIterator) Error() error { + return fi.fail +} + +// Key returns the current key +func (fi *fastAccountIterator) Key() common.Hash { + return fi.iterators[0].Key() +} + +// Value should return the current value; it is not implemented yet and panics +func (fi *fastAccountIterator) Value() []byte { + panic("todo") +} + +// Debug is a convenience helper during testing +func (fi *fastAccountIterator) Debug() { + for _, it := range fi.iterators { + fmt.Printf(" %v ", it.Key()[31]) + } + fmt.Println() +} diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go new file mode 100644 index 000000000..597523189 --- /dev/null +++ b/core/state/snapshot/iterator_test.go @@ -0,0 +1,396 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package snapshot + +import ( + "bytes" + "encoding/binary" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +// TestIteratorBasics tests some simple single-layer iteration +func TestIteratorBasics(t *testing.T) { + var ( + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + h := randomHash() + data := randomAccount() + accounts[h] = data + if rand.Intn(20) < 10 { + accStorage := make(map[common.Hash][]byte) + value := make([]byte, 32) + rand.Read(value) + accStorage[randomHash()] = value + storage[h] = accStorage + } + } + // Wrap the accounts in a single diff layer and iterate over it + parent := newDiffLayer(emptyLayer(), common.Hash{}, accounts, storage) + it := parent.newAccountIterator() + verifyIterator(t, 100, it) +} + +type testIterator struct { + values []byte +} + +func newTestIterator(values ...byte) *testIterator { + return &testIterator{values} +} + +func (ti *testIterator) Seek(common.Hash) { + panic("implement me") +} + +func (ti *testIterator) Next() bool { + ti.values = ti.values[1:] + if len(ti.values) == 0 { + return false + } + return true +} + +func (ti *testIterator) Error() error { + panic("implement me") +} + +func (ti *testIterator) Key() common.Hash { + return common.BytesToHash([]byte{ti.values[0]}) +} + +func (ti *testIterator) Value() []byte { + panic("implement me") +} + +func TestFastIteratorBasics(t *testing.T) { + type testCase struct { + lists [][]byte + expKeys []byte + } + for i, tc := range []testCase{ + {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, + {7, 14, 15}, {9, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, + {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, + {9, 10}, {10, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, + } { + var iterators []AccountIterator + for _, data := range tc.lists { + iterators = append(iterators, newTestIterator(data...)) + + } +
fi := &fastAccountIterator{ + iterators: iterators, + initiated: false, + } + count := 0 + for fi.Next() { + if got, exp := fi.Key()[31], tc.expKeys[count]; exp != got { + t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) + } + count++ + } + } +} + +func verifyIterator(t *testing.T, expCount int, it AccountIterator) { + var ( + i = 0 + last = common.Hash{} + ) + for ; it.Next(); i++ { + v := it.Key() + if bytes.Compare(last[:], v[:]) >= 0 { + t.Errorf("Wrong order:\n%x \n>=\n%x", last, v) + } + last = v // remember the previous key so ordering is checked pairwise, not only against the zero hash + } + if i != expCount { + t.Errorf("iterator len wrong, expected %d, got %d", expCount, i) + } +} + +// TestIteratorTraversal tests some simple multi-layer iteration +func TestIteratorTraversal(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + // single layer iterator + verifyIterator(t, 3, child.newAccountIterator()) + // multi-layered binary iterator + verifyIterator(t, 7, child.newBinaryAccountIterator()) + // multi-layered fast iterator + verifyIterator(t, 7, child.newFastAccountIterator()) +} + +func TestIteratorLargeTraversal(t *testing.T) { + // This testcase is a bit notorious -- all layers contain the exact + // same 200 accounts.
+ var storage = make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(200), storage) + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + } + // single layer iterator + verifyIterator(t, 200, child.newAccountIterator()) + // multi-layered binary iterator + verifyIterator(t, 200, child.newBinaryAccountIterator()) + // multi-layered fast iterator + verifyIterator(t, 200, child.newFastAccountIterator()) +} + +// BenchmarkIteratorTraversal is a bit notorious -- all 100 layers contain the exact +// same 200 accounts. That means that we need to process 20000 items, but only +// spit out 200 values eventually. +// +// BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op +// BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op +func BenchmarkIteratorTraversal(b *testing.B) { + + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(200), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(200), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(200), storage) + + } + // We call this once before the benchmark, so the creation of the + // sorted account lists is not included in the results.
+ child.newBinaryAccountIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where the +// baselayer (2000 accounts) is a lot larger than the upper layers (20 each). +// +// This is heavy on the binary iterator, which in most cases will have to +// call recursively 100 times for the majority of the values +// +// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op +// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op +func BenchmarkIteratorLargeBaselayer(b *testing.B) { + var storage = make(map[common.Hash]map[common.Hash][]byte) + + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts(2000), storage) + + child := parent.Update(common.Hash{}, + mkAccounts(20), storage) + + for i := 2; i < 100; i++ { + child = child.Update(common.Hash{}, + mkAccounts(20), storage) + + } + // We call this once before the benchmark, so the creation of the + // sorted account lists is not included in the results.
+ child.newBinaryAccountIterator() + b.Run("binary iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) +} + +// TestIteratorFlattning tests what happens when we +// - have a live iterator on child C (parent C1 -> C2 .. CN) +// - flatten C2 all the way into CN +// - continue iterating +// Right now, this "works" simply because the keys do not change -- the +// iterator is not aware that a layer has become stale. This naive +// solution probably won't work in the long run, however +func TestIteratorFlattning(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { + accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + // entries in multiple layers should only become output once + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it := child.newFastAccountIterator() + child.parent.(*diffLayer).flatten() + // The parent should now be stale + verifyIterator(t, 7, it) +} + +func TestIteratorSeek(t *testing.T) { + storage := make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(args ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, h := range args { +
accounts[common.HexToHash(h)] = randomAccount() + } + return accounts + } + parent := newDiffLayer(emptyLayer(), common.Hash{}, + mkAccounts("0xaa", "0xee", "0xff", "0xf0"), storage) + it := AccountIterator(parent.newAccountIterator()) + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xdd")) + verifyIterator(t, 3, it) + + it = parent.newAccountIterator() + // expected: ee, f0, ff + it.Seek(common.HexToHash("0xaa")) + verifyIterator(t, 3, it) + + it = parent.newAccountIterator() + // expected: nothing + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) + + child := parent.Update(common.Hash{}, + mkAccounts("0xbb", "0xdd", "0xf0"), storage) + + child = child.Update(common.Hash{}, + mkAccounts("0xcc", "0xf0", "0xff"), storage) + + it = child.newFastAccountIterator() + // expected: cc, dd, ee, f0, ff + it.Seek(common.HexToHash("0xbb")) + verifyIterator(t, 5, it) + + it = child.newFastAccountIterator() + it.Seek(common.HexToHash("0xef")) + // expected: f0, ff + verifyIterator(t, 2, it) + + it = child.newFastAccountIterator() + it.Seek(common.HexToHash("0xf0")) + verifyIterator(t, 1, it) // expected: ff + + it.Seek(common.HexToHash("0xff")) + verifyIterator(t, 0, it) // expected: nothing +}