From e5708353562405684ef7d34602635cc25231ad36 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 6 Dec 2019 23:27:18 +0100 Subject: [PATCH] core/state/snapshot: implement iterator priority for fast direct data lookup --- core/state/snapshot/iterator_fast.go | 117 +++++++++++------ core/state/snapshot/iterator_test.go | 180 +++++++++++++++++++++++++-- 2 files changed, 250 insertions(+), 47 deletions(-) diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index d3f315353..8df037e9f 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -24,20 +24,27 @@ import ( "github.com/ethereum/go-ethereum/common" ) +type weightedIterator struct { + it AccountIterator + priority int +} + // fastAccountIterator is a more optimized multi-layer iterator which maintains a // direct mapping of all iterators leading down to the bottom layer type fastAccountIterator struct { - iterators []AccountIterator + iterators []*weightedIterator initiated bool fail error } -// The fast iterator does not query parents as much. +// newFastAccountIterator creates a new fastAccountIterator func (dl *diffLayer) newFastAccountIterator() AccountIterator { f := &fastAccountIterator{ - iterators: dl.iterators(), initiated: false, } + for i, it := range dl.iterators() { + f.iterators = append(f.iterators, &weightedIterator{it, -i}) + } f.Seek(common.Hash{}) return f } @@ -49,9 +56,17 @@ func (fi *fastAccountIterator) Len() int { // Less implements sort.Interface func (fi *fastAccountIterator) Less(i, j int) bool { - a := fi.iterators[i].Key() - b := fi.iterators[j].Key() - return bytes.Compare(a[:], b[:]) < 0 + a := fi.iterators[i].it.Key() + b := fi.iterators[j].it.Key() + bDiff := bytes.Compare(a[:], b[:]) + if bDiff < 0 { + return true + } + if bDiff > 0 { + return false + } + // keys are equal, sort by iterator priority + return fi.iterators[i].priority < fi.iterators[j].priority } // Swap implements sort.Interface @@ -61,23 +76,42 @@ func (fi *fastAccountIterator) Swap(i, j int) { func (fi *fastAccountIterator) Seek(key common.Hash) { // We need to apply this across all iterators - var seen = make(map[common.Hash]struct{}) + var seen = make(map[common.Hash]int) length := len(fi.iterators) - for i, it := range fi.iterators { - it.Seek(key) + for i := 0; i < len(fi.iterators); i++ { + //for i, it := range fi.iterators { + it := fi.iterators[i] + it.it.Seek(key) for { - if !it.Next() { + if !it.it.Next() { // To be removed // swap it to the last position for now fi.iterators[i], fi.iterators[length-1] = fi.iterators[length-1], fi.iterators[i] length-- break } - v := it.Key() - if _, exist := seen[v]; !exist { - seen[v] = struct{}{} + v := it.it.Key() + if other, exist := seen[v]; !exist { + seen[v] = i break + } else { + // This whole else-block can be avoided, if we instead + // do an inital priority-sort of the iterators. If we do that, + // then we'll only wind up here if a lower-priority (preferred) iterator + // has the same value, and then we will always just continue. + // However, it costs an extra sort, so it's probably not better + + // One needs to be progressed, use priority to determine which + if fi.iterators[other].priority < it.priority { + // the 'it' should be progressed + continue + } else { + // the 'other' should be progressed - swap them + it = fi.iterators[other] + fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other] + continue + } } } } @@ -110,7 +144,7 @@ func (fi *fastAccountIterator) Next() bool { // innerNext(3), which will call Next on elem 3 (the second '5'). It will continue // along the list and apply the same operation if needed func (fi *fastAccountIterator) innerNext(pos int) bool { - if !fi.iterators[pos].Next() { + if !fi.iterators[pos].it.Next() { //Exhausted, remove this iterator fi.remove(pos) if len(fi.iterators) == 0 { @@ -123,23 +157,23 @@ func (fi *fastAccountIterator) innerNext(pos int) bool { return true } // We next:ed the elem at 'pos'. Now we may have to re-sort that elem - val, neighbour := fi.iterators[pos].Key(), fi.iterators[pos+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff < 0 { + var ( + current, neighbour = fi.iterators[pos], fi.iterators[pos+1] + val, neighbourVal = current.it.Key(), neighbour.it.Key() + ) + if diff := bytes.Compare(val[:], neighbourVal[:]); diff < 0 { // It is still in correct place return true - } - if diff == 0 { - // It has same value as the neighbour. So still in correct place, but - // we need to iterate on the neighbour + } else if diff == 0 && current.priority < neighbour.priority { + // So still in correct place, but we need to iterate on the neighbour fi.innerNext(pos + 1) return true } // At this point, the elem is in the wrong location, but the // remaining list is sorted. Find out where to move the elem - iterationNeeded := false + iteratee := -1 index := sort.Search(len(fi.iterators), func(n int) bool { - if n <= pos { + if n < pos { // No need to search 'behind' us return false } @@ -147,18 +181,29 @@ func (fi *fastAccountIterator) innerNext(pos int) bool { // Can always place an elem last return true } - neighbour := fi.iterators[n+1].Key() - diff := bytes.Compare(val[:], neighbour[:]) - if diff == 0 { - // The elem we're placing it next to has the same value, - // so it's going to need further iteration - iterationNeeded = true + neighbour := fi.iterators[n+1].it.Key() + if diff := bytes.Compare(val[:], neighbour[:]); diff < 0 { + return true + } else if diff > 0 { + return false + } + // The elem we're placing it next to has the same value, + // so whichever winds up on n+1 will need further iteraton + iteratee = n + 1 + if current.priority < fi.iterators[n+1].priority { + // We can drop the iterator here + return true } - return diff < 0 + // We need to move it one step further + return false + // TODO benchmark which is best, this works too: + //iteratee = n + //return true + // Doing so should finish the current search earlier }) fi.move(pos, index) - if iterationNeeded { - fi.innerNext(index) + if iteratee != -1 { + fi.innerNext(iteratee) } return true } @@ -171,7 +216,7 @@ func (fi *fastAccountIterator) move(index, newpos int) { var ( elem = fi.iterators[index] middle = fi.iterators[index+1 : newpos+1] - suffix []AccountIterator + suffix []*weightedIterator ) if newpos < len(fi.iterators)-1 { suffix = fi.iterators[newpos+1:] @@ -194,18 +239,18 @@ func (fi *fastAccountIterator) Error() error { // Key returns the current key func (fi *fastAccountIterator) Key() common.Hash { - return fi.iterators[0].Key() + return fi.iterators[0].it.Key() } // Value returns the current key func (fi *fastAccountIterator) Value() []byte { - panic("todo") + return fi.iterators[0].it.Value() } // Debug is a convencience helper during testing func (fi *fastAccountIterator) Debug() { for _, it := range fi.iterators { - fmt.Printf(" %v ", it.Key()[31]) + fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Key()[0]) } fmt.Println() } diff --git a/core/state/snapshot/iterator_test.go b/core/state/snapshot/iterator_test.go index 597523189..01e525653 100644 --- a/core/state/snapshot/iterator_test.go +++ b/core/state/snapshot/iterator_test.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "encoding/binary" + "fmt" "math/rand" "testing" @@ -95,9 +96,10 @@ func TestFastIteratorBasics(t *testing.T) { {9, 10}, {10, 13, 15, 16}}, expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, } { - var iterators []AccountIterator - for _, data := range tc.lists { - iterators = append(iterators, newTestIterator(data...)) + var iterators []*weightedIterator + for i, data := range tc.lists { + it := newTestIterator(data...) + iterators = append(iterators, &weightedIterator{it, i}) } fi := &fastAccountIterator{ @@ -162,6 +164,69 @@ func TestIteratorTraversal(t *testing.T) { verifyIterator(t, 7, child.newFastAccountIterator()) } +// TestIteratorTraversalValues tests some multi-layer iteration, where we +// also expect the correct values to show up +func TestIteratorTraversalValues(t *testing.T) { + var ( + storage = make(map[common.Hash]map[common.Hash][]byte) + a = make(map[common.Hash][]byte) + b = make(map[common.Hash][]byte) + c = make(map[common.Hash][]byte) + d = make(map[common.Hash][]byte) + e = make(map[common.Hash][]byte) + f = make(map[common.Hash][]byte) + g = make(map[common.Hash][]byte) + h = make(map[common.Hash][]byte) + ) + // entries in multiple layers should only become output once + for i := byte(2); i < 0xff; i++ { + a[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 0, i)) + if i > 20 && i%2 == 0 { + b[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 1, i)) + } + if i%4 == 0 { + c[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 2, i)) + } + if i%7 == 0 { + d[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 3, i)) + } + if i%8 == 0 { + e[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 4, i)) + } + if i > 50 || i < 85 { + f[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 5, i)) + } + if i%64 == 0 { + g[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 6, i)) + } + if i%128 == 0 { + h[common.Hash{i}] = []byte(fmt.Sprintf("layer-%d, key %d", 7, i)) + } + } + child := newDiffLayer(emptyLayer(), common.Hash{}, a, storage). + Update(common.Hash{}, b, storage). + Update(common.Hash{}, c, storage). + Update(common.Hash{}, d, storage). + Update(common.Hash{}, e, storage). + Update(common.Hash{}, f, storage). + Update(common.Hash{}, g, storage). + Update(common.Hash{}, h, storage) + + it := child.newFastAccountIterator() + for it.Next() { + key := it.Key() + exp, err := child.accountRLP(key, 0) + if err != nil { + t.Fatal(err) + } + got := it.Value() + if !bytes.Equal(exp, got) { + t.Fatalf("Error on key %x, got %v exp %v", key, string(got), string(exp)) + } + //fmt.Printf("val: %v\n", string(it.Value())) + } +} + func TestIteratorLargeTraversal(t *testing.T) { // This testcase is a bit notorious -- all layers contain the exact // same 200 accounts. @@ -195,8 +260,14 @@ func TestIteratorLargeTraversal(t *testing.T) { // same 200 accounts. That means that we need to process 2000 items, but only // spit out 200 values eventually. // -//BenchmarkIteratorTraversal/binary_iterator-6 2008 573290 ns/op 9520 B/op 199 allocs/op -//BenchmarkIteratorTraversal/fast_iterator-6 1946 575596 ns/op 20146 B/op 134 allocs/op +// The value-fetching benchmark is easy on the binary iterator, since it never has to reach +// down at any depth for retrieving the values -- all are on the toppmost layer +// +// BenchmarkIteratorTraversal/binary_iterator_keys-6 2239 483674 ns/op +// BenchmarkIteratorTraversal/binary_iterator_values-6 2403 501810 ns/op +// BenchmarkIteratorTraversal/fast_iterator_keys-6 1923 677966 ns/op +// BenchmarkIteratorTraversal/fast_iterator_values-6 1741 649967 ns/op +// func BenchmarkIteratorTraversal(b *testing.B) { var storage = make(map[common.Hash]map[common.Hash][]byte) @@ -224,7 +295,7 @@ func BenchmarkIteratorTraversal(b *testing.B) { // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. child.newBinaryAccountIterator() - b.Run("binary iterator", func(b *testing.B) { + b.Run("binary iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newBinaryAccountIterator() @@ -236,7 +307,20 @@ func BenchmarkIteratorTraversal(b *testing.B) { } } }) - b.Run("fast iterator", func(b *testing.B) { + b.Run("binary iterator values", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + child.accountRLP(it.Key(), 0) + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator keys", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newFastAccountIterator() @@ -248,6 +332,19 @@ func BenchmarkIteratorTraversal(b *testing.B) { } } }) + b.Run("fast iterator values", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + it.Value() + } + if exp := 200; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) } // BenchmarkIteratorLargeBaselayer is a pretty realistic benchmark, where @@ -256,8 +353,10 @@ func BenchmarkIteratorTraversal(b *testing.B) { // This is heavy on the binary iterator, which in most cases will have to // call recursively 100 times for the majority of the values // -// BenchmarkIteratorLargeBaselayer/binary_iterator-6 585 2067377 ns/op 9520 B/op 199 allocs/op -// BenchmarkIteratorLargeBaselayer/fast_iterator-6 13198 91043 ns/op 8601 B/op 118 allocs/op +// BenchmarkIteratorLargeBaselayer/binary_iterator_(keys)-6 514 1971999 ns/op +// BenchmarkIteratorLargeBaselayer/fast_iterator_(keys)-6 10000 114385 ns/op +// BenchmarkIteratorLargeBaselayer/binary_iterator_(values)-6 61 18997492 ns/op +// BenchmarkIteratorLargeBaselayer/fast_iterator_(values)-6 4047 296823 ns/op func BenchmarkIteratorLargeBaselayer(b *testing.B) { var storage = make(map[common.Hash]map[common.Hash][]byte) @@ -285,23 +384,51 @@ func BenchmarkIteratorLargeBaselayer(b *testing.B) { // We call this once before the benchmark, so the creation of // sorted accountlists are not included in the results. child.newBinaryAccountIterator() - b.Run("binary iterator", func(b *testing.B) { + b.Run("binary iterator (keys)", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newBinaryAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("fast iterator (keys)", func(b *testing.B) { + for i := 0; i < b.N; i++ { + got := 0 + it := child.newFastAccountIterator() + for it.Next() { + got++ + } + if exp := 2000; got != exp { + b.Errorf("iterator len wrong, expected %d, got %d", exp, got) + } + } + }) + b.Run("binary iterator (values)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newBinaryAccountIterator() for it.Next() { got++ + v := it.Key() + child.accountRLP(v, -0) } if exp := 2000; got != exp { b.Errorf("iterator len wrong, expected %d, got %d", exp, got) } } }) - b.Run("fast iterator", func(b *testing.B) { + + b.Run("fast iterator (values)", func(b *testing.B) { for i := 0; i < b.N; i++ { got := 0 it := child.newFastAccountIterator() for it.Next() { + it.Value() got++ } if exp := 2000; got != exp { @@ -394,3 +521,34 @@ func TestIteratorSeek(t *testing.T) { it.Seek(common.HexToHash("0xff")) verifyIterator(t, 0, it) } + +//BenchmarkIteratorSeek/init+seek-6 4328 245477 ns/op +func BenchmarkIteratorSeek(b *testing.B) { + + var storage = make(map[common.Hash]map[common.Hash][]byte) + mkAccounts := func(num int) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for i := 0; i < num; i++ { + h := common.Hash{} + binary.BigEndian.PutUint64(h[:], uint64(i+1)) + accounts[h] = randomAccount() + } + return accounts + } + layer := newDiffLayer(emptyLayer(), common.Hash{}, mkAccounts(200), storage) + for i := 1; i < 100; i++ { + layer = layer.Update(common.Hash{}, + mkAccounts(200), storage) + } + b.Run("init+seek", func(b *testing.B) { + b.ResetTimer() + seekpos := make([]byte, 20) + for i := 0; i < b.N; i++ { + b.StopTimer() + rand.Read(seekpos) + it := layer.newFastAccountIterator() + b.StartTimer() + it.Seek(common.BytesToHash(seekpos)) + } + }) +}