From bd74882d83509ba4a8477dd21b86d46bc7d12eb4 Mon Sep 17 00:00:00 2001
From: Zsolt Felfoldi
Date: Sun, 5 Mar 2017 16:52:03 +0100
Subject: [PATCH] core: implement ChainIndexer

---
 core/chain_indexer.go      | 319 +++++++++++++++++++++++++++++++++++++
 core/chain_indexer_test.go | 235 +++++++++++++++++++++++++++++
 2 files changed, 554 insertions(+)
 create mode 100644 core/chain_indexer.go
 create mode 100644 core/chain_indexer_test.go

diff --git a/core/chain_indexer.go b/core/chain_indexer.go
new file mode 100644
index 0000000000..f1ead526de
--- /dev/null
+++ b/core/chain_indexer.go
@@ -0,0 +1,319 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package core implements the Ethereum consensus protocol.
+package core
+
+import (
+	"encoding/binary"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/event"
+)
+
+// ChainIndexer does a post-processing job for equally sized sections of the canonical
+// chain (like BloomBits and CHT structures). A ChainIndexer is connected to the blockchain
+// through the event system by starting a ChainEventLoop in a goroutine.
+// Further child ChainIndexers can be added which use the output of the parent section
+// indexer. These child indexers receive new head notifications only after an entire section
+// has been finished or in case of rollbacks that might affect already finished sections.
+//
+// Progress is counted in sections: stored is the number of sections that have been processed
+// and recorded in indexDb, targetCount is the number of sections that should be processed
+// according to the latest head notification, calcIdx is the section handed to processSection
+// and lastForwarded is the last block number reported to the children. This state is
+// updated by updateLoop and newHead with lock held.
+type ChainIndexer struct {
+	chainDb, indexDb                            ethdb.Database
+	backend                                     ChainIndexerBackend
+	sectionSize, confirmReq                     uint64
+	stop                                        chan struct{}
+	lock                                        sync.Mutex
+	procWait                                    time.Duration
+	tryUpdate                                   chan struct{}
+	stored, targetCount, calcIdx, lastForwarded uint64
+	updating                                    bool
+	children                                    []*ChainIndexer
+}
+
+// ChainIndexerBackend interface is a backend for the indexer doing the actual post-processing job
+type ChainIndexerBackend interface {
+	Reset(section uint64)           // start processing a new section
+	Process(header *types.Header)   // process a single block (called for each block in the section)
+	Commit(db ethdb.Database) error // do some more processing if necessary and store the results in the database
+	UpdateMsg(done, all uint64)     // print a progress update message if necessary (only called when multiple sections need to be processed)
+}
+
+// NewChainIndexer creates a new ChainIndexer and starts its update loop in a goroutine
+// chainDb: database the canonical hashes and headers of the processed sections are read from
+// indexDb: database where the index of available processed sections is stored (the index is
+// stored by the indexer, the actual processed chain data is stored by the backend)
+// backend: an implementation of ChainIndexerBackend
+// sectionSize: the size of processable sections
+// confirmReq: required number of confirmation blocks before a new section is being processed
+// procWait: waiting time between processing sections (simple way of limiting the resource usage of a db upgrade)
+// stop: quit channel
+func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBackend, sectionSize, confirmReq uint64, procWait time.Duration, stop chan struct{}) *ChainIndexer {
+	c := &ChainIndexer{
+		chainDb:     chainDb,
+		indexDb:     indexDb,
+		backend:     backend,
+		sectionSize: sectionSize,
+		confirmReq:  confirmReq,
+		tryUpdate:   make(chan struct{}, 1),
+		stop:        stop,
+		procWait:    procWait,
+	}
+	c.stored = c.getValidSections()
+	go c.updateLoop()
+	return c
+}
+
+// updateLoop is the main event loop of the indexer. Each tryUpdate signal processes at most
+// one section; if more sections are waiting, the next signal is scheduled after procWait.
+// The loop exits when the stop channel is closed.
+func (c *ChainIndexer) updateLoop() {
+	updateMsg := false
+
+	for {
+		select {
+		case <-c.stop:
+			return
+		case <-c.tryUpdate:
+			c.lock.Lock()
+			if c.targetCount > c.stored {
+				if !updateMsg && c.targetCount > c.stored+1 {
+					updateMsg = true
+					c.backend.UpdateMsg(c.stored, c.targetCount)
+				}
+				c.calcIdx = c.stored
+
+				var lastSectionHead common.Hash
+				if c.calcIdx > 0 {
+					lastSectionHead = c.getSectionHead(c.calcIdx - 1)
+				}
+
+				// the lock is released while the section is processed so that newHead is not blocked
+				c.lock.Unlock()
+				sectionHead, ok := c.processSection(c.calcIdx, lastSectionHead)
+				c.lock.Lock()
+
+				// only accept the result if the head of the previous section is still the one processing
+				// started from (for section 0 the index wraps around and no stored head is expected there)
+				if ok && lastSectionHead == c.getSectionHead(c.calcIdx-1) {
+					c.stored = c.calcIdx + 1
+					c.setSectionHead(c.calcIdx, sectionHead)
+					c.setValidSections(c.stored)
+					if updateMsg {
+						c.backend.UpdateMsg(c.stored, c.targetCount)
+						if c.stored >= c.targetCount {
+							updateMsg = false
+						}
+					}
+					c.lastForwarded = c.stored*c.sectionSize - 1
+					for _, cp := range c.children {
+						cp.newHead(c.lastForwarded, false)
+					}
+				} else {
+					// if processing has failed, do not retry until further notification
+					c.targetCount = c.stored
+				}
+			}
+
+			if c.targetCount > c.stored {
+				go func() {
+					time.Sleep(c.procWait)
+					c.tryUpdate <- struct{}{}
+				}()
+			} else {
+				c.updating = false
+			}
+			c.lock.Unlock()
+		}
+	}
+}
+
+// ChainEventLoop runs in a goroutine and feeds blockchain events to the indexer by calling newHead
+// (not needed for child indexers where the parent calls newHead). It returns when the stop channel
+// is closed or when the event subscription is closed; the subscription is released on return.
+func (c *ChainIndexer) ChainEventLoop(currentHeader *types.Header, eventMux *event.TypeMux) {
+	sub := eventMux.Subscribe(ChainEvent{})
+	defer sub.Unsubscribe()
+
+	c.newHead(currentHeader.Number.Uint64(), false)
+	lastHead := currentHeader.Hash()
+	for {
+		select {
+		case <-c.stop:
+			return
+		case ev, ok := <-sub.Chan():
+			if !ok {
+				// subscription channel closed (e.g. the event mux was stopped): ev is nil, no more events
+				return
+			}
+			header := ev.Data.(ChainEvent).Block.Header()
+			// a head that does not build on the previously seen one is reported as a rollback
+			c.newHead(header.Number.Uint64(), header.ParentHash != lastHead)
+			lastHead = header.Hash()
+		}
+	}
+}
+
+// AddChildIndexer adds a child ChainIndexer that can use the output of this one
+func (c *ChainIndexer) AddChildIndexer(ci *ChainIndexer) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	c.children = append(c.children, ci)
+}
+
+// newHead notifies the indexer about new chain heads or rollbacks. On a rollback every section
+// from the one containing headNum onwards is invalidated, and the rollback is passed on to the
+// children if it reaches below the last forwarded block. Otherwise the target section count is
+// raised once enough confirmations are available and the update loop is woken up if it is idle.
+func (c *ChainIndexer) newHead(headNum uint64, rollback bool) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	if rollback {
+		firstChanged := headNum / c.sectionSize
+		if firstChanged < c.targetCount {
+			c.targetCount = firstChanged
+		}
+		if firstChanged < c.stored {
+			c.stored = firstChanged
+			c.setValidSections(c.stored)
+		}
+		headNum = firstChanged * c.sectionSize
+
+		if headNum < c.lastForwarded {
+			c.lastForwarded = headNum
+			for _, cp := range c.children {
+				cp.newHead(c.lastForwarded, true)
+			}
+		}
+
+	} else {
+		var newCount uint64
+		if headNum >= c.confirmReq {
+			newCount = (headNum + 1 - c.confirmReq) / c.sectionSize
+			if newCount > c.targetCount {
+				c.targetCount = newCount
+				if !c.updating {
+					c.updating = true
+					c.tryUpdate <- struct{}{}
+				}
+			}
+		}
+	}
+}
+
+// processSection processes an entire section by calling backend functions while ensuring
+// the continuity of the passed headers. Since the chain mutex is not held while processing,
+// the continuity can be broken by a long reorg, in which case the function returns with ok == false.
+func (c *ChainIndexer) processSection(section uint64, lastSectionHead common.Hash) (sectionHead common.Hash, ok bool) {
+	c.backend.Reset(section)
+
+	head := lastSectionHead // hash that the ParentHash of the next header has to match
+	for i := section * c.sectionSize; i < (section+1)*c.sectionSize; i++ {
+		hash := GetCanonicalHash(c.chainDb, i)
+		if hash == (common.Hash{}) { // zero hash: presumably no canonical block is stored at height i
+			return common.Hash{}, false
+		}
+		header := GetHeader(c.chainDb, hash, i)
+		if header == nil || header.ParentHash != head { // header not available or continuity broken
+			return common.Hash{}, false
+		}
+		c.backend.Process(header)
+		head = header.Hash()
+	}
+	if err := c.backend.Commit(c.chainDb); err != nil { // the error value is dropped, the caller only sees ok == false
+		return common.Hash{}, false
+	}
+	return head, true
+}
+
+// CanonicalSections returns the number of processed sections that are consistent with the current canonical chain.
+// Sections whose stored head differs from the canonical hash are dropped from the index database; c.stored is not adjusted here (NOTE(review): confirm that newHead always reports such rollbacks).
+func (c *ChainIndexer) CanonicalSections() uint64 {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	cnt := c.getValidSections()
+	for cnt > 0 {
+		if c.getSectionHead(cnt-1) == GetCanonicalHash(c.chainDb, cnt*c.sectionSize-1) { // last block of section cnt-1 still matches
+			break
+		}
+		cnt--
+		c.setValidSections(cnt) // stores the reduced count and removes the head of the dropped section
+	}
+	return cnt
+}
+
+// getValidSections reads the number of valid sections from the index database (0 if the entry is not exactly 8 bytes long)
+func (c *ChainIndexer) getValidSections() uint64 {
+	data, _ := c.indexDb.Get([]byte("count")) // the second result of Get is ignored, only the length of data is checked
+	if len(data) == 8 {
+		return binary.BigEndian.Uint64(data[:])
+	}
+	return 0
+}
+
+// setValidSections writes the number of valid sections to the index database and removes the heads of sections at or above cnt
+func (c *ChainIndexer) setValidSections(cnt uint64) {
+	oldCnt := c.getValidSections()
+	if cnt < oldCnt {
+		for i := cnt; i < oldCnt; i++ {
+			c.removeSectionHead(i)
+		}
+	}
+
+	var data [8]byte
+	binary.BigEndian.PutUint64(data[:], cnt)
+	c.indexDb.Put([]byte("count"), data[:]) // any result of Put is ignored here
+}
+
+// getSectionHead reads the last block hash of a processed section from the index database (zero hash if no hash-sized entry is found)
+func (c *ChainIndexer) getSectionHead(idx uint64) common.Hash {
+	var data [8]byte
+	binary.BigEndian.PutUint64(data[:], idx)
+
+	hash, _ := c.indexDb.Get(append([]byte("shead"), data[:]...)) // key is "shead" followed by the big endian section index
+	if len(hash) == len(common.Hash{}) {
+		return common.BytesToHash(hash)
+	}
+	return common.Hash{}
+}
+
+// setSectionHead writes the last block hash of a processed section to the index database (same key layout as getSectionHead)
+func (c *ChainIndexer) setSectionHead(idx uint64, shead common.Hash) {
+	var data [8]byte
+	binary.BigEndian.PutUint64(data[:], idx)
+
+	c.indexDb.Put(append([]byte("shead"), data[:]...), shead.Bytes()) // any result of Put is ignored here
+}
+
+// removeSectionHead removes the reference to a processed section from the index database (same key layout as getSectionHead)
+func (c *ChainIndexer) removeSectionHead(idx uint64) {
+	var data [8]byte
+	binary.BigEndian.PutUint64(data[:], idx)
+
+	c.indexDb.Delete(append([]byte("shead"), data[:]...)) // any result of Delete is ignored here
+}
diff --git a/core/chain_indexer_test.go b/core/chain_indexer_test.go
new file mode 100644
index 0000000000..827976d51c
--- /dev/null
+++ b/core/chain_indexer_test.go
@@ -0,0 +1,235 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package core implements the Ethereum consensus protocol.
+package core
+
+import (
+	"encoding/binary"
+	"math/big"
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+func TestChainIndexerSingle(t *testing.T) {
+	// run multiple tests with randomized parameters, each with a single indexer
+	for i := 0; i < 10; i++ {
+		testChainIndexer(t, 1)
+	}
+}
+
+func TestChainIndexerWithChildren(t *testing.T) {
+	// run multiple tests with randomized parameters and different number of
+	// chained indexers (2 to 7, each one being the child of the previous one)
+	for i := 2; i < 8; i++ {
+		testChainIndexer(t, i)
+	}
+}
+
+// testChainIndexer runs a test with either a single ChainIndexer or a chain of multiple indexers
+// sectionSize and confirmReq parameters are randomized
+func testChainIndexer(t *testing.T, tciCount int) {
+	db, _ := ethdb.NewMemDatabase()
+	stop := make(chan struct{})
+	tciList := make([]*testChainIndex, tciCount)
+	var lastIndexer *ChainIndexer
+	for i := range tciList {
+		tci := &testChainIndex{t: t, sectionSize: uint64(rand.Intn(100) + 1), confirmReq: uint64(rand.Intn(10)), processCh: make(chan uint64)}
+		tciList[i] = tci
+		tci.indexer = NewChainIndexer(db, ethdb.NewTable(db, string([]byte{byte(i)})), tci, tci.sectionSize, tci.confirmReq, 0, stop)
+		if cs := tci.indexer.CanonicalSections(); cs != 0 {
+			t.Errorf("Expected 0 canonical sections, got %d", cs)
+		}
+		if lastIndexer != nil {
+			lastIndexer.AddChildIndexer(tci.indexer)
+		}
+		lastIndexer = tci.indexer
+	}
+
+	// expectCs expects a certain number of available canonical sections
+	expectCs := func(indexer *ChainIndexer, expCs uint64) {
+		cnt := 0
+		for {
+			cs := indexer.CanonicalSections()
+			if cs == expCs {
+				return
+			}
+			// keep trying for 10 seconds if it does not match
+			cnt++
+			if cnt == 10000 {
+				t.Fatalf("Expected %d canonical sections, got %d", expCs, cs)
+			}
+			time.Sleep(time.Millisecond)
+		}
+	}
+
+	// notify the indexer about a new head or rollback, then expect processed blocks if a section is processable
+	notify := func(headNum, expFailAfter uint64, rollback bool) {
+		tciList[0].indexer.newHead(headNum, rollback)
+		if rollback {
+			for _, tci := range tciList {
+				headNum = tci.rollback(headNum)
+				expectCs(tci.indexer, tci.stored)
+			}
+		} else {
+			for _, tci := range tciList {
+				var more bool
+				headNum, more = tci.newBlocks(headNum, expFailAfter)
+				if !more {
+					break
+				}
+				expectCs(tci.indexer, tci.stored)
+			}
+		}
+	}
+
+	for i := uint64(0); i <= 100; i++ {
+		testCanonicalHeader(db, i)
+	}
+	// start indexer with an already existing chain
+	notify(100, 100, false)
+	// add new blocks one by one
+	for i := uint64(101); i <= 1000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+	// do a rollback
+	notify(500, 500, true)
+	// create new fork
+	for i := uint64(501); i <= 1000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+
+	for i := uint64(1001); i <= 1500; i++ {
+		testCanonicalHeader(db, i)
+	}
+	// create a failed processing scenario where less blocks are available at processing time than notified
+	notify(2000, 1500, false)
+	// notify about a rollback (which could have caused the missing blocks if happened during processing)
+	notify(1500, 1500, true)
+
+	// create new fork
+	for i := uint64(1501); i <= 2000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+	close(stop)
+	db.Close()
+}
+
+func testCanonicalHeader(db ethdb.Database, idx uint64) { // writes a header with random extra data at height idx and marks it canonical
+	var rnd [8]byte
+	binary.BigEndian.PutUint64(rnd[:], uint64(rand.Int63()))
+	header := &types.Header{Number: big.NewInt(int64(idx)), Extra: rnd[:]}
+	if idx > 0 {
+		header.ParentHash = GetCanonicalHash(db, idx-1) // link to the current canonical parent so that sections stay continuous
+	}
+	WriteHeader(db, header)
+	WriteCanonicalHash(db, header.Hash(), idx)
+}
+
+// testChainIndex implements ChainIndexerBackend
+type testChainIndex struct {
+	t                          *testing.T    // test context used for reporting failures
+	sectionSize, confirmReq    uint64        // parameters the tested indexer was created with
+	section, headerCnt, stored uint64        // section passed to Reset, headers seen since Reset, expected stored section count
+	indexer                    *ChainIndexer // indexer under test
+	processCh                  chan uint64   // receives the number of every header passed to Process
+}
+
+// newBlocks expects process calls after new blocks have arrived. If expFailAfter < headNum then
+// we are simulating a scenario where a rollback has happened after the processing has started and
+// the processing of a section fails.
+func (t *testChainIndex) newBlocks(headNum, expFailAfter uint64) (uint64, bool) {
+	var newCount uint64
+	if headNum >= t.confirmReq {
+		newCount = (headNum + 1 - t.confirmReq) / t.sectionSize
+		if newCount > t.stored {
+			// expect processed blocks
+			for exp := t.stored * t.sectionSize; exp < newCount*t.sectionSize; exp++ {
+				if exp > expFailAfter {
+					// rolled back after processing started, no more process calls expected
+					// wait until updating is done to make sure that processing actually fails
+					for {
+						t.indexer.lock.Lock()
+						u := t.indexer.updating
+						t.indexer.lock.Unlock()
+						if !u {
+							break
+						}
+						time.Sleep(time.Millisecond)
+					}
+
+					newCount = exp / t.sectionSize
+					break
+				}
+				select {
+				case <-time.After(10 * time.Second):
+					t.t.Fatalf("Expected processed block #%d, got nothing", exp)
+				case proc := <-t.processCh:
+					if proc != exp {
+						t.t.Errorf("Expected processed block #%d, got #%d", exp, proc)
+					}
+				}
+			}
+			t.stored = newCount
+		}
+	}
+	if t.stored == 0 {
+		return 0, false
+	}
+	return t.stored*t.sectionSize - 1, true
+}
+
+func (t *testChainIndex) rollback(headNum uint64) uint64 { // lowers the expected section count and returns the head number passed on to the next indexer in the list
+	firstChanged := headNum / t.sectionSize
+	if firstChanged < t.stored {
+		t.stored = firstChanged
+	}
+	return t.stored * t.sectionSize
+}
+
+func (t *testChainIndex) Reset(section uint64) { // Reset implements ChainIndexerBackend: restarts the per-section header counter
+	t.section = section
+	t.headerCnt = 0
+}
+
+func (t *testChainIndex) Process(header *types.Header) { // Process implements ChainIndexerBackend: hands the header number over to newBlocks
+	t.headerCnt++
+	if t.headerCnt > t.sectionSize {
+		t.t.Error("Processing too many headers")
+	}
+	// Process runs on the indexer's update goroutine, so only Error (never Fatal/FailNow) may be used here
+	select {
+	case <-time.After(10 * time.Second):
+		t.t.Error("Unexpected call to Process")
+	case t.processCh <- header.Number.Uint64():
+	}
+}
+
+func (t *testChainIndex) Commit(db ethdb.Database) error { // Commit implements ChainIndexerBackend: checks that a full section was processed
+	if t.headerCnt != t.sectionSize {
+		t.t.Error("Not enough headers processed")
+	}
+	return nil
+}
+
+func (t *testChainIndex) UpdateMsg(done, all uint64) {}