From bd74882d83509ba4a8477dd21b86d46bc7d12eb4 Mon Sep 17 00:00:00 2001
From: Zsolt Felfoldi <zsfelfoldi@gmail.com>
Date: Sun, 5 Mar 2017 16:52:03 +0100
Subject: [PATCH] core: implement ChainIndexer

---
 core/chain_indexer.go      | 294 +++++++++++++++++++++++++++++++++++++
 core/chain_indexer_test.go | 235 +++++++++++++++++++++++++++++
 2 files changed, 529 insertions(+)
 create mode 100644 core/chain_indexer.go
 create mode 100644 core/chain_indexer_test.go

diff --git a/core/chain_indexer.go b/core/chain_indexer.go
new file mode 100644
index 0000000000..f1ead526de
--- /dev/null
+++ b/core/chain_indexer.go
@@ -0,0 +1,294 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package core implements the Ethereum consensus protocol.
+package core
+
+import (
+	"encoding/binary"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/event"
+)
+
+// ChainIndexer does a post-processing job for equally sized sections of the canonical
+// chain (like BloomBits and CHT structures). A ChainIndexer is connected to the blockchain
+// through the event system by starting a ChainEventLoop in a goroutine.
+// Further child ChainIndexers can be added which use the output of the parent section
+// indexer. These child indexers receive new head notifications only after an entire section
+// has been finished or in case of rollbacks that might affect already finished sections.
+type ChainIndexer struct {
+	chainDb, indexDb                            ethdb.Database      // chainDb: canonical hashes and headers are read from it; indexDb: section count and section heads
+	backend                                     ChainIndexerBackend // does the actual per-section processing
+	sectionSize, confirmReq                     uint64              // blocks per section; confirmations required before a section is processed
+	stop                                        chan struct{}       // closing it ends updateLoop and ChainEventLoop
+	lock                                        sync.Mutex          // held by updateLoop and newHead while they touch the counters and flags below
+	procWait                                    time.Duration       // pause before the next section is processed
+	tryUpdate                                   chan struct{}       // wake-up signal for updateLoop (created with a buffer of one)
+	stored, targetCount, calcIdx, lastForwarded uint64              // sections done; sections wanted; section being processed; last block number passed to children
+	updating                                    bool                // true from the first wake-up until updateLoop has caught up with targetCount
+	children                                    []*ChainIndexer     // indexers notified through newHead by this one
+}
+
+// ChainIndexerBackend is implemented by the backend that does the actual post-processing job for the indexer
+type ChainIndexerBackend interface {
+	Reset(section uint64)           // start processing a new section (called once before the blocks of the section)
+	Process(header *types.Header)   // process a single block (called for each block in the section, in ascending order)
+	Commit(db ethdb.Database) error // do some more processing if necessary and store the results in the database; an error marks the section as failed
+	UpdateMsg(done, all uint64)     // print a progress update message if necessary (only called when multiple sections need to be processed)
+}
+
+// NewChainIndexer creates a ChainIndexer, restores the number of already processed sections
+// from indexDb and starts the update loop in its own goroutine.
+//  chainDb:     database the canonical hashes and headers are read from (also passed to backend.Commit)
+//  indexDb:     database where the indexer stores the count and head hashes of processed sections
+//  backend:     an implementation of ChainIndexerBackend
+//  sectionSize: the size of processable sections
+//  confirmReq:  required number of confirmation blocks before a new section is being processed
+//  procWait:    waiting time between processing sections (simple way of limiting the resource usage of a db upgrade)
+//  stop:        quit channel, closing it stops the update loop
+func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBackend, sectionSize, confirmReq uint64, procWait time.Duration, stop chan struct{}) *ChainIndexer {
+	c := new(ChainIndexer)
+	c.chainDb, c.indexDb = chainDb, indexDb
+	c.backend = backend
+	c.sectionSize, c.confirmReq = sectionSize, confirmReq
+	c.procWait = procWait
+	c.stop = stop
+
+	// one buffered slot lets newHead queue a wake-up without waiting for the loop
+	c.tryUpdate = make(chan struct{}, 1)
+
+	c.stored = c.getValidSections()
+	go c.updateLoop()
+	return c
+}
+
+// updateLoop is the main event loop of the indexer: each tryUpdate signal processes at most one section, until stop is closed
+func (c *ChainIndexer) updateLoop() {
+	updateMsg := false // true while progress messages are being sent for a multi-section update
+
+	for {
+		select {
+		case <-c.stop:
+			return
+		case <-c.tryUpdate:
+			c.lock.Lock()
+			if c.targetCount > c.stored {
+				if !updateMsg && c.targetCount > c.stored+1 {
+					updateMsg = true
+					c.backend.UpdateMsg(c.stored, c.targetCount)
+				}
+				c.calcIdx = c.stored
+				// the first section has no predecessor, its expected parent stays the zero hash
+				var lastSectionHead common.Hash
+				if c.calcIdx > 0 {
+					lastSectionHead = c.getSectionHead(c.calcIdx - 1)
+				}
+				// the lock is released while the backend works so that newHead is not blocked
+				c.lock.Unlock()
+				sectionHead, ok := c.processSection(c.calcIdx, lastSectionHead)
+				c.lock.Lock()
+				// a rollback seen by newHead in the meantime may have removed the previous section head
+				if ok && lastSectionHead == c.getSectionHead(c.calcIdx-1) { // NOTE(review): for calcIdx == 0 the index wraps around; presumably no such entry exists so both sides are the zero hash
+					c.stored = c.calcIdx + 1
+					c.setSectionHead(c.calcIdx, sectionHead)
+					c.setValidSections(c.stored)
+					if updateMsg {
+						c.backend.UpdateMsg(c.stored, c.targetCount)
+						if c.stored >= c.targetCount {
+							updateMsg = false
+						}
+					}
+					c.lastForwarded = c.stored*c.sectionSize - 1 // number of the last block of the finished section
+					for _, cp := range c.children {
+						cp.newHead(c.lastForwarded, false)
+					}
+				} else {
+					// if processing has failed, do not retry until further notification
+					c.targetCount = c.stored
+				}
+			}
+			// more sections pending: schedule the next round after procWait, otherwise go idle
+			if c.targetCount > c.stored {
+				go func() {
+					time.Sleep(c.procWait)
+					c.tryUpdate <- struct{}{}
+				}()
+			} else {
+				c.updating = false // newHead sends the next wake-up itself
+			}
+			c.lock.Unlock()
+		}
+	}
+}
+
+// ChainEventLoop runs in a goroutine and feeds ChainEvents from eventMux to newHead until c.stop is
+// closed, then releases its subscription (not needed for child indexers where the parent calls newHead)
+func (c *ChainIndexer) ChainEventLoop(currentHeader *types.Header, eventMux *event.TypeMux) {
+	sub := eventMux.Subscribe(ChainEvent{})
+	defer sub.Unsubscribe() // do not leave the subscription registered with the mux after the loop ends
+	c.newHead(currentHeader.Number.Uint64(), false)
+	for lastHead := currentHeader.Hash(); ; {
+		select {
+		case <-c.stop:
+			return
+		case ev := <-sub.Chan(): // NOTE(review): a closed channel would yield a nil event here - confirm the mux outlives c.stop
+			header := ev.Data.(ChainEvent).Block.Header()
+			c.newHead(header.Number.Uint64(), header.ParentHash != lastHead) // a head not built on the last one is a rollback
+			lastHead = header.Hash()
+		}
+	}
+}
+
+// AddChildIndexer adds a child ChainIndexer that is notified through newHead about the sections finished or rolled back by this one
+func (c *ChainIndexer) AddChildIndexer(ci *ChainIndexer) {
+	c.children = append(c.children, ci) // NOTE(review): c.lock is not taken here although updateLoop and newHead range over children under it - confirm children are added before any head is announced
+}
+
+// newHead raises the processing target after a new chain head (rollback == false) or
+// invalidates sections from headNum onwards (rollback == true), passing rollbacks on to children
+func (c *ChainIndexer) newHead(headNum uint64, rollback bool) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	if !rollback {
+		if headNum < c.confirmReq { // not enough blocks to confirm any section yet
+			return
+		}
+		sections := (headNum + 1 - c.confirmReq) / c.sectionSize
+		if sections <= c.targetCount {
+			return
+		}
+		c.targetCount = sections
+		if !c.updating { // wake up updateLoop unless it is already working
+			c.updating = true
+			c.tryUpdate <- struct{}{}
+		}
+		return
+	}
+	// every section from the one containing headNum onwards may have changed
+	changed := headNum / c.sectionSize
+	if changed < c.targetCount {
+		c.targetCount = changed
+	}
+	if changed < c.stored {
+		c.stored = changed
+		c.setValidSections(changed)
+	}
+	if first := changed * c.sectionSize; first < c.lastForwarded {
+		c.lastForwarded = first
+		for _, child := range c.children {
+			child.newHead(first, true)
+		}
+	}
+}
+
+// processSection runs the backend over one section: Reset, then Process for every header, then
+// Commit. Each header must be the child of the previous one, starting from lastSectionHead; a
+// missing hash or header, a broken parent link or a Commit error makes it return ok == false.
+func (c *ChainIndexer) processSection(section uint64, lastSectionHead common.Hash) (sectionHead common.Hash, ok bool) {
+	first, prev := section*c.sectionSize, lastSectionHead
+	c.backend.Reset(section)
+
+	for num := first; num < first+c.sectionSize; num++ {
+		canon := GetCanonicalHash(c.chainDb, num)
+		if canon == (common.Hash{}) {
+			return common.Hash{}, false
+		}
+		header := GetHeader(c.chainDb, canon, num)
+		if header == nil || header.ParentHash != prev {
+			return common.Hash{}, false
+		}
+		c.backend.Process(header)
+		prev = header.Hash()
+	}
+	if c.backend.Commit(c.chainDb) != nil {
+		return common.Hash{}, false
+	}
+	return prev, true
+}
+
+// CanonicalSections returns how many stored sections still agree with the canonical chain in
+// chainDb, dropping trailing sections whose recorded head differs from the canonical hash
+func (c *ChainIndexer) CanonicalSections() uint64 {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	valid := c.getValidSections()
+	for valid > 0 {
+		lastBlock := valid*c.sectionSize - 1
+		if GetCanonicalHash(c.chainDb, lastBlock) == c.getSectionHead(valid-1) {
+			return valid
+		}
+		valid--
+		c.setValidSections(valid)
+	}
+	return valid
+}
+
+// getValidSections returns the section count stored under the "count" key of the index database
+func (c *ChainIndexer) getValidSections() uint64 {
+	raw, _ := c.indexDb.Get([]byte("count"))
+	if len(raw) != 8 {
+		return 0 // a missing or wrongly sized entry counts as no sections
+	}
+	return binary.BigEndian.Uint64(raw)
+}
+
+// setValidSections writes cnt as the number of valid sections to the index database and,
+// when the count shrinks, first removes the head entries of the sections that are dropped
+func (c *ChainIndexer) setValidSections(cnt uint64) {
+	// the loop body only runs when the previously stored count is larger than cnt
+	for idx, prev := cnt, c.getValidSections(); idx < prev; idx++ {
+		c.removeSectionHead(idx)
+	}
+
+	// the count is kept as eight big-endian bytes
+	encoded := make([]byte, 8)
+	binary.BigEndian.PutUint64(encoded, cnt)
+	c.indexDb.Put([]byte("count"), encoded)
+}
+
+// getSectionHead reads the last block hash of section idx; a missing entry gives the zero hash
+func (c *ChainIndexer) getSectionHead(idx uint64) common.Hash {
+	key := make([]byte, len("shead")+8)
+	copy(key, "shead")
+	binary.BigEndian.PutUint64(key[len("shead"):], idx)
+	stored, _ := c.indexDb.Get(key)
+	if len(stored) != len(common.Hash{}) {
+		return common.Hash{}
+	}
+	return common.BytesToHash(stored)
+}
+
+// setSectionHead stores shead as the last block hash of section idx in the index database
+func (c *ChainIndexer) setSectionHead(idx uint64, shead common.Hash) {
+	key := make([]byte, len("shead")+8)
+	copy(key, "shead")
+	binary.BigEndian.PutUint64(key[len("shead"):], idx)
+	c.indexDb.Put(key, shead.Bytes())
+}
+
+// removeSectionHead deletes the head hash entry of section idx from the index database
+func (c *ChainIndexer) removeSectionHead(idx uint64) {
+	key := make([]byte, len("shead")+8)
+	copy(key, "shead")
+	binary.BigEndian.PutUint64(key[len("shead"):], idx)
+	c.indexDb.Delete(key)
+}
diff --git a/core/chain_indexer_test.go b/core/chain_indexer_test.go
new file mode 100644
index 0000000000..827976d51c
--- /dev/null
+++ b/core/chain_indexer_test.go
@@ -0,0 +1,235 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package core implements the Ethereum consensus protocol.
+package core
+
+import (
+	"encoding/binary"
+	"math/big"
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+// TestChainIndexerSingle runs ten rounds of the randomized test with a single indexer
+func TestChainIndexerSingle(t *testing.T) {
+	for round := 0; round < 10; round++ {
+		testChainIndexer(t, 1)
+	}
+}
+
+// TestChainIndexerWithChildren runs the randomized test with chains of two to seven
+// indexers, each one added as the child of the previous
+func TestChainIndexerWithChildren(t *testing.T) {
+	for indexers := 2; indexers < 8; indexers++ {
+		testChainIndexer(t, indexers)
+	}
+}
+
+// testChainIndexer runs a test with either a single ChainIndexer or a chain of tciCount indexers,
+// each the child of the previous one; sectionSize and confirmReq are randomized per indexer
+func testChainIndexer(t *testing.T, tciCount int) {
+	db, _ := ethdb.NewMemDatabase()
+	defer db.Close()
+	stop := make(chan struct{})
+	defer close(stop) // runs before db.Close, also when the test bails out through Fatalf
+	tciList := make([]*testChainIndex, tciCount)
+	var lastIndexer *ChainIndexer
+	for i := range tciList {
+		tci := &testChainIndex{t: t, sectionSize: uint64(rand.Intn(100) + 1), confirmReq: uint64(rand.Intn(10)), processCh: make(chan uint64)}
+		tciList[i] = tci
+		tci.indexer = NewChainIndexer(db, ethdb.NewTable(db, string([]byte{byte(i)})), tci, tci.sectionSize, tci.confirmReq, 0, stop)
+		if cs := tci.indexer.CanonicalSections(); cs != 0 {
+			t.Errorf("Expected 0 canonical sections, got %d", cs)
+		}
+		if lastIndexer != nil {
+			lastIndexer.AddChildIndexer(tci.indexer)
+		}
+		lastIndexer = tci.indexer
+	}
+
+	// expectCs expects a certain number of available canonical sections
+	expectCs := func(indexer *ChainIndexer, expCs uint64) {
+		cnt := 0
+		for {
+			cs := indexer.CanonicalSections()
+			if cs == expCs {
+				return
+			}
+			// keep trying for 10 seconds if it does not match
+			cnt++
+			if cnt == 10000 {
+				t.Fatalf("Expected %d canonical sections, got %d", expCs, cs)
+			}
+			time.Sleep(time.Millisecond)
+		}
+	}
+
+	// notify the indexer about a new head or rollback, then expect processed blocks if a section is processable
+	notify := func(headNum, expFailAfter uint64, rollback bool) {
+		tciList[0].indexer.newHead(headNum, rollback)
+		if rollback {
+			for _, tci := range tciList {
+				headNum = tci.rollback(headNum)
+				expectCs(tci.indexer, tci.stored)
+			}
+		} else {
+			for _, tci := range tciList {
+				var more bool
+				headNum, more = tci.newBlocks(headNum, expFailAfter)
+				if !more {
+					break
+				}
+				expectCs(tci.indexer, tci.stored)
+			}
+		}
+	}
+
+	for i := uint64(0); i <= 100; i++ {
+		testCanonicalHeader(db, i)
+	}
+	// start indexer with an already existing chain
+	notify(100, 100, false)
+	// add new blocks one by one
+	for i := uint64(101); i <= 1000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+	// do a rollback
+	notify(500, 500, true)
+	// create new fork
+	for i := uint64(501); i <= 1000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+
+	for i := uint64(1001); i <= 1500; i++ {
+		testCanonicalHeader(db, i)
+	}
+	// create a failed processing scenario where less blocks are available at processing time than notified
+	notify(2000, 1500, false)
+	// notify about a rollback (which could have caused the missing blocks if happened during processing)
+	notify(1500, 1500, true)
+
+	// create new fork
+	for i := uint64(1501); i <= 2000; i++ {
+		testCanonicalHeader(db, i)
+		notify(i, i, false)
+	}
+}
+
+// testCanonicalHeader writes a header with random extra data to db as the canonical block idx
+func testCanonicalHeader(db ethdb.Database, idx uint64) {
+	header := &types.Header{Number: big.NewInt(int64(idx)), Extra: make([]byte, 8)}
+	binary.BigEndian.PutUint64(header.Extra, uint64(rand.Int63()))
+	if idx != 0 { // block 0 keeps a zero parent hash
+		header.ParentHash = GetCanonicalHash(db, idx-1)
+	}
+	WriteHeader(db, header)
+	WriteCanonicalHash(db, header.Hash(), idx)
+}
+
+// testChainIndex implements ChainIndexerBackend and tracks what the test expects from its indexer
+type testChainIndex struct {
+	t                          *testing.T    // test handle used for reporting failures
+	sectionSize, confirmReq    uint64        // the parameters the indexer was created with
+	section, headerCnt, stored uint64        // section given to Reset; headers seen since Reset; sections the test expects to be stored
+	indexer                    *ChainIndexer // the indexer driving this backend
+	processCh                  chan uint64   // carries the number of each processed header from Process to newBlocks
+}
+
+// newBlocks expects process calls after new blocks have arrived. If expFailAfter < headNum then
+// we are simulating a scenario where a rollback has happened after the processing has started and
+// the processing of a section fails. Returns the last stored block number, or false if nothing is stored.
+func (t *testChainIndex) newBlocks(headNum, expFailAfter uint64) (uint64, bool) {
+	var newCount uint64
+	if headNum >= t.confirmReq {
+		newCount = (headNum + 1 - t.confirmReq) / t.sectionSize // same formula as ChainIndexer.newHead
+		if newCount > t.stored {
+			// expect processed blocks, in ascending order, for every newly processable section
+			for exp := t.stored * t.sectionSize; exp < newCount*t.sectionSize; exp++ {
+				if exp > expFailAfter {
+					// rolled back after processing started, no more process calls expected
+					// wait until updating is done to make sure that processing actually fails
+					for {
+						t.indexer.lock.Lock()
+						u := t.indexer.updating
+						t.indexer.lock.Unlock()
+						if !u {
+							break
+						}
+						time.Sleep(time.Millisecond)
+					}
+					// only the sections that lie completely before the failing block count as stored
+					newCount = exp / t.sectionSize
+					break
+				}
+				select {
+				case <-time.After(10 * time.Second):
+					t.t.Fatalf("Expected processed block #%d, got nothing", exp)
+				case proc := <-t.processCh:
+					if proc != exp {
+						t.t.Errorf("Expected processed block #%d, got #%d", exp, proc)
+					}
+				}
+			}
+			t.stored = newCount
+		}
+	}
+	if t.stored == 0 {
+		return 0, false // nothing stored, so there is no head to pass on to the next indexer
+	}
+	return t.stored*t.sectionSize - 1, true // last block of the last stored section
+}
+
+// rollback lowers the expected section count for a rollback to headNum and returns the head for the next indexer
+func (t *testChainIndex) rollback(headNum uint64) uint64 {
+	if changed := headNum / t.sectionSize; changed < t.stored {
+		t.stored = changed
+	}
+	return t.stored * t.sectionSize
+}
+
+// Reset remembers the section about to be processed and clears the per-section header count
+func (t *testChainIndex) Reset(section uint64) {
+	t.section, t.headerCnt = section, 0
+}
+
+// Process counts the header and hands its number to the test goroutine through processCh
+func (t *testChainIndex) Process(header *types.Header) {
+	t.headerCnt++
+	if t.headerCnt > t.sectionSize {
+		t.t.Error("Processing too many headers")
+	}
+	select {
+	case <-time.After(10 * time.Second):
+		t.t.Error("Unexpected call to Process") // Error, not Fatal: this runs on the indexer's update goroutine, not the test goroutine
+	case t.processCh <- header.Number.Uint64():
+	}
+}
+
+func (t *testChainIndex) Commit(db ethdb.Database) error { // checks the header count of the finished section; db is not used
+	if t.headerCnt != t.sectionSize {
+		t.t.Error("Not enough headers processed") // reported for any count other than sectionSize
+	}
+	return nil // the commit itself always succeeds
+}
+
+func (t *testChainIndex) UpdateMsg(done, all uint64) {} // progress messages are ignored by the test backend