diff --git a/core/blockchain.go b/core/blockchain.go index faf6bb94a4..ff372870dd 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -1436,11 +1436,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er // Peek the error for the first block to decide the directing import logic it := newInsertIterator(chain, results, bc.validator) - block, err := it.next() - // Left-trim all the known blocks - if err == ErrKnownBlock { + // Left-trim all the known blocks that don't need to build snapshot + if bc.skipBlock(err, it) { // First block (and state) is known // 1. We did a roll-back, and should now do a re-import // 2. The block is stored as a sidechain, and is lying about it's stateroot, and passes a stateroot @@ -1451,7 +1450,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er localTd = bc.GetTd(current.Hash(), current.NumberU64()) externTd = bc.GetTd(block.ParentHash(), block.NumberU64()-1) // The first block can't be nil ) - for block != nil && err == ErrKnownBlock { + for block != nil && bc.skipBlock(err, it) { externTd = new(big.Int).Add(externTd, block.Difficulty()) if localTd.Cmp(externTd) < 0 { break @@ -1469,7 +1468,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er // When node runs a fast sync again, it can re-import a batch of known blocks via // `insertChain` while a part of them have higher total difficulty than current // head full block(new pivot point). 
- for block != nil && err == ErrKnownBlock { + for block != nil && bc.skipBlock(err, it) { log.Debug("Writing previously known block", "number", block.Number(), "hash", block.Hash()) if err := bc.writeKnownBlock(block); err != nil { return it.index, err @@ -1501,8 +1500,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er // If there are any still remaining, mark as ignored return it.index, err - // Some other error occurred, abort - case err != nil: + // Some other error(except ErrKnownBlock) occurred, abort. + // ErrKnownBlock is allowed here since some known blocks + // still need re-execution to generate snapshots that are missing + case err != nil && !errors.Is(err, ErrKnownBlock): bc.futureBlocks.Remove(block.Hash()) stats.ignored += len(it.chain) bc.reportBlock(block, nil, err) @@ -1520,7 +1521,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er } }() - for ; block != nil && err == nil || err == ErrKnownBlock; block, err = it.next() { + for ; block != nil && err == nil || errors.Is(err, ErrKnownBlock); block, err = it.next() { // If the chain is terminating, stop processing blocks if bc.insertStopped() { log.Debug("Abort during block processing") @@ -1535,8 +1536,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er // Clique blocks where they can share state among each other, so importing an // older block might complete the state of the subsequent one. In this case, // just skip the block (we already validated it once fully (and crashed), since - // its header and body was already in the database). - if err == ErrKnownBlock { + // its header and body was already in the database). But if the corresponding + // snapshot layer is missing, forcibly rerun the execution to build it. 
+		if bc.skipBlock(err, it) {
 			logger := log.Debug
 			if bc.chainConfig.Clique == nil {
 				logger = log.Warn
@@ -2013,6 +2015,38 @@ func (bc *BlockChain) futureBlocksLoop() {
 	}
 }
 
+// skipBlock reports whether the block currently being imported may be bypassed,
+// i.e. treated as already fully handled instead of being executed again.
+func (bc *BlockChain) skipBlock(err error, it *insertIterator) bool {
+	// Bypassing is only ever an option when the validator's sole complaint is
+	// ErrKnownBlock: every check passed and both block and state are present.
+	if !errors.Is(err, ErrKnownBlock) {
+		return false
+	}
+	// With snapshots disabled, block plus (trie-) state is all that is needed.
+	if bc.snaps == nil {
+		return true
+	}
+	// The snapshot-state is present as well, so nothing is left to build.
+	header := it.current() // header can't be nil
+	if bc.snaps.Snapshot(header.Root) != nil {
+		return true
+	}
+	// Trie-state exists but the snapshot-state does not. Look up the parent,
+	// preferring the iterator's previous header over bc.GetHeaderByHash.
+	parent := it.previous()
+	if parent == nil {
+		parent = bc.GetHeaderByHash(header.ParentHash)
+	}
+	if parent == nil || parent.Root == (common.Hash{}) {
+		return false // Theoretically impossible case
+	}
+	// A parent that lacks its snapshot too means this block can be skipped;
+	// a parent that has one means this block must be processed so the snapshot
+	// layers stay gap-free.
+	return bc.snaps.Snapshot(parent.Root) == nil
+}
+
 // maintainTxIndex is responsible for the construction and deletion of the
 // transaction index.
//
diff --git a/core/blockchain_insert.go b/core/blockchain_insert.go
index cb8473c084..4464870279 100644
--- a/core/blockchain_insert.go
+++ b/core/blockchain_insert.go
@@ -150,6 +150,14 @@ func (it *insertIterator) previous() *types.Header {
 	return it.chain[it.index-1].Header()
 }
 
+// current returns the current header that is being processed, or nil.
+func (it *insertIterator) current() *types.Header {
+	if it.index == -1 || it.index >= len(it.chain) {
+		return nil
+	}
+	return it.chain[it.index].Header()
+}
+
 // first returns the first block in the it.
 func (it *insertIterator) first() *types.Block {
 	return it.chain[0]
diff --git a/core/blockchain_repair_test.go b/core/blockchain_repair_test.go
index aca5546e20..f4f7620787 100644
--- a/core/blockchain_repair_test.go
+++ b/core/blockchain_repair_test.go
@@ -1863,3 +1863,129 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) {
 		t.Errorf("Frozen block count mismatch: have %d, want %d", frozen, tt.expFrozen)
 	}
 }
+
+// TestIssue23496 tests the scenario described in https://github.com/ethereum/go-ethereum/pull/23496#issuecomment-926393893
+// Credits to @zzyalbert for finding the issue.
+//
+// Local chain owns these blocks:
+// G B1 B2 B3 B4
+// B1: state committed
+// B2: snapshot disk layer
+// B3: state committed
+// B4: head block
+//
+// The crash happens before the snapshot and the in-memory states are fully
+// persisted, so the chain rewinds itself to B1 (skipping B3 in order to
+// recover the snapshot). B2-B4 are then imported again, and the test checks
+// that the snapshot layer of B3 gets rebuilt even though its state already
+// exists.
+func TestIssue23496(t *testing.T) {
+	// It's hard to follow the test case, visualize the input
+	//log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))
+
+	// Create a temporary persistent database
+	datadir, err := ioutil.TempDir("", "")
+	if err != nil {
+		t.Fatalf("Failed to create temporary datadir: %v", err)
+	}
+	// Remove the directory once the test is over, so that repeated runs do not
+	// pile up databases in the temp folder.
+	defer os.RemoveAll(datadir)
+
+	db, err := rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
+	if err != nil {
+		t.Fatalf("Failed to create persistent database: %v", err)
+	}
+	defer db.Close() // Might double close, should be fine
+
+	// Initialize a fresh chain
+	var (
+		genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db)
+		engine  = ethash.NewFullFaker()
+		config  = &CacheConfig{
+			TrieCleanLimit: 256,
+			TrieDirtyLimit: 256,
+			TrieTimeLimit:  5 * time.Minute,
+			SnapshotLimit:  256,
+			SnapshotWait:   true,
+		}
+	)
+	chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
+	if err != nil {
+		t.Fatalf("Failed to create chain: %v", err)
+	}
+	blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, rawdb.NewMemoryDatabase(), 4, func(i int, b *BlockGen) {
+		b.SetCoinbase(common.Address{0x02})
+		b.SetDifficulty(big.NewInt(1000000))
+	})
+
+	// Insert block B1 and commit the state into disk
+	if _, err := chain.InsertChain(blocks[:1]); err != nil {
+		t.Fatalf("Failed to import canonical chain start: %v", err)
+	}
+	chain.stateCache.TrieDB().Commit(blocks[0].Root(), true, nil)
+
+	// Insert block B2 and commit the snapshot into disk
+	if _, err := chain.InsertChain(blocks[1:2]); err != nil {
+		t.Fatalf("Failed to import canonical chain start: %v", err)
+	}
+	if err := chain.snaps.Cap(blocks[1].Root(), 0); err != nil {
+		t.Fatalf("Failed to flatten snapshots: %v", err)
+	}
+
+	// Insert block B3 and commit the state into disk
+	if _, err := chain.InsertChain(blocks[2:3]); err != nil {
+		t.Fatalf("Failed to import canonical chain start: %v", err)
+	}
+	chain.stateCache.TrieDB().Commit(blocks[2].Root(), true, nil)
+
+	// Insert the remaining blocks
+	if _, err := chain.InsertChain(blocks[3:]); err != nil {
+		t.Fatalf("Failed to import canonical chain tail: %v", err)
+	}
+
+	// Pull the plug on the database, simulating a hard crash
+	db.Close()
+
+	// Start a new blockchain back up and see where the repair leads us
+	db, err = rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
+	if err != nil {
+		t.Fatalf("Failed to reopen persistent database: %v", err)
+	}
+	defer db.Close()
+
+	chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
+	if err != nil {
+		t.Fatalf("Failed to recreate chain: %v", err)
+	}
+	defer chain.Stop()
+
+	// After the repair only the full block head is expected back at B1
+	if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
+		t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
+	}
+	if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
+		t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
+	}
+	if head := chain.CurrentBlock(); head.NumberU64() != uint64(1) {
+		t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(1))
+	}
+
+	// Reinsert B2-B4
+	if _, err := chain.InsertChain(blocks[1:]); err != nil {
+		t.Fatalf("Failed to import canonical chain tail: %v", err)
+	}
+	if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
+		t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
+	}
+	if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
+		t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
+	}
+	if head := chain.CurrentBlock(); head.NumberU64() != uint64(4) {
+		t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
+	}
+	// The snapshot layer of B3 must exist again
+	if layer := chain.Snapshots().Snapshot(blocks[2].Root()); layer == nil {
+		t.Error("Failed to regenerate the snapshot of known state")
+	}
+}