From d71831255da7ecb3817c9e14c9142fe9d4441d3b Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 6 Sep 2024 23:02:34 +0800 Subject: [PATCH] core/state/snapshot: port changes from 29995 (#30040) #29995 has been reverted due to an unexpected flaw in the state snapshot process. Specifically, it attempts to stop the state snapshot generation, which could potentially cause the system to halt if the generation is not currently running. This pull request ports the changes made in #29995 and fixes the flaw. --- core/state/snapshot/disklayer.go | 23 +++++++++++++++++++ core/state/snapshot/generate.go | 9 +------- core/state/snapshot/snapshot.go | 38 +++++++++----------------------- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index f5518a204c..76928edf07 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -74,6 +74,14 @@ func (dl *diskLayer) Stale() bool { return dl.stale } +// markStale sets the stale flag as true. +func (dl *diskLayer) markStale() { + dl.lock.Lock() + defer dl.lock.Unlock() + + dl.stale = true +} + // Account directly retrieves the account associated with a particular hash in // the snapshot slim data format. func (dl *diskLayer) Account(hash common.Hash) (*types.SlimAccount, error) { @@ -175,3 +183,18 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { return newDiffLayer(dl, blockHash, destructs, accounts, storage) } + +// stopGeneration aborts the state snapshot generation if it is currently running. +func (dl *diskLayer) stopGeneration() { + dl.lock.RLock() + generating := dl.genMarker != nil + dl.lock.RUnlock() + if !generating { + return + } + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + <-abort + } +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index d81a628c91..6d9e163075 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -631,16 +631,10 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er accMarker = nil return nil } - // Always reset the initial account range as 1 whenever recover from the - // interruption. TODO(rjl493456442) can we remove it? - var accountRange = accountCheckRange - if len(accMarker) > 0 { - accountRange = 1 - } origin := common.CopyBytes(accMarker) for { id := trie.StateTrieID(dl.root) - exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountRange, onAccount, types.FullAccountRLP) + exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountCheckRange, onAccount, types.FullAccountRLP) if err != nil { return err // The procedure it aborted, either by external signal or internal error. } @@ -652,7 +646,6 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er ctx.removeStorageLeft() break } - accountRange = accountCheckRange } return nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 752f4359fb..d1ffb5cc2d 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -258,24 +258,11 @@ func (t *Tree) Disable() { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - - layer.lock.RLock() - generating := layer.genMarker != nil - layer.lock.RUnlock() - if !generating { - // Generator is already aborted or finished - break - } - // If the base layer is generating, abort it - if layer.genAbort != nil { - abort := make(chan *generatorStats) - layer.genAbort <- abort - <-abort - } - // Layer should be inactive now, mark it as stale - layer.lock.Lock() - layer.stale = true - layer.lock.Unlock() + // TODO this function will hang if it's called twice. Will + // fix it in the following PRs. + layer.stopGeneration() + layer.markStale() + layer.Release() case *diffLayer: // If the layer is a simple diff, simply mark as stale @@ -730,16 +717,11 @@ func (t *Tree) Rebuild(root common.Hash) { for _, layer := range t.layers { switch layer := layer.(type) { case *diskLayer: - // If the base layer is generating, abort it and save - if layer.genAbort != nil { - abort := make(chan *generatorStats) - layer.genAbort <- abort - <-abort - } - // Layer should be inactive now, mark it as stale - layer.lock.Lock() - layer.stale = true - layer.lock.Unlock() + // TODO this function will hang if it's called twice. Will + // fix it in the following PRs. + layer.stopGeneration() + layer.markStale() + layer.Release() case *diffLayer: // If the layer is a simple diff, simply mark as stale