mirror of https://github.com/ethereum/go-ethereum
all: bloom-filter based pruning mechanism (#21724)
* cmd, core, tests: initial state pruner
core: fix db inspector
cmd/geth: add verify-state
cmd/geth: add verification tool
core/rawdb: implement flatdb
cmd, core: fix rebase
core/state: use new contract code layout
core/state/pruner: avoid deleting genesis state
cmd/geth: add helper function
core, cmd: fix extract genesis
core: minor fixes
contracts: remove useless
core/state/snapshot: plugin stacktrie
core: polish
core/state/snapshot: iterate storage concurrently
core/state/snapshot: fix iteration
core: add comments
core/state/snapshot: polish code
core/state: polish
core/state/snapshot: rebase
core/rawdb: add comments
core/rawdb: fix tests
core/rawdb: improve tests
core/state/snapshot: fix concurrent iteration
core/state: run pruning during the recovery
core, trie: implement martin's idea
core, eth: delete flatdb and polish pruner
trie: fix import
core/state/pruner: add log
core/state/pruner: fix issues
core/state/pruner: don't read back
core/state/pruner: fix contract code write
core/state/pruner: check root node presence
cmd, core: polish log
core/state: use HEAD-127 as the target
core/state/snapshot: improve tests
cmd/geth: fix verification tool
cmd/geth: use HEAD as the verification default target
all: replace the bloomfilter with martin's fork
cmd, core: polish code
core, cmd: forcibly delete state root
core/state/pruner: add hash64
core/state/pruner: fix blacklist
core/state: remove blacklist
cmd, core: delete trie clean cache before pruning
cmd, core: fix lint
cmd, core: fix rebase
core/state: fix the special case for clique networks
core/state/snapshot: remove useless code
core/state/pruner: capping the snapshot after pruning
cmd, core, eth: fixes
core/rawdb: update db inspector
cmd/geth: polish code
core/state/pruner: fsync bloom filter
cmd, core: print warning log
core/state/pruner: adjust the parameters for bloom filter
cmd, core: create the bloom filter by size
core: polish
core/state/pruner: sanitize invalid bloomfilter size
cmd: address comments
cmd/geth: address comments
cmd/geth: address comment
core/state/pruner: address comments
core/state/pruner: rename homedir to datadir
cmd, core: address comments
core/state/pruner: address comment
core/state: address comments
core, cmd, tests: address comments
core: address comments
core/state/pruner: release the iterator after each commit
core/state/pruner: improve pruner
cmd, core: adjust bloom paramters
core/state/pruner: fix lint
core/state/pruner: fix tests
core: fix rebase
core/state/pruner: remove atomic rename
core/state/pruner: address comments
all: run go mod tidy
core/state/pruner: avoid false-positive for the middle state roots
core/state/pruner: add checks for middle roots
cmd/geth: replace crit with error

* core/state/pruner: fix lint

* core: drop legacy bloom filter

* core/state/snapshot: improve pruner

* core/state/snapshot: polish concurrent logs to report ETA vs. hashes

* core/state/pruner: add progress report for pruning and compaction too

* core: fix snapshot test API

* core/state: fix some pruning logs

* core/state/pruner: support recovering from bloom flush fail

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
parent: bbe694fc52
commit: f566dd305e
@@ -0,0 +1,437 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.

package main

import (
    "bytes"
    "errors"
    "time"

    "github.com/ethereum/go-ethereum/cmd/utils"
    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/core/rawdb"
    "github.com/ethereum/go-ethereum/core/state"
    "github.com/ethereum/go-ethereum/core/state/pruner"
    "github.com/ethereum/go-ethereum/core/state/snapshot"
    "github.com/ethereum/go-ethereum/crypto"
    "github.com/ethereum/go-ethereum/log"
    "github.com/ethereum/go-ethereum/rlp"
    "github.com/ethereum/go-ethereum/trie"
    cli "gopkg.in/urfave/cli.v1"
)

var (
    // emptyRoot is the known root hash of an empty trie.
    emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

    // emptyCode is the known hash of the empty EVM bytecode.
    emptyCode = crypto.Keccak256(nil)
)

var (
    snapshotCommand = cli.Command{
        Name:        "snapshot",
        Usage:       "A set of commands based on the snapshot",
        Category:    "MISCELLANEOUS COMMANDS",
        Description: "",
        Subcommands: []cli.Command{
            {
                Name:      "prune-state",
                Usage:     "Prune stale ethereum state data based on the snapshot",
                ArgsUsage: "<root>",
                Action:    utils.MigrateFlags(pruneState),
                Category:  "MISCELLANEOUS COMMANDS",
                Flags: []cli.Flag{
                    utils.DataDirFlag,
                    utils.RopstenFlag,
                    utils.RinkebyFlag,
                    utils.GoerliFlag,
                    utils.LegacyTestnetFlag,
                    utils.CacheTrieJournalFlag,
                    utils.BloomFilterSizeFlag,
                },
                Description: `
geth snapshot prune-state <state-root>
will prune historical state data with the help of the state snapshot.
All trie nodes and contract codes that do not belong to the specified
version state will be deleted from the database. After pruning, only
two version states are available: genesis and the specific one.

The default pruning target is the HEAD-127 state.

WARNING: It's necessary to delete the trie clean cache after the pruning.
If you specify another directory for the trie clean cache via "--cache.trie.journal"
during the use of Geth, please also specify it here for correct deletion. Otherwise
the trie clean cache with default directory will be deleted.
`,
            },
            {
                Name:      "verify-state",
                Usage:     "Recalculate state hash based on the snapshot for verification",
                ArgsUsage: "<root>",
                Action:    utils.MigrateFlags(verifyState),
                Category:  "MISCELLANEOUS COMMANDS",
                Flags: []cli.Flag{
                    utils.DataDirFlag,
                    utils.RopstenFlag,
                    utils.RinkebyFlag,
                    utils.GoerliFlag,
                    utils.LegacyTestnetFlag,
                },
                Description: `
geth snapshot verify-state <state-root>
will traverse the whole accounts and storages set based on the specified
snapshot and recalculate the root hash of state for verification.
In other words, this command does the snapshot to trie conversion.
`,
            },
            {
                Name:      "traverse-state",
                Usage:     "Traverse the state with given root hash for verification",
                ArgsUsage: "<root>",
                Action:    utils.MigrateFlags(traverseState),
                Category:  "MISCELLANEOUS COMMANDS",
                Flags: []cli.Flag{
                    utils.DataDirFlag,
                    utils.RopstenFlag,
                    utils.RinkebyFlag,
                    utils.GoerliFlag,
                    utils.LegacyTestnetFlag,
                },
                Description: `
geth snapshot traverse-state <state-root>
will traverse the whole state from the given state root and will abort if any
referenced trie node or contract code is missing. This command can be used for
state integrity verification. The default checking target is the HEAD state.

It's also usable without snapshot enabled.
`,
            },
            {
                Name:      "traverse-rawstate",
                Usage:     "Traverse the state with given root hash for verification",
                ArgsUsage: "<root>",
                Action:    utils.MigrateFlags(traverseRawState),
                Category:  "MISCELLANEOUS COMMANDS",
                Flags: []cli.Flag{
                    utils.DataDirFlag,
                    utils.RopstenFlag,
                    utils.RinkebyFlag,
                    utils.GoerliFlag,
                    utils.LegacyTestnetFlag,
                },
                Description: `
geth snapshot traverse-rawstate <state-root>
will traverse the whole state from the given root and will abort if any referenced
trie node or contract code is missing. This command can be used for state integrity
verification. The default checking target is the HEAD state. It's basically identical
to traverse-state, but the check granularity is smaller.

It's also usable without snapshot enabled.
`,
            },
        },
    }
)
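// Example invocation (illustrative only; the flag spellings below are assumed
// from the flag definitions above, and the 2048 MB bloom size is just a sample
// value):
//
//    geth snapshot prune-state --datadir <datadir> --bloomfilter.size 2048
//
// With no <root> argument the pruner falls back to the HEAD-127 state, as
// described in the command help text.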

func pruneState(ctx *cli.Context) error {
    stack, config := makeConfigNode(ctx)
    defer stack.Close()

    chain, chaindb := utils.MakeChain(ctx, stack, true)
    defer chaindb.Close()

    pruner, err := pruner.NewPruner(chaindb, chain.CurrentBlock().Header(), stack.ResolvePath(""), stack.ResolvePath(config.Eth.TrieCleanCacheJournal), ctx.GlobalUint64(utils.BloomFilterSizeFlag.Name))
    if err != nil {
        log.Error("Failed to open snapshot tree", "error", err)
        return err
    }
    if ctx.NArg() > 1 {
        log.Error("Too many arguments given")
        return errors.New("too many arguments")
    }
    var targetRoot common.Hash
    if ctx.NArg() == 1 {
        targetRoot, err = parseRoot(ctx.Args()[0])
        if err != nil {
            log.Error("Failed to resolve state root", "error", err)
            return err
        }
    }
    if err = pruner.Prune(targetRoot); err != nil {
        log.Error("Failed to prune state", "error", err)
        return err
    }
    return nil
}

func verifyState(ctx *cli.Context) error {
    stack, _ := makeConfigNode(ctx)
    defer stack.Close()

    chain, chaindb := utils.MakeChain(ctx, stack, true)
    defer chaindb.Close()

    snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, chain.CurrentBlock().Root(), false, false, false)
    if err != nil {
        log.Error("Failed to open snapshot tree", "error", err)
        return err
    }
    if ctx.NArg() > 1 {
        log.Error("Too many arguments given")
        return errors.New("too many arguments")
    }
    var root = chain.CurrentBlock().Root()
    if ctx.NArg() == 1 {
        root, err = parseRoot(ctx.Args()[0])
        if err != nil {
            log.Error("Failed to resolve state root", "error", err)
            return err
        }
    }
    if err := snaptree.Verify(root); err != nil {
        log.Error("Failed to verify state", "error", err)
        return err
    }
    log.Info("Verified the state")
    return nil
}

// traverseState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensuring all nodes and associated
// contract codes are present.
func traverseState(ctx *cli.Context) error {
    stack, _ := makeConfigNode(ctx)
    defer stack.Close()

    chain, chaindb := utils.MakeChain(ctx, stack, true)
    defer chaindb.Close()

    if ctx.NArg() > 1 {
        log.Error("Too many arguments given")
        return errors.New("too many arguments")
    }
    // Use the HEAD root as the default
    head := chain.CurrentBlock()
    if head == nil {
        log.Error("Head block is missing")
        return errors.New("head block is missing")
    }
    var (
        root common.Hash
        err  error
    )
    if ctx.NArg() == 1 {
        root, err = parseRoot(ctx.Args()[0])
        if err != nil {
            log.Error("Failed to resolve state root", "error", err)
            return err
        }
        log.Info("Start traversing the state", "root", root)
    } else {
        root = head.Root()
        log.Info("Start traversing the state", "root", root, "number", head.NumberU64())
    }
    triedb := trie.NewDatabase(chaindb)
    t, err := trie.NewSecure(root, triedb)
    if err != nil {
        log.Error("Failed to open trie", "root", root, "error", err)
        return err
    }
    var (
        accounts   int
        slots      int
        codes      int
        lastReport time.Time
        start      = time.Now()
    )
    accIter := trie.NewIterator(t.NodeIterator(nil))
    for accIter.Next() {
        accounts += 1
        var acc state.Account
        if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil {
            log.Error("Invalid account encountered during traversal", "error", err)
            return err
        }
        if acc.Root != emptyRoot {
            storageTrie, err := trie.NewSecure(acc.Root, triedb)
            if err != nil {
                log.Error("Failed to open storage trie", "root", acc.Root, "error", err)
                return err
            }
            storageIter := trie.NewIterator(storageTrie.NodeIterator(nil))
            for storageIter.Next() {
                slots += 1
            }
            if storageIter.Err != nil {
                log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Err)
                return storageIter.Err
            }
        }
        if !bytes.Equal(acc.CodeHash, emptyCode) {
            code := rawdb.ReadCode(chaindb, common.BytesToHash(acc.CodeHash))
            if len(code) == 0 {
                log.Error("Code is missing", "hash", common.BytesToHash(acc.CodeHash))
                return errors.New("missing code")
            }
            codes += 1
        }
        if time.Since(lastReport) > time.Second*8 {
            log.Info("Traversing state", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
            lastReport = time.Now()
        }
    }
    if accIter.Err != nil {
        log.Error("Failed to traverse state trie", "root", root, "error", accIter.Err)
        return accIter.Err
    }
    log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
    return nil
}

// traverseRawState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensuring all nodes and associated
// contract codes are present. It's basically identical to traverseState
// but it will check each trie node.
func traverseRawState(ctx *cli.Context) error {
    stack, _ := makeConfigNode(ctx)
    defer stack.Close()

    chain, chaindb := utils.MakeChain(ctx, stack, true)
    defer chaindb.Close()

    if ctx.NArg() > 1 {
        log.Error("Too many arguments given")
        return errors.New("too many arguments")
    }
    // Use the HEAD root as the default
    head := chain.CurrentBlock()
    if head == nil {
        log.Error("Head block is missing")
        return errors.New("head block is missing")
    }
    var (
        root common.Hash
        err  error
    )
    if ctx.NArg() == 1 {
        root, err = parseRoot(ctx.Args()[0])
        if err != nil {
            log.Error("Failed to resolve state root", "error", err)
            return err
        }
        log.Info("Start traversing the state", "root", root)
    } else {
        root = head.Root()
        log.Info("Start traversing the state", "root", root, "number", head.NumberU64())
    }
    triedb := trie.NewDatabase(chaindb)
    t, err := trie.NewSecure(root, triedb)
    if err != nil {
        log.Error("Failed to open trie", "root", root, "error", err)
        return err
    }
    var (
        nodes      int
        accounts   int
        slots      int
        codes      int
        lastReport time.Time
        start      = time.Now()
    )
    accIter := t.NodeIterator(nil)
    for accIter.Next(true) {
        nodes += 1
        node := accIter.Hash()

        if node != (common.Hash{}) {
            // Check the presence of non-empty hash nodes (embedded nodes
            // don't have their own hash).
            blob := rawdb.ReadTrieNode(chaindb, node)
            if len(blob) == 0 {
                log.Error("Missing trie node(account)", "hash", node)
                return errors.New("missing account")
            }
        }
        // If it's a leaf node, yes we are touching an account,
        // dig into the storage trie further.
        if accIter.Leaf() {
            accounts += 1
            var acc state.Account
            if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
                log.Error("Invalid account encountered during traversal", "error", err)
                return errors.New("invalid account")
            }
            if acc.Root != emptyRoot {
                storageTrie, err := trie.NewSecure(acc.Root, triedb)
                if err != nil {
                    log.Error("Failed to open storage trie", "root", acc.Root, "error", err)
                    return errors.New("missing storage trie")
                }
                storageIter := storageTrie.NodeIterator(nil)
                for storageIter.Next(true) {
                    nodes += 1
                    node := storageIter.Hash()

                    // Check the presence of non-empty hash nodes (embedded
                    // nodes don't have their own hash).
                    if node != (common.Hash{}) {
                        blob := rawdb.ReadTrieNode(chaindb, node)
                        if len(blob) == 0 {
                            log.Error("Missing trie node(storage)", "hash", node)
                            return errors.New("missing storage")
                        }
                    }
                    // Bump the counter if it's a leaf node.
                    if storageIter.Leaf() {
                        slots += 1
                    }
                }
                if storageIter.Error() != nil {
                    log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Error())
                    return storageIter.Error()
                }
            }
            if !bytes.Equal(acc.CodeHash, emptyCode) {
                code := rawdb.ReadCode(chaindb, common.BytesToHash(acc.CodeHash))
                if len(code) == 0 {
                    log.Error("Code is missing", "account", common.BytesToHash(accIter.LeafKey()))
                    return errors.New("missing code")
                }
                codes += 1
            }
            if time.Since(lastReport) > time.Second*8 {
                log.Info("Traversing state", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
                lastReport = time.Now()
            }
        }
    }
    if accIter.Error() != nil {
        log.Error("Failed to traverse state trie", "root", root, "error", accIter.Error())
        return accIter.Error()
    }
    log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
    return nil
}

func parseRoot(input string) (common.Hash, error) {
    var h common.Hash
    if err := h.UnmarshalText([]byte(input)); err != nil {
        return h, err
    }
    return h, nil
}
@@ -0,0 +1,132 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pruner

import (
    "encoding/binary"
    "errors"
    "os"

    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/core/rawdb"
    "github.com/ethereum/go-ethereum/log"
    bloomfilter "github.com/holiman/bloomfilter/v2"
)

// stateBloomHasher is a wrapper around a byte blob to satisfy the interface API
// requirements of the bloom library used. It's used to convert a trie hash or
// contract code hash into a 64 bit mini hash.
type stateBloomHasher []byte

func (f stateBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
func (f stateBloomHasher) Sum(b []byte) []byte                { panic("not implemented") }
func (f stateBloomHasher) Reset()                             { panic("not implemented") }
func (f stateBloomHasher) BlockSize() int                     { panic("not implemented") }
func (f stateBloomHasher) Size() int                          { return 8 }
func (f stateBloomHasher) Sum64() uint64                      { return binary.BigEndian.Uint64(f) }
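// The sketch below (illustrative only, not part of the original change) shows
// what the hasher actually does: the bloom library only ever calls Sum64, so a
// 32-byte trie-node or code hash is "hashed" by simply taking its first 8 bytes.
// Two keys sharing the same 8-byte prefix therefore map to the same filter bits,
// which is acceptable because the keys are already uniformly distributed Keccak
// hashes.
func exampleMiniHash() uint64 {
    key := common.HexToHash("0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    // Equivalent to binary.BigEndian.Uint64(key[:8]).
    return stateBloomHasher(key.Bytes()).Sum64()
}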

// stateBloom is a bloom filter used during the state conversion (snapshot->state).
// The keys of all generated entries will be recorded here so that in the pruning
// stage the entries belonging to the specific version can be excluded from deletion.
//
// False positives are allowed here. A "false-positive" entry is one that doesn't
// actually belong to the specific version yet is not deleted during pruning.
// The downside of allowing false positives is that we may leave some "dangling"
// nodes on disk. But in practice it's very unlikely that a dangling node is a
// state root. So in theory this pruned state shouldn't be visited anymore. Another
// potential issue is fast sync: if we do another fast sync on top of the pruned
// database, it's problematic and will stop the expansion during syncing.
// TODO address it @rjl493456442 @holiman @karalabe.
//
// After the entire state is generated, the bloom filter should be persisted into
// the disk. It indicates the whole generation procedure is finished.
type stateBloom struct {
    bloom *bloomfilter.Filter
}

// newStateBloomWithSize creates a brand new state bloom for state generation.
// The bloom filter will be created with the passed-in bloom filter size. According
// to https://hur.st/bloomfilter/?n=600000000&p=&m=2048MB&k=4, the parameters
// are picked so that the false-positive rate for mainnet is low enough.
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
    bloom, err := bloomfilter.New(size*1024*1024*8, 4)
    if err != nil {
        return nil, err
    }
    log.Info("Initialized state bloom", "size", common.StorageSize(float64(bloom.M()/8)))
    return &stateBloom{bloom: bloom}, nil
}
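// Worked example (illustrative estimate, not a guarantee): with the suggested
// 2048 MB filter, m = 2048*1024*1024*8 ≈ 1.7e10 bits, k = 4 hash functions and
// roughly n = 600 million mainnet entries, the classic bloom-filter estimate
// p ≈ (1 - e^(-k*n/m))^k gives (1 - e^-0.14)^4 ≈ 3e-4, i.e. only about 0.03% of
// stale entries would survive pruning as false positives.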

// NewStateBloomFromDisk loads the state bloom from the given file.
// In this case the assumption is held that the bloom filter is complete.
func NewStateBloomFromDisk(filename string) (*stateBloom, error) {
    bloom, _, err := bloomfilter.ReadFile(filename)
    if err != nil {
        return nil, err
    }
    return &stateBloom{bloom: bloom}, nil
}

// Commit flushes the bloom filter content into the disk and marks the bloom
// as complete.
func (bloom *stateBloom) Commit(filename, tempname string) error {
    // Write the bloom out into a temporary file
    _, err := bloom.bloom.WriteFile(tempname)
    if err != nil {
        return err
    }
    // Ensure the file is synced to disk
    f, err := os.Open(tempname)
    if err != nil {
        return err
    }
    if err := f.Sync(); err != nil {
        f.Close()
        return err
    }
    f.Close()

    // Move the temporary file into its final location
    return os.Rename(tempname, filename)
}

// Put implements the KeyValueWriter interface. But here only the key is needed.
func (bloom *stateBloom) Put(key []byte, value []byte) error {
    // If the key length is not 32 bytes, ensure it's a contract code
    // entry with the new scheme.
    if len(key) != common.HashLength {
        isCode, codeKey := rawdb.IsCodeKey(key)
        if !isCode {
            return errors.New("invalid entry")
        }
        bloom.bloom.Add(stateBloomHasher(codeKey))
        return nil
    }
    bloom.bloom.Add(stateBloomHasher(key))
    return nil
}

// Delete removes the key from the key-value data store.
func (bloom *stateBloom) Delete(key []byte) error { panic("not supported") }

// Contain is the wrapper of the underlying contains function which
// reports whether the key is contained.
// - If it says yes, the key may be contained
// - If it says no, the key is definitely not contained.
func (bloom *stateBloom) Contain(key []byte) (bool, error) {
    return bloom.bloom.Contains(stateBloomHasher(key)), nil
}
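// Usage sketch (illustrative only, not part of the original change): during the
// snapshot->trie regeneration the filter is used as a write sink that records
// every key, and during the sweep phase a key is kept whenever the filter *may*
// contain it.
func exampleFilterUsage(bloom *stateBloom, nodeHash common.Hash) (keep bool, err error) {
    // Mark phase: record a trie node (or code hash) written while regenerating
    // the target state.
    if err := bloom.Put(nodeHash.Bytes(), nil); err != nil {
        return false, err
    }
    // Sweep phase: keys the filter may contain are skipped by the deleter.
    return bloom.Contain(nodeHash.Bytes())
}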
@@ -0,0 +1,537 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pruner

import (
    "bytes"
    "encoding/binary"
    "errors"
    "fmt"
    "math"
    "os"
    "path/filepath"
    "strings"
    "time"

    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/core/rawdb"
    "github.com/ethereum/go-ethereum/core/state"
    "github.com/ethereum/go-ethereum/core/state/snapshot"
    "github.com/ethereum/go-ethereum/core/types"
    "github.com/ethereum/go-ethereum/crypto"
    "github.com/ethereum/go-ethereum/ethdb"
    "github.com/ethereum/go-ethereum/log"
    "github.com/ethereum/go-ethereum/rlp"
    "github.com/ethereum/go-ethereum/trie"
)

const (
    // stateBloomFilePrefix is the filename prefix of state bloom filter.
    stateBloomFilePrefix = "statebloom"

    // stateBloomFileSuffix is the filename suffix of state bloom filter.
    stateBloomFileSuffix = "bf.gz"

    // stateBloomFileTempSuffix is the filename suffix of state bloom filter
    // while it is being written out to detect write aborts.
    stateBloomFileTempSuffix = ".tmp"

    // rangeCompactionThreshold is the minimal deleted entry number for
    // triggering range compaction. It's a quite arbitrary number but just
    // to avoid triggering range compaction because of small deletions.
    rangeCompactionThreshold = 100000
)

var (
    // emptyRoot is the known root hash of an empty trie.
    emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

    // emptyCode is the known hash of the empty EVM bytecode.
    emptyCode = crypto.Keccak256(nil)
)

// Pruner is an offline tool to prune the stale state with the
// help of the snapshot. The workflow of the pruner is very simple:
//
// - iterate the snapshot, reconstruct the relevant state
// - iterate the database, delete all other state entries which
//   don't belong to the target state and the genesis state
//
// It can take several hours (around 2 hours for mainnet) to finish
// the whole pruning work. It's recommended to run this offline tool
// periodically in order to reclaim disk usage and improve disk read
// performance to some extent.
type Pruner struct {
    db            ethdb.Database
    stateBloom    *stateBloom
    datadir       string
    trieCachePath string
    headHeader    *types.Header
    snaptree      *snapshot.Tree
}

// NewPruner creates the pruner instance.
func NewPruner(db ethdb.Database, headHeader *types.Header, datadir, trieCachePath string, bloomSize uint64) (*Pruner, error) {
    snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headHeader.Root, false, false, false)
    if err != nil {
        return nil, err // The relevant snapshot(s) might not exist
    }
    // Sanitize the bloom filter size if it's too small.
    if bloomSize < 256 {
        log.Warn("Sanitizing bloomfilter size", "provided(MB)", bloomSize, "updated(MB)", 256)
        bloomSize = 256
    }
    stateBloom, err := newStateBloomWithSize(bloomSize)
    if err != nil {
        return nil, err
    }
    return &Pruner{
        db:            db,
        stateBloom:    stateBloom,
        datadir:       datadir,
        trieCachePath: trieCachePath,
        headHeader:    headHeader,
        snaptree:      snaptree,
    }, nil
}
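// Usage sketch (illustrative only; the datadir, cache path and 2048 MB bloom
// size are made-up sample values): this mirrors how the prune-state command
// wires the pruner together.
func examplePrune(db ethdb.Database, head *types.Header) error {
    pruner, err := NewPruner(db, head, "/data/geth", "/data/geth/triecache", 2048)
    if err != nil {
        return err
    }
    // An empty root selects the default HEAD-127 target.
    return pruner.Prune(common.Hash{})
}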

func prune(maindb ethdb.Database, stateBloom *stateBloom, middleStateRoots map[common.Hash]struct{}, start time.Time) error {
    // Delete all stale trie nodes on disk. With the help of the state bloom,
    // the trie nodes (and codes) belonging to the active state will be filtered
    // out. A very small part of the stale tries will also be filtered because of
    // the false-positive rate of the bloom filter. But the assumption is held here
    // that the false-positive rate is low enough (~0.05%). The probability that a
    // dangling node is a state root is super low. So the dangling nodes in
    // theory will never ever be visited again.
    var (
        count  int
        size   common.StorageSize
        pstart = time.Now()
        logged = time.Now()
        batch  = maindb.NewBatch()
        iter   = maindb.NewIterator(nil, nil)
    )
    for iter.Next() {
        key := iter.Key()

        // All state entries that don't belong to the specific state or the
        // genesis are deleted here:
        // - trie node
        // - legacy contract code
        // - new-scheme contract code
        isCode, codeKey := rawdb.IsCodeKey(key)
        if len(key) == common.HashLength || isCode {
            checkKey := key
            if isCode {
                checkKey = codeKey
            }
            if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist {
                log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey))
            } else {
                if ok, err := stateBloom.Contain(checkKey); err != nil {
                    return err
                } else if ok {
                    continue
                }
            }
            count += 1
            size += common.StorageSize(len(key) + len(iter.Value()))
            batch.Delete(key)

            var eta time.Duration // Realistically this will never remain uninitialized
            if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
                var (
                    left  = math.MaxUint64 - binary.BigEndian.Uint64(key[:8])
                    speed = done/uint64(time.Since(start)/time.Millisecond+1) + 1 // +1 to avoid division by zero
                )
                eta = time.Duration(left/speed) * time.Millisecond
            }
            if time.Since(logged) > 8*time.Second {
                log.Info("Pruning state data", "nodes", count, "size", size,
                    "elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
                logged = time.Now()
            }
            // Recreate the iterator after every batch commit in order
            // to allow the underlying compactor to delete the entries.
            if batch.ValueSize() >= ethdb.IdealBatchSize {
                batch.Write()
                batch.Reset()

                iter.Release()
                iter = maindb.NewIterator(nil, key)
            }
        }
    }
    if batch.ValueSize() > 0 {
        batch.Write()
        batch.Reset()
    }
    iter.Release()
    log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))

    // Start compactions; this will remove the deleted data from the disk immediately.
    // Note that for small prunings the compaction is skipped.
    if count >= rangeCompactionThreshold {
        cstart := time.Now()

        for b := byte(0); b < byte(16); b++ {
            log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", b, b+1), "elapsed", common.PrettyDuration(time.Since(cstart)))
            if err := maindb.Compact([]byte{b}, []byte{b + 1}); err != nil {
                log.Error("Database compaction failed", "error", err)
                return err
            }
        }
        log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
    }
    log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
    return nil
}
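// Illustrative helper (not part of the original change): the ETA reported above
// treats the iterated keyspace as uniformly distributed 8-byte prefixes, so both
// the progress made and the remaining work can be read off the current key.
func exampleETA(currentKey []byte, elapsed time.Duration) time.Duration {
    done := binary.BigEndian.Uint64(currentKey[:8])
    left := math.MaxUint64 - done
    speed := done/uint64(elapsed/time.Millisecond+1) + 1 // +1 to avoid division by zero
    return time.Duration(left/speed) * time.Millisecond
}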

// Prune deletes all historical state nodes except the nodes belonging to the
// specified state version. If the user doesn't specify the state version, the
// bottom-most snapshot diff layer is used as the target.
func (p *Pruner) Prune(root common.Hash) error {
    // If the state bloom filter was already committed previously,
    // reuse it for pruning instead of generating a new one. It's
    // mandatory because part of the state may already be deleted,
    // so the recovery procedure is necessary.
    _, stateBloomRoot, err := findBloomFilter(p.datadir)
    if err != nil {
        return err
    }
    if stateBloomRoot != (common.Hash{}) {
        return RecoverPruning(p.datadir, p.db, p.trieCachePath)
    }
    // If the target state root is not specified, use the HEAD-127 state as the
    // target. The reasons for picking it are:
    // - in most of the normal cases, the related state is available
    // - the probability of this layer being reorged is very low
    var layers []snapshot.Snapshot
    if root == (common.Hash{}) {
        // Retrieve all snapshot layers from the current HEAD.
        // In theory there are 128 difflayers + 1 disk layer present,
        // so 128 diff layers are expected to be returned.
        layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true)
        if len(layers) != 128 {
            // Reject if the accumulated diff layers are less than 128. It
            // means that in most normal cases there is no associated state
            // with the bottom-most diff layer.
            return errors.New("the snapshot difflayers are less than 128")
        }
        // Use the bottom-most diff layer as the target
        root = layers[len(layers)-1].Root()
    }
    // Ensure the root is really present. The weak assumption
    // is that the presence of the root can indicate the presence
    // of the entire trie.
    if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
        // The special case is for clique-based networks (rinkeby, goerli
        // and some other private networks): it's possible that two
        // consecutive blocks will have the same root. In this case the
        // snapshot difflayer won't be created. So HEAD-127 may not be
        // paired with the HEAD-127 layer. Instead the paired layer is
        // higher than the bottom-most diff layer. Try to find the
        // bottom-most snapshot layer with state available.
        //
        // Note HEAD and HEAD-1 are ignored. Usually the associated state
        // is available, but we don't want to use the topmost state
        // as the pruning target.
        var found bool
        for i := len(layers) - 2; i >= 2; i-- {
            if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
                root = layers[i].Root()
                found = true
                log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
                break
            }
        }
        if !found {
            if len(layers) > 0 {
                return errors.New("no snapshot paired state")
            }
            return fmt.Errorf("associated state[%x] is not present", root)
        }
    } else {
        if len(layers) > 0 {
            log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127)
        } else {
            log.Info("Selecting user-specified state as the pruning target", "root", root)
        }
    }
    // Before starting the pruning, delete the clean trie cache first.
    // It's necessary, otherwise on the next restart we would hit the
    // deleted state root in the "clean cache" and the incomplete
    // state would be picked for usage.
    deleteCleanTrieCache(p.trieCachePath)

    // All the state roots of the middle layers should be forcibly pruned,
    // otherwise dangling state will be left behind.
    middleRoots := make(map[common.Hash]struct{})
    for _, layer := range layers {
        if layer.Root() == root {
            break
        }
        middleRoots[layer.Root()] = struct{}{}
    }
    // Traverse the target state, re-construct the whole state trie and
    // commit to the given bloom filter.
    start := time.Now()
    if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
        return err
    }
    // Traverse the genesis, put all genesis state entries into the
    // bloom filter too.
    if err := extractGenesis(p.db, p.stateBloom); err != nil {
        return err
    }
    filterName := bloomFilterName(p.datadir, root)

    log.Info("Writing state bloom to disk", "name", filterName)
    if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil {
        return err
    }
    log.Info("State bloom filter committed", "name", filterName)

    if err := prune(p.db, p.stateBloom, middleRoots, start); err != nil {
        return err
    }
    // Pruning is done, now drop the "useless" layers from the snapshot.
    // First, flush the target layer into the disk. After that all
    // diff layers below the target will be merged into the disk.
    if err := p.snaptree.Cap(root, 0); err != nil {
        return err
    }
    // Second, flush the snapshot journal into the disk. All diff
    // layers above the target layer are dropped silently. Eventually the
    // entire snapshot tree is converted into a single disk layer with
    // the pruning target as the root.
    if _, err := p.snaptree.Journal(root); err != nil {
        return err
    }
    // Delete the state bloom; it marks that the entire pruning procedure is
    // finished. If any crash or manual exit happens before this,
    // `RecoverPruning` will pick it up on the next restart to redo the
    // whole thing.
    os.RemoveAll(filterName)
    return nil
}

// RecoverPruning will resume the pruning procedure during the system restart.
// This function is used in this case: the user tries to prune state data, but the
// system was interrupted midway because of a crash or manual kill. In this case,
// if the bloom filter for filtering active state is already constructed, the
// pruning can be resumed. What's more, if the bloom filter is constructed, the
// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
// on disk.
func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error {
    stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir)
    if err != nil {
        return err
    }
    if stateBloomPath == "" {
        return nil // nothing to recover
    }
    headHeader, err := getHeadHeader(db)
    if err != nil {
        return err
    }
    // Initialize the snapshot tree in recovery mode to handle this special case:
    // - Users run the `prune-state` command multiple times
    // - None of these `prune-state` runs finished (e.g. interrupted manually)
    // - The state bloom filter is already generated, a part of state is deleted,
    //   so that resuming the pruning here is mandatory
    // - The state HEAD is rewound already because of multiple incomplete `prune-state`
    // In this case, even if the state HEAD does not exactly match the snapshot, it
    // is still feasible to recover the pruning correctly.
    snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headHeader.Root, false, false, true)
    if err != nil {
        return err // The relevant snapshot(s) might not exist
    }
    stateBloom, err := NewStateBloomFromDisk(stateBloomPath)
    if err != nil {
        return err
    }
    log.Info("Loaded state bloom filter", "path", stateBloomPath)

    // Before starting the pruning, delete the clean trie cache first.
    // It's necessary, otherwise on the next restart we would hit the
    // deleted state root in the "clean cache" and the incomplete
    // state would be picked for usage.
    deleteCleanTrieCache(trieCachePath)

    // All the state roots of the middle layers should be forcibly pruned,
    // otherwise dangling state will be left behind.
    var (
        found       bool
        layers      = snaptree.Snapshots(headHeader.Root, 128, true)
        middleRoots = make(map[common.Hash]struct{})
    )
    for _, layer := range layers {
        if layer.Root() == stateBloomRoot {
            found = true
            break
        }
        middleRoots[layer.Root()] = struct{}{}
    }
    if !found {
        log.Error("Pruning target state is not existent")
        return errors.New("non-existent target state")
    }
    if err := prune(db, stateBloom, middleRoots, time.Now()); err != nil {
        return err
    }
    // Pruning is done, now drop the "useless" layers from the snapshot.
    // First, flush the target layer into the disk. After that all
    // diff layers below the target will be merged into the disk.
    if err := snaptree.Cap(stateBloomRoot, 0); err != nil {
        return err
    }
    // Second, flush the snapshot journal into the disk. All diff
    // layers above are dropped silently. Eventually the entire snapshot
    // tree is converted into a single disk layer with the pruning target
    // as the root.
    if _, err := snaptree.Journal(stateBloomRoot); err != nil {
        return err
    }
    // Delete the state bloom; it marks that the entire pruning procedure is
    // finished. If any crash or manual exit happens before this,
    // `RecoverPruning` will pick it up on the next restart to redo the
    // whole thing.
    os.RemoveAll(stateBloomPath)
    return nil
}

// extractGenesis loads the genesis state and commits all the state entries
// into the given bloomfilter.
func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
    genesisHash := rawdb.ReadCanonicalHash(db, 0)
    if genesisHash == (common.Hash{}) {
        return errors.New("missing genesis hash")
    }
    genesis := rawdb.ReadBlock(db, genesisHash, 0)
    if genesis == nil {
        return errors.New("missing genesis block")
    }
    t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
    if err != nil {
        return err
    }
    accIter := t.NodeIterator(nil)
    for accIter.Next(true) {
        hash := accIter.Hash()

        // Embedded nodes don't have a hash.
        if hash != (common.Hash{}) {
            stateBloom.Put(hash.Bytes(), nil)
        }
        // If it's a leaf node, yes we are touching an account,
        // dig into the storage trie further.
        if accIter.Leaf() {
            var acc state.Account
            if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
                return err
            }
            if acc.Root != emptyRoot {
                storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db))
                if err != nil {
                    return err
                }
                storageIter := storageTrie.NodeIterator(nil)
                for storageIter.Next(true) {
                    hash := storageIter.Hash()
                    if hash != (common.Hash{}) {
                        stateBloom.Put(hash.Bytes(), nil)
                    }
                }
                if storageIter.Error() != nil {
                    return storageIter.Error()
                }
            }
            if !bytes.Equal(acc.CodeHash, emptyCode) {
                stateBloom.Put(acc.CodeHash, nil)
            }
        }
    }
    return accIter.Error()
}

func bloomFilterName(datadir string, hash common.Hash) string {
    return filepath.Join(datadir, fmt.Sprintf("%s.%s.%s", stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix))
}

func isBloomFilter(filename string) (bool, common.Hash) {
    filename = filepath.Base(filename)
    if strings.HasPrefix(filename, stateBloomFilePrefix) && strings.HasSuffix(filename, stateBloomFileSuffix) {
        return true, common.HexToHash(filename[len(stateBloomFilePrefix)+1 : len(filename)-len(stateBloomFileSuffix)-1])
    }
    return false, common.Hash{}
}
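// Round-trip example (illustrative only): the bloom file name embeds the target
// state root, e.g. "statebloom.0x56e8...b421.bf.gz", so a half-finished pruning
// can be detected and resumed simply by scanning the datadir for such a file.
func exampleBloomName(datadir string, root common.Hash) bool {
    name := bloomFilterName(datadir, root)
    ok, parsed := isBloomFilter(name)
    return ok && parsed == root
}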

func findBloomFilter(datadir string) (string, common.Hash, error) {
    var (
        stateBloomPath string
        stateBloomRoot common.Hash
    )
    if err := filepath.Walk(datadir, func(path string, info os.FileInfo, err error) error {
        if info != nil && !info.IsDir() {
            ok, root := isBloomFilter(path)
            if ok {
                stateBloomPath = path
                stateBloomRoot = root
            }
        }
        return nil
    }); err != nil {
        return "", common.Hash{}, err
    }
    return stateBloomPath, stateBloomRoot, nil
}

func getHeadHeader(db ethdb.Database) (*types.Header, error) {
    headHeaderHash := rawdb.ReadHeadBlockHash(db)
    if headHeaderHash == (common.Hash{}) {
        return nil, errors.New("empty head block hash")
    }
    headHeaderNumber := rawdb.ReadHeaderNumber(db, headHeaderHash)
    if headHeaderNumber == nil {
        return nil, errors.New("empty head block number")
    }
    headHeader := rawdb.ReadHeader(db, headHeaderHash, *headHeaderNumber)
    if headHeader == nil {
        return nil, errors.New("empty head header")
    }
    return headHeader, nil
}

const warningLog = `

WARNING!

The clean trie cache was not found. Please delete it yourself after the
pruning. Remember: do not start Geth without deleting the clean trie cache,
otherwise the entire database may be damaged!

Check the command description "geth snapshot prune-state --help" for more details.
`

func deleteCleanTrieCache(path string) {
    if _, err := os.Stat(path); os.IsNotExist(err) {
        log.Warn(warningLog)
        return
    }
    os.RemoveAll(path)
    log.Info("Deleted trie clean cache", "path", path)
}