@ -22,6 +22,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/prque"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
)
@ -37,7 +38,7 @@ var ErrAlreadyProcessed = errors.New("already processed")
type request struct {
hash common . Hash // Hash of the node data content to retrieve
data [ ] byte // Data content of the node, cached until all subtrees complete
raw bool // Whether this is a raw entry (code) or a trie node
code bool // Whether this is a code entry
parents [ ] * request // Parent state nodes referencing this entry (notify all upon completion)
depth int // Depth level within the trie the node is located to prioritise DFS
@ -46,8 +47,7 @@ type request struct {
callback LeafCallback // Callback to invoke if a leaf node it reached on this branch
}
// SyncResult is a simple list to return missing nodes along with their request
// hashes.
// SyncResult is a response with requested data along with it's hash.
type SyncResult struct {
Hash common . Hash // Hash of the originally unknown trie node
Data [ ] byte // Data content of the retrieved node
@ -56,25 +56,40 @@ type SyncResult struct {
// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
batch map [ common . Hash ] [ ] byte // In-memory membatch of recently completed items
nodes map [ common . Hash ] [ ] byte // In-memory membatch of recently completed nodes
codes map [ common . Hash ] [ ] byte // In-memory membatch of recently completed codes
}
// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
func newSyncMemBatch ( ) * syncMemBatch {
return & syncMemBatch {
batch : make ( map [ common . Hash ] [ ] byte ) ,
nodes : make ( map [ common . Hash ] [ ] byte ) ,
codes : make ( map [ common . Hash ] [ ] byte ) ,
}
}
// hasNode reports the trie node with specific hash is already cached.
func ( batch * syncMemBatch ) hasNode ( hash common . Hash ) bool {
_ , ok := batch . nodes [ hash ]
return ok
}
// hasCode reports the contract code with specific hash is already cached.
func ( batch * syncMemBatch ) hasCode ( hash common . Hash ) bool {
_ , ok := batch . codes [ hash ]
return ok
}
// Sync is the main state trie synchronisation scheduler, which provides yet
// unknown trie hashes to retrieve, accepts node data associated with said hashes
// and reconstructs the trie step by step until all is done.
type Sync struct {
database ethdb . KeyValueReader // Persistent database to check for existing entries
membatch * syncMemBatch // Memory buffer to avoid frequent database writes
requests map [ common . Hash ] * request // Pending requests pertaining to a key hash
nodeReqs map [ common . Hash ] * request // Pending requests pertaining to a trie node hash
codeReqs map [ common . Hash ] * request // Pending requests pertaining to a code hash
queue * prque . Prque // Priority queue with the pending requests
bloom * SyncBloom // Bloom filter for fast node existence checks
bloom * SyncBloom // Bloom filter for fast stat e existence checks
}
// NewSync creates a new trie data download scheduler.
@ -82,7 +97,8 @@ func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallb
ts := & Sync {
database : database ,
membatch : newSyncMemBatch ( ) ,
requests : make ( map [ common . Hash ] * request ) ,
nodeReqs : make ( map [ common . Hash ] * request ) ,
codeReqs : make ( map [ common . Hash ] * request ) ,
queue : prque . New ( nil ) ,
bloom : bloom ,
}
@ -96,13 +112,15 @@ func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callb
if root == emptyRoot {
return
}
if _ , ok := s . membatch . batch [ root ] ; ok {
if s . membatch . hasNode ( root ) {
return
}
if s . bloom == nil || s . bloom . Contains ( root [ : ] ) {
// Bloom filter says this might be a duplicate, double check
blob , _ := s . database . Get ( root [ : ] )
if local , err := decodeNode ( root [ : ] , blob ) ; local != nil && err == nil {
// Bloom filter says this might be a duplicate, double check.
// If database says yes, then at least the trie node is present
// and we hold the assumption that it's NOT legacy contract code.
blob := rawdb . ReadTrieNode ( s . database , root )
if len ( blob ) > 0 {
return
}
// False positive, bump fault meter
@ -116,7 +134,7 @@ func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callb
}
// If this sub-trie has a designated parent, link them together
if parent != ( common . Hash { } ) {
ancestor := s . request s[ parent ]
ancestor := s . nodeReq s[ parent ]
if ancestor == nil {
panic ( fmt . Sprintf ( "sub-trie ancestor not found: %x" , parent ) )
}
@ -126,21 +144,25 @@ func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callb
s . schedule ( req )
}
// AddRawEntry schedules the direct retrieval of a state entry that should not be
// interpreted as a trie node, but rather accepted and stored into the database
// as is. This method's goal is to support misc state metadata retrievals (e.g.
// contract code).
func ( s * Sync ) AddRawEntry ( hash common . Hash , depth int , parent common . Hash ) {
// AddCodeEntry schedules the direct retrieval of a contract code that should not
// be interpreted as a trie node, but rather accepted and stored into the database
// as is.
func ( s * Sync ) AddCodeEntry ( hash common . Hash , depth int , parent common . Hash ) {
// Short circuit if the entry is empty or already known
if hash == emptyState {
return
}
if _ , ok := s . membatch . batch [ hash ] ; ok {
if s . membatch . hasCode ( hash ) {
return
}
if s . bloom == nil || s . bloom . Contains ( hash [ : ] ) {
// Bloom filter says this might be a duplicate, double check
if ok , _ := s . database . Has ( hash [ : ] ) ; ok {
// Bloom filter says this might be a duplicate, double check.
// If database says yes, the blob is present for sure.
// Note we only check the existence with new code scheme, fast
// sync is expected to run with a fresh new node. Even there
// exists the code with legacy format, fetch and store with
// new scheme anyway.
if blob := rawdb . ReadCodeWithPrefix ( s . database , hash ) ; len ( blob ) > 0 {
return
}
// False positive, bump fault meter
@ -149,12 +171,12 @@ func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) {
// Assemble the new sub-trie sync request
req := & request {
hash : hash ,
raw : true ,
code : true ,
depth : depth ,
}
// If this sub-trie has a designated parent, link them together
if parent != ( common . Hash { } ) {
ancestor := s . request s[ parent ]
ancestor := s . nodeReq s[ parent ] // the parent of codereq can ONLY be nodereq
if ancestor == nil {
panic ( fmt . Sprintf ( "raw-entry ancestor not found: %x" , parent ) )
}
@ -173,61 +195,64 @@ func (s *Sync) Missing(max int) []common.Hash {
return requests
}
// Process injects a batch of retrieved trie nodes data, returning if something
// was committed to the database and also the index of an entry if its processing
// failed.
func ( s * Sync ) Process ( results [ ] SyncResult ) ( bool , int , error ) {
committed := false
for i , item := range results {
// If the item was not requested, bail out
request := s . requests [ item . Hash ]
if request == nil {
return committed , i , ErrNotRequested
}
if request . data != nil {
return committed , i , ErrAlreadyProcessed
}
// If the item is a raw entry request, commit directly
if request . raw {
request . data = item . Data
s . commit ( request )
committed = true
continue
}
// Process injects the received data for requested item. Note it can
// happpen that the single response commits two pending requests(e.g.
// there are two requests one for code and one for node but the hash
// is same). In this case the second response for the same hash will
// be treated as "non-requested" item or "already-processed" item but
// there is no downside.
func ( s * Sync ) Process ( result SyncResult ) error {
// If the item was not requested either for code or node, bail out
if s . nodeReqs [ result . Hash ] == nil && s . codeReqs [ result . Hash ] == nil {
return ErrNotRequested
}
// There is an pending code request for this data, commit directly
var filled bool
if req := s . codeReqs [ result . Hash ] ; req != nil && req . data == nil {
filled = true
req . data = result . Data
s . commit ( req )
}
// There is an pending node request for this data, fill it.
if req := s . nodeReqs [ result . Hash ] ; req != nil && req . data == nil {
filled = true
// Decode the node data content and update the request
node , err := decodeNode ( item . Hash [ : ] , item . Data )
node , err := decodeNode ( result . Hash [ : ] , result . Data )
if err != nil {
return committed , i , err
return err
}
request . data = item . Data
req . data = result . Data
// Create and schedule a request for all the children nodes
requests , err := s . children ( request , node )
requests , err := s . children ( req , node )
if err != nil {
return committed , i , err
}
if len ( requests ) == 0 && request . deps == 0 {
s . commit ( request )
committed = true
continue
return err
}
request . deps += len ( requests )
if len ( requests ) == 0 && req . deps == 0 {
s . commit ( req )
} else {
req . deps += len ( requests )
for _ , child := range requests {
s . schedule ( child )
}
}
return committed , 0 , nil
}
if ! filled {
return ErrAlreadyProcessed
}
return nil
}
// Commit flushes the data stored in the internal membatch out to persistent
// storage, returning any occurred error.
func ( s * Sync ) Commit ( dbw ethdb . Batch ) error {
// Dump the membatch into a database dbw
for key , value := range s . membatch . batch {
if err := dbw . Put ( key [ : ] , value ) ; err != nil {
return err
for key , value := range s . membatch . nodes {
rawdb . WriteTrieNode ( dbw , key , value )
s . bloom . Add ( key [ : ] )
}
for key , value := range s . membatch . codes {
rawdb . WriteCode ( dbw , key , value )
s . bloom . Add ( key [ : ] )
}
// Drop the membatch data and return
@ -237,21 +262,30 @@ func (s *Sync) Commit(dbw ethdb.Batch) error {
// Pending returns the number of state entries currently pending for download.
func ( s * Sync ) Pending ( ) int {
return len ( s . request s)
return len ( s . nodeReqs ) + len ( s . codeReq s)
}
// schedule inserts a new state retrieval request into the fetch queue. If there
// is already a pending request for this node, the new request will be discarded
// and only a parent reference added to the old one.
func ( s * Sync ) schedule ( req * request ) {
var reqset = s . nodeReqs
if req . code {
reqset = s . codeReqs
}
// If we're already requesting this node, add a new reference and stop
if old , ok := s . requests [ req . hash ] ; ok {
if old , ok := reqse t [ req . hash ] ; ok {
old . parents = append ( old . parents , req . parents ... )
return
}
// Schedule the request for future retrieval
reqset [ req . hash ] = req
// Schedule the request for future retrieval. This queue is shared
// by both node requests and code requests. It can happen that there
// is a trie node and code has same hash. In this case two elements
// with same hash and same or different depth will be pushed. But it's
// ok the worst case is the second response will be treated as duplicated.
s . queue . Push ( req . hash , int64 ( req . depth ) )
s . requests [ req . hash ] = req
}
// children retrieves all the missing children of a state trie entry for future
@ -297,12 +331,14 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
if node , ok := ( child . node ) . ( hashNode ) ; ok {
// Try to resolve the node from the local database
hash := common . BytesToHash ( node )
if _ , ok := s . membatch . batch [ hash ] ; ok {
if s . membatch . hasNode ( hash ) {
continue
}
if s . bloom == nil || s . bloom . Contains ( node ) {
// Bloom filter says this might be a duplicate, double check
if ok , _ := s . database . Has ( node ) ; ok {
// Bloom filter says this might be a duplicate, double check.
// If database says yes, then at least the trie node is present
// and we hold the assumption that it's NOT legacy contract code.
if blob := rawdb . ReadTrieNode ( s . database , common . BytesToHash ( node ) ) ; len ( blob ) > 0 {
continue
}
// False positive, bump fault meter
@ -325,10 +361,13 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
// committed themselves.
func ( s * Sync ) commit ( req * request ) ( err error ) {
// Write the node content to the membatch
s . membatch . batch [ req . hash ] = req . data
delete ( s . requests , req . hash )
if req . code {
s . membatch . codes [ req . hash ] = req . data
delete ( s . codeReqs , req . hash )
} else {
s . membatch . nodes [ req . hash ] = req . data
delete ( s . nodeReqs , req . hash )
}
// Check all parents for completion
for _ , parent := range req . parents {
parent . deps --