@@ -59,6 +59,11 @@ const secureKeyLength = 11 + 32
 // Database is an intermediate write layer between the trie data structures and
 // the disk database. The aim is to accumulate trie writes in-memory and only
 // periodically flush a couple tries to disk, garbage collecting the remainder.
+//
+// Note, the trie Database is **not** thread safe in its mutations, but it **is**
+// thread safe in providing individual, independent node access. The rationale
+// behind this split design is to provide read access to RPC handlers and sync
+// servers even while the trie is executing expensive garbage collection.
 type Database struct {
 	diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes
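
The split locking discipline called out in the new doc comment can be made concrete with a short sketch. This is a minimal toy (the nodeDB type and its fields are invented for illustration, not geth's real ones): mutations serialize on the write lock, while independent node reads take only the read lock and can proceed even while a mutator is garbage collecting.

package main

import (
	"fmt"
	"sync"
)

type nodeDB struct {
	lock    sync.RWMutex
	dirties map[string][]byte // in-memory node blobs, keyed by hash
}

// Node is safe to call concurrently with mutators: it only takes the read lock.
func (db *nodeDB) Node(hash string) ([]byte, bool) {
	db.lock.RLock()
	defer db.lock.RUnlock()
	blob, ok := db.dirties[hash]
	return blob, ok
}

// insert mutates shared state, so it must hold the full write lock.
func (db *nodeDB) insert(hash string, blob []byte) {
	db.lock.Lock()
	defer db.lock.Unlock()
	db.dirties[hash] = blob
}

func main() {
	db := &nodeDB{dirties: make(map[string][]byte)}
	db.insert("0xabc", []byte{0x01})
	if blob, ok := db.Node("0xabc"); ok {
		fmt.Printf("node: %x\n", blob)
	}
}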
@@ -465,8 +470,8 @@ func (db *Database) Nodes() []common.Hash {
 // Reference adds a new reference from a parent node to a child node.
 func (db *Database) Reference(child common.Hash, parent common.Hash) {
-	db.lock.RLock()
-	defer db.lock.RUnlock()
+	db.lock.Lock()
+	defer db.lock.Unlock()
 
 	db.reference(child, parent)
 }
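
The switch from RLock to Lock here fixes a thread-safety violation: Reference mutates the reference counts in the node index, so it is a mutator and must hold the write lock. A toy sketch of why reference counting is inherently a mutation (the refDB type and its fields are illustrative, not geth's):

package main

import (
	"fmt"
	"sync"
)

type refDB struct {
	lock    sync.Mutex
	parents map[string]int // hypothetical child hash -> reference count
}

// Reference must hold the write lock: the count below is mutated.
func (db *refDB) Reference(child string) {
	db.lock.Lock()
	defer db.lock.Unlock()
	db.parents[child]++
}

// Dereference drops a reference and garbage collects unreferenced entries.
func (db *refDB) Dereference(child string) {
	db.lock.Lock()
	defer db.lock.Unlock()
	if db.parents[child]--; db.parents[child] <= 0 {
		delete(db.parents, child)
	}
}

func main() {
	db := &refDB{parents: make(map[string]int)}
	db.Reference("0xroot")
	db.Dereference("0xroot")
	fmt.Println("live entries:", len(db.parents))
}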
@@ -561,13 +566,14 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) {
 // Cap iteratively flushes old but still referenced trie nodes until the total
 // memory usage goes below the given threshold.
+//
+// Note, this method is a non-synchronized mutator. It is unsafe to call this
+// concurrently with other mutators.
 func (db *Database) Cap(limit common.StorageSize) error {
 	// Create a database batch to flush persistent data out. It is important that
 	// outside code doesn't see an inconsistent state (referenced data removed from
 	// memory cache during commit but not yet in persistent storage). This is ensured
 	// by only uncaching existing data when the database write finalizes.
-	db.lock.RLock()
-
 	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
 	batch := db.diskdb.NewBatch()
@@ -583,12 +589,10 @@ func (db *Database) Cap(limit common.StorageSize) error {
 	for hash, preimage := range db.preimages {
 		if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
 			log.Error("Failed to commit preimage from trie database", "err", err)
-			db.lock.RUnlock()
 			return err
 		}
 		if batch.ValueSize() > ethdb.IdealBatchSize {
 			if err := batch.Write(); err != nil {
-				db.lock.RUnlock()
 				return err
 			}
 			batch.Reset()
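
The loop above follows a common write-batching pattern: accumulate puts, then flush and reset whenever the pending data crosses ethdb.IdealBatchSize. A self-contained sketch of that pattern with a stand-in batch type (not geth's ethdb.Batch):

package main

import "fmt"

const idealBatchSize = 100 * 1024 // geth's ethdb.IdealBatchSize is 100 KiB

type batch struct {
	pending int // bytes accumulated but not yet written
}

func (b *batch) Put(key, value []byte) error { b.pending += len(value); return nil }
func (b *batch) ValueSize() int              { return b.pending }
func (b *batch) Write() error                { fmt.Println("flushing", b.pending, "bytes"); return nil }
func (b *batch) Reset()                      { b.pending = 0 }

func main() {
	b := new(batch)
	for i := 0; i < 1000; i++ {
		if err := b.Put([]byte("key"), make([]byte, 512)); err != nil {
			panic(err)
		}
		// Mirror the loop above: flush early so the batch never balloons
		if b.ValueSize() > idealBatchSize {
			if err := b.Write(); err != nil {
				panic(err)
			}
			b.Reset()
		}
	}
	b.Write() // flush the remainder
}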
@@ -601,14 +605,12 @@ func (db *Database) Cap(limit common.StorageSize) error {
 		// Fetch the oldest referenced node and push into the batch
 		node := db.dirties[oldest]
 		if err := batch.Put(oldest[:], node.rlp()); err != nil {
-			db.lock.RUnlock()
 			return err
 		}
 		// If we exceeded the ideal batch size, commit and reset
 		if batch.ValueSize() >= ethdb.IdealBatchSize {
 			if err := batch.Write(); err != nil {
 				log.Error("Failed to write flush list to disk", "err", err)
-				db.lock.RUnlock()
 				return err
 			}
 			batch.Reset()
@@ -623,11 +625,8 @@ func (db *Database) Cap(limit common.StorageSize) error {
 	// Flush out any remainder data from the last batch
 	if err := batch.Write(); err != nil {
 		log.Error("Failed to write flush list to disk", "err", err)
-		db.lock.RUnlock()
 		return err
 	}
-	db.lock.RUnlock()
-
 	// Write successful, clear out the flushed data
 	db.lock.Lock()
 	defer db.lock.Unlock()
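
With the read lock gone from the flush path, Cap (and Commit below) is documented as a non-synchronized mutator, so serializing concurrent mutators becomes the caller's job. One plausible way a caller could do that, sketched with hypothetical names rather than geth's actual call sites:

package main

import "sync"

type trieDB struct {
	commitLock sync.Mutex // hypothetical: serializes mutators externally
}

// Cap is a stand-in for the real non-synchronized mutator.
func (db *trieDB) Cap(limit uint64) error { return nil }

// capSafely is a hypothetical wrapper ensuring two mutators never overlap.
func (db *trieDB) capSafely(limit uint64) error {
	db.commitLock.Lock()
	defer db.commitLock.Unlock()
	return db.Cap(limit)
}

func main() {
	db := new(trieDB)
	_ = db.capSafely(4 * 1024 * 1024)
}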
@@ -661,16 +660,16 @@ func (db *Database) Cap(limit common.StorageSize) error {
 }
 
 // Commit iterates over all the children of a particular node, writes them out
-// to disk, forcefully tearing down all references in both directions.
+// to disk, forcefully tearing down all references in both directions. As a side
+// effect, all pre-images accumulated up to this point are also written.
 //
-// As a side effect, all pre-images accumulated up to this point are also written.
+// Note, this method is a non-synchronized mutator. It is unsafe to call this
+// concurrently with other mutators.
 func (db *Database) Commit(node common.Hash, report bool) error {
 	// Create a database batch to flush persistent data out. It is important that
 	// outside code doesn't see an inconsistent state (referenced data removed from
 	// memory cache during commit but not yet in persistent storage). This is ensured
 	// by only uncaching existing data when the database write finalizes.
-	db.lock.RLock()
-
 	start := time.Now()
 	batch := db.diskdb.NewBatch()
@@ -678,41 +677,47 @@ func (db *Database) Commit(node common.Hash, report bool) error {
 	for hash, preimage := range db.preimages {
 		if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
 			log.Error("Failed to commit preimage from trie database", "err", err)
-			db.lock.RUnlock()
 			return err
 		}
 		// If the batch is too large, flush to disk
 		if batch.ValueSize() > ethdb.IdealBatchSize {
 			if err := batch.Write(); err != nil {
-				db.lock.RUnlock()
 				return err
 			}
 			batch.Reset()
 		}
 	}
+	// Since we're going to replay trie node writes into the clean cache, flush out
+	// any batched pre-images before continuing.
+	if err := batch.Write(); err != nil {
+		return err
+	}
+	batch.Reset()
+
 	// Move the trie itself into the batch, flushing if enough data is accumulated
 	nodes, storage := len(db.dirties), db.dirtiesSize
-	if err := db.commit(node, batch); err != nil {
+
+	uncacher := &cleaner{db}
+	if err := db.commit(node, batch, uncacher); err != nil {
 		log.Error("Failed to commit trie from trie database", "err", err)
-		db.lock.RUnlock()
 		return err
 	}
-	// Write batch ready, unlock for readers during persistence
+	// Trie mostly committed to disk, flush any batch leftovers
 	if err := batch.Write(); err != nil {
 		log.Error("Failed to write trie to disk", "err", err)
-		db.lock.RUnlock()
 		return err
 	}
-	db.lock.RUnlock()
-
-	// Write successful, clear out the flushed data
+	// Uncache any leftovers in the last batch
 	db.lock.Lock()
 	defer db.lock.Unlock()
 
+	batch.Replay(uncacher)
+	batch.Reset()
+
+	// Reset the storage counters and bumped metrics
 	db.preimages = make(map[common.Hash][]byte)
 	db.preimagesSize = 0
 
-	db.uncache(node)
-
 	memcacheCommitTimeTimer.Update(time.Since(start))
 	memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
 	memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))
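
The new flow hinges on batches being replayable: after a successful write, the same batch is fed back through batch.Replay(uncacher), so the cleaner sees exactly the keys that just hit disk. A minimal sketch of that replay mechanism (a stand-in mirroring the shape of ethdb.Batch.Replay, not the real implementation):

package main

import "fmt"

type writer interface {
	Put(key, value []byte) error
	Delete(key []byte) error
}

type kv struct{ key, value []byte }

type batch struct{ writes []kv }

// Put records the write so it can be replayed later.
func (b *batch) Put(key, value []byte) error {
	b.writes = append(b.writes, kv{key, value})
	return nil
}

// Replay feeds every accumulated write back into the given handler.
func (b *batch) Replay(w writer) error {
	for _, op := range b.writes {
		if err := w.Put(op.key, op.value); err != nil {
			return err
		}
	}
	return nil
}

type printer struct{}

func (printer) Put(key, value []byte) error { fmt.Printf("put %s\n", key); return nil }
func (printer) Delete(key []byte) error     { return nil }

func main() {
	b := new(batch)
	b.Put([]byte("node-1"), []byte{0x80})
	b.Replay(printer{}) // the "uncacher" role in the diff above
}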
@@ -732,14 +737,14 @@ func (db *Database) Commit(node common.Hash, report bool) error {
 }
 
 // commit is the private locked version of Commit.
-func (db *Database) commit(hash common.Hash, batch ethdb.Batch) error {
+func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleaner) error {
 	// If the node does not exist, it's a previously committed node
 	node, ok := db.dirties[hash]
 	if !ok {
 		return nil
 	}
 	for _, child := range node.childs() {
-		if err := db.commit(child, batch); err != nil {
+		if err := db.commit(child, batch, uncacher); err != nil {
 			return err
 		}
 	}
@@ -751,39 +756,58 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch) error {
 		if err := batch.Write(); err != nil {
 			return err
 		}
+		db.lock.Lock()
+		batch.Replay(uncacher)
+		batch.Reset()
+		db.lock.Unlock()
 	}
 	return nil
 }
 
-// uncache is the post-processing step of a commit operation where the already
-// persisted trie is removed from the cache. The reason behind the two-phase
-// commit is to ensure consistent data availability while moving from memory
-// to disk.
-func (db *Database) uncache(hash common.Hash) {
+// cleaner is a database batch replayer that takes a batch of write operations
+// and cleans up the trie database from anything written to disk.
+type cleaner struct {
+	db *Database
+}
+
+// Put reacts to database writes and implements dirty data uncaching. This is the
+// post-processing step of a commit operation where the already persisted trie is
+// removed from the dirty cache and moved into the clean cache. The reason behind
+// the two-phase commit is to ensure data availability while moving from memory
+// to disk.
+func (c *cleaner) Put(key []byte, rlp []byte) error {
+	hash := common.BytesToHash(key)
+
 	// If the node does not exist, we're done on this path
-	node, ok := db.dirties[hash]
+	node, ok := c.db.dirties[hash]
 	if !ok {
-		return
+		return nil
 	}
 	// Node still exists, remove it from the flush-list
 	switch hash {
-	case db.oldest:
-		db.oldest = node.flushNext
-		db.dirties[node.flushNext].flushPrev = common.Hash{}
-	case db.newest:
-		db.newest = node.flushPrev
-		db.dirties[node.flushPrev].flushNext = common.Hash{}
+	case c.db.oldest:
+		c.db.oldest = node.flushNext
+		c.db.dirties[node.flushNext].flushPrev = common.Hash{}
+	case c.db.newest:
+		c.db.newest = node.flushPrev
+		c.db.dirties[node.flushPrev].flushNext = common.Hash{}
 	default:
-		db.dirties[node.flushPrev].flushNext = node.flushNext
-		db.dirties[node.flushNext].flushPrev = node.flushPrev
+		c.db.dirties[node.flushPrev].flushNext = node.flushNext
+		c.db.dirties[node.flushNext].flushPrev = node.flushPrev
 	}
-	// Uncache the node's subtries and remove the node itself too
-	for _, child := range node.childs() {
-		db.uncache(child)
+	// Remove the node from the dirty cache
+	delete(c.db.dirties, hash)
+	c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
+
+	// Move the flushed node into the clean cache to prevent insta-reloads
+	if c.db.cleans != nil {
+		c.db.cleans.Set(string(hash[:]), rlp)
 	}
-	delete(db.dirties, hash)
-	db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
+	return nil
 }
+
+func (c *cleaner) Delete(key []byte) error {
+	panic("Not implemented")
+}
 
 // Size returns the current storage size of the memory cache in front of the
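
Finally, the reason cleaner.Put copies each flushed node into the clean cache: without that step, the next read of a just-flushed node would miss both in-memory layers and immediately fall back to disk (an "insta-reload"). A simplified sketch of the three-layer lookup and flush path (stand-in types and maps, not geth's real cache):

package main

import "fmt"

type db struct {
	cleans  map[string][]byte // recently flushed, immutable node blobs
	dirties map[string][]byte // in-memory, not yet persisted
	disk    map[string][]byte // persistent storage
}

// node resolves a trie node: dirty layer first, then clean, then disk.
func (d *db) node(hash string) []byte {
	if blob, ok := d.dirties[hash]; ok {
		return blob
	}
	if blob, ok := d.cleans[hash]; ok {
		return blob // hit: no disk access needed
	}
	return d.disk[hash]
}

// flush persists a dirty node and moves it into the clean cache.
func (d *db) flush(hash string) {
	blob := d.dirties[hash]
	d.disk[hash] = blob
	delete(d.dirties, hash)
	d.cleans[hash] = blob // prevent an immediate re-read from disk
}

func main() {
	d := &db{
		cleans:  map[string][]byte{},
		dirties: map[string][]byte{"0xabc": {0x01}},
		disk:    map[string][]byte{},
	}
	d.flush("0xabc")
	fmt.Printf("served from clean cache: %x\n", d.node("0xabc"))
}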