@@ -17,6 +17,8 @@
 package trie

 import (
+	"fmt"
+	"io"
 	"sync"
 	"time"
@@ -24,6 +26,7 @@ import (
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/metrics"
+	"github.com/ethereum/go-ethereum/rlp"
 )

 var (
@@ -82,25 +85,188 @@ type Database struct {
 	lock sync.RWMutex
 }

+// rawNode is a simple binary blob used to differentiate between collapsed trie
+// nodes and already encoded RLP binary blobs (while at the same time store them
+// in the same cache fields).
+type rawNode []byte
+
+func (n rawNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
+func (n rawNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
+func (n rawNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
+
+// rawFullNode represents only the useful data content of a full node, with the
+// caches and flags stripped out to minimize its data storage. This type honors
+// the same RLP encoding as the original parent.
+type rawFullNode [17]node
+
+func (n rawFullNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
+func (n rawFullNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
+func (n rawFullNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
+
+func (n rawFullNode) EncodeRLP(w io.Writer) error {
+	var nodes [17]node
+
+	for i, child := range n {
+		if child != nil {
+			nodes[i] = child
+		} else {
+			nodes[i] = nilValueNode
+		}
+	}
+	return rlp.Encode(w, nodes)
+}
+
+// rawShortNode represents only the useful data content of a short node, with the
+// caches and flags stripped out to minimize its data storage. This type honors
+// the same RLP encoding as the original parent.
+type rawShortNode struct {
+	Key []byte
+	Val node
+}
+
+func (n rawShortNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
+func (n rawShortNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
+func (n rawShortNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
+
 // cachedNode is all the information we know about a single cached node in the
 // memory database write layer.
 type cachedNode struct {
-	blob     []byte              // Cached data block of the trie node
-	parents  int                 // Number of live nodes referencing this one
-	children map[common.Hash]int // Children referenced by this nodes
+	node node   // Cached collapsed trie node, or raw rlp data
+	size uint16 // Byte size of the useful cached data
+
+	parents  uint16                 // Number of live nodes referencing this one
+	children map[common.Hash]uint16 // External children referenced by this node

 	flushPrev common.Hash // Previous node in the flush-list
 	flushNext common.Hash // Next node in the flush-list
 }

+// rlp returns the raw rlp encoded blob of the cached node, either directly from
+// the cache, or by regenerating it from the collapsed node.
+func (n *cachedNode) rlp() []byte {
+	if node, ok := n.node.(rawNode); ok {
+		return node
+	}
+	blob, err := rlp.EncodeToBytes(n.node)
+	if err != nil {
+		panic(err)
+	}
+	return blob
+}
+
+// obj returns the decoded and expanded trie node, either directly from the cache,
+// or by regenerating it from the rlp encoded blob.
+func (n *cachedNode) obj(hash common.Hash, cachegen uint16) node {
+	if node, ok := n.node.(rawNode); ok {
+		return mustDecodeNode(hash[:], node, cachegen)
+	}
+	return expandNode(hash[:], n.node, cachegen)
+}
+
+// childs returns all the tracked children of this node, both the implicit ones
+// from inside the node as well as the explicit ones from outside the node.
+func (n *cachedNode) childs() []common.Hash {
+	children := make([]common.Hash, 0, 16)
+	for child := range n.children {
+		children = append(children, child)
+	}
+	if _, ok := n.node.(rawNode); !ok {
+		gatherChildren(n.node, &children)
+	}
+	return children
+}
+
+// gatherChildren traverses the node hierarchy of a collapsed storage node and
+// retrieves all the hashnode children.
+func gatherChildren(n node, children *[]common.Hash) {
+	switch n := n.(type) {
+	case *rawShortNode:
+		gatherChildren(n.Val, children)
+
+	case rawFullNode:
+		for i := 0; i < 16; i++ {
+			gatherChildren(n[i], children)
+		}
+	case hashNode:
+		*children = append(*children, common.BytesToHash(n))
+
+	case valueNode, nil:
+
+	default:
+		panic(fmt.Sprintf("unknown node type: %T", n))
+	}
+}
+
+// simplifyNode traverses the hierarchy of an expanded memory node and discards
+// all the internal caches, returning a node that only contains the raw data.
+func simplifyNode(n node) node {
+	switch n := n.(type) {
+	case *shortNode:
+		// Short nodes discard the flags and cascade
+		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}
+
+	case *fullNode:
+		// Full nodes discard the flags and cascade
+		node := rawFullNode(n.Children)
+		for i := 0; i < len(node); i++ {
+			if node[i] != nil {
+				node[i] = simplifyNode(node[i])
+			}
+		}
+		return node
+
+	case valueNode, hashNode, rawNode:
+		return n
+
+	default:
+		panic(fmt.Sprintf("unknown node type: %T", n))
+	}
+}
+
+// expandNode traverses the node hierarchy of a collapsed storage node and converts
+// all fields and keys into expanded memory form.
+func expandNode(hash hashNode, n node, cachegen uint16) node {
+	switch n := n.(type) {
+	case *rawShortNode:
+		// Short nodes need key and child expansion
+		return &shortNode{
+			Key: compactToHex(n.Key),
+			Val: expandNode(nil, n.Val, cachegen),
+			flags: nodeFlag{
+				hash: hash,
+				gen:  cachegen,
+			},
+		}
+
+	case rawFullNode:
+		// Full nodes need child expansion
+		node := &fullNode{
+			flags: nodeFlag{
+				hash: hash,
+				gen:  cachegen,
+			},
+		}
+		for i := 0; i < len(node.Children); i++ {
+			if n[i] != nil {
+				node.Children[i] = expandNode(nil, n[i], cachegen)
+			}
+		}
+		return node
+
+	case valueNode, hashNode:
+		return n
+
+	default:
+		panic(fmt.Sprintf("unknown node type: %T", n))
+	}
+}
+
 // NewDatabase creates a new trie database to store ephemeral trie content before
 // its written out to disk or garbage collected.
 func NewDatabase(diskdb ethdb.Database) *Database {
 	return &Database{
-		diskdb: diskdb,
-		nodes: map[common.Hash]*cachedNode{
-			{}: {children: make(map[common.Hash]int)},
-		},
+		diskdb:    diskdb,
+		nodes:     map[common.Hash]*cachedNode{{}: {}},
 		preimages: make(map[common.Hash][]byte),
 	}
 }
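The nilValueNode substitution in rawFullNode.EncodeRLP above leans on an RLP property: a nil byte slice and an explicit empty one both encode as the empty string (0x80), which is also how a canonical branch node marks an absent child. Below is a minimal standalone check of that property using the real rlp package; the [][]byte slice is an illustrative stand-in for the unexported 17-slot node array, not the trie package's actual type.

package main

import (
	"bytes"
	"fmt"

	"github.com/ethereum/go-ethereum/rlp"
)

func main() {
	// A 17-slot branch with absent children left nil...
	sparse := make([][]byte, 17)
	sparse[3] = []byte("leaf")

	// ...and the same branch with explicit empty values substituted for nil,
	// mirroring what rawFullNode.EncodeRLP does with nilValueNode.
	filled := make([][]byte, 17)
	for i := range filled {
		filled[i] = []byte{}
	}
	filled[3] = []byte("leaf")

	a, _ := rlp.EncodeToBytes(sparse)
	b, _ := rlp.EncodeToBytes(filled)
	fmt.Println(bytes.Equal(a, b)) // true: both encode absent children as 0x80
}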
@@ -110,33 +276,46 @@ func (db *Database) DiskDB() DatabaseReader {
 	return db.diskdb
 }

-// Insert writes a new trie node to the memory database if it's yet unknown. The
-// method will make a copy of the slice.
-func (db *Database) Insert(hash common.Hash, blob []byte) {
+// InsertBlob writes a new reference tracked blob to the memory database if it's
+// yet unknown. This method should only be used for non-trie nodes that require
+// reference counting, since trie nodes are garbage collected directly through
+// their embedded children.
+func (db *Database) InsertBlob(hash common.Hash, blob []byte) {
 	db.lock.Lock()
 	defer db.lock.Unlock()

-	db.insert(hash, blob)
+	db.insert(hash, blob, rawNode(blob))
 }

-// insert is the private locked version of Insert.
-func (db *Database) insert(hash common.Hash, blob []byte) {
+// insert inserts a collapsed trie node into the memory database. This method is
+// a more generic version of InsertBlob, supporting both raw blob insertions as
+// well as trie node insertions. The blob must always be specified to allow proper
+// size tracking.
+func (db *Database) insert(hash common.Hash, blob []byte, node node) {
 	// If the node's already cached, skip
 	if _, ok := db.nodes[hash]; ok {
 		return
 	}
-	db.nodes[hash] = &cachedNode{
-		blob:      common.CopyBytes(blob),
-		children:  make(map[common.Hash]int),
+	// Create the cached entry for this node
+	entry := &cachedNode{
+		node:      simplifyNode(node),
+		size:      uint16(len(blob)),
 		flushPrev: db.newest,
 	}
+	for _, child := range entry.childs() {
+		if c := db.nodes[child]; c != nil {
+			c.parents++
+		}
+	}
+	db.nodes[hash] = entry
+
 	// Update the flush-list endpoints
 	if db.oldest == (common.Hash{}) {
 		db.oldest, db.newest = hash, hash
 	} else {
 		db.nodes[db.newest].flushNext, db.newest = hash, hash
 	}
-	db.nodesSize += common.StorageSize(common.HashLength + len(blob))
+	db.nodesSize += common.StorageSize(common.HashLength + entry.size)
 }

 // insertPreimage writes a new trie node pre-image to the memory database if it's
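insert threads every new entry onto the tail of a flush-list, a doubly linked list laced through the node map that Cap later walks from oldest to newest. A toy model of just that bookkeeping, with string keys standing in for common.Hash and all names illustrative:

package main

import "fmt"

// entry models only the flush-list links of a cachedNode.
type entry struct {
	flushPrev, flushNext string
}

type cache struct {
	nodes          map[string]*entry
	oldest, newest string
}

func (c *cache) insert(key string) {
	if _, ok := c.nodes[key]; ok {
		return
	}
	c.nodes[key] = &entry{flushPrev: c.newest}

	// Update the flush-list endpoints, exactly as Database.insert does.
	if c.oldest == "" {
		c.oldest, c.newest = key, key
	} else {
		c.nodes[c.newest].flushNext, c.newest = key, key
	}
}

func main() {
	c := &cache{nodes: make(map[string]*entry)}
	for _, k := range []string{"a", "b", "c"} {
		c.insert(k)
	}
	// Walk oldest -> newest, the order in which Cap flushes nodes to disk.
	for k := c.oldest; k != ""; k = c.nodes[k].flushNext {
		fmt.Print(k, " ")
	}
	fmt.Println() // a b c
}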
@@ -151,8 +330,27 @@ func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
 	db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
 }

-// Node retrieves a cached trie node from memory. If it cannot be found cached,
-// the method queries the persistent database for the content.
+// node retrieves a cached trie node from memory, or returns nil if none can be
+// found in the memory cache.
+func (db *Database) node(hash common.Hash, cachegen uint16) node {
+	// Retrieve the node from cache if available
+	db.lock.RLock()
+	node := db.nodes[hash]
+	db.lock.RUnlock()
+
+	if node != nil {
+		return node.obj(hash, cachegen)
+	}
+	// Content unavailable in memory, attempt to retrieve from disk
+	enc, err := db.diskdb.Get(hash[:])
+	if err != nil || enc == nil {
+		return nil
+	}
+	return mustDecodeNode(hash[:], enc, cachegen)
+}
+
+// Node retrieves an encoded cached trie node from memory. If it cannot be found
+// cached, the method queries the persistent database for the content.
 func (db *Database) Node(hash common.Hash) ([]byte, error) {
 	// Retrieve the node from cache if available
 	db.lock.RLock()
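node and Node share the same two-tier read path: consult the in-memory write layer under a read lock, then fall back to the persistent store; they differ only in whether a hit is expanded via obj or re-encoded via rlp. A stripped-down sketch of that lookup order, with plain maps standing in for the node cache and for ethdb:

package main

import (
	"fmt"
	"sync"
)

type tiered struct {
	lock sync.RWMutex
	mem  map[string][]byte // write layer, like db.nodes
	disk map[string][]byte // persistent store, like db.diskdb
}

func (t *tiered) get(key string) []byte {
	// Retrieve from the memory cache if available
	t.lock.RLock()
	blob := t.mem[key]
	t.lock.RUnlock()

	if blob != nil {
		return blob
	}
	// Content unavailable in memory, fall back to disk
	return t.disk[key]
}

func main() {
	t := &tiered{
		mem:  map[string][]byte{"hot": []byte("from cache")},
		disk: map[string][]byte{"cold": []byte("from disk")},
	}
	fmt.Printf("%s / %s\n", t.get("hot"), t.get("cold"))
}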
@@ -160,7 +358,7 @@ func (db *Database) Node(hash common.Hash) ([]byte, error) {
 	db.lock.RUnlock()

 	if node != nil {
-		return node.blob, nil
+		return node.rlp(), nil
 	}
 	// Content unavailable in memory, attempt to retrieve from disk
 	return db.diskdb.Get(hash[:])
@@ -222,20 +420,22 @@ func (db *Database) reference(child common.Hash, parent common.Hash) {
 		return
 	}
 	// If the reference already exists, only duplicate for roots
-	if _, ok = db.nodes[parent].children[child]; ok && parent != (common.Hash{}) {
+	if db.nodes[parent].children == nil {
+		db.nodes[parent].children = make(map[common.Hash]uint16)
+	} else if _, ok = db.nodes[parent].children[child]; ok && parent != (common.Hash{}) {
 		return
 	}
 	node.parents++
 	db.nodes[parent].children[child]++
 }

-// Dereference removes an existing reference from a parent node to a child node.
-func (db *Database) Dereference(child common.Hash, parent common.Hash) {
+// Dereference removes an existing reference from a root node.
+func (db *Database) Dereference(root common.Hash) {
 	db.lock.Lock()
 	defer db.lock.Unlock()

 	nodes, storage, start := len(db.nodes), db.nodesSize, time.Now()
-	db.dereference(child, parent)
+	db.dereference(root, common.Hash{})

 	db.gcnodes += uint64(nodes - len(db.nodes))
 	db.gcsize += storage - db.nodesSize
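With the signature change, callers no longer track parent hashes: they pin a trie root against the metaroot and later drop it with a single argument. A hedged usage sketch, assuming the in-memory ethdb helper from the same tree and eliding error handling:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	triedb := trie.NewDatabase(ethdb.NewMemDatabase())

	t, _ := trie.New(common.Hash{}, triedb)
	t.Update([]byte("foo"), []byte("bar"))
	root, _ := t.Commit(nil)

	// Pin the root against the metaroot, as the blockchain does per block...
	triedb.Reference(root, common.Hash{})

	// ...and release it with the new single-argument form once it leaves the
	// pruning window, garbage collecting nodes reachable only from this root.
	triedb.Dereference(root)
	fmt.Println("released", root.Hex())
}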
@@ -254,10 +454,12 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) {
 	// Dereference the parent-child
 	node := db.nodes[parent]

-	node.children[child]--
-	if node.children[child] == 0 {
-		delete(node.children, child)
+	if node.children != nil && node.children[child] > 0 {
+		node.children[child]--
+		if node.children[child] == 0 {
+			delete(node.children, child)
+		}
 	}
 	// If the child does not exist, it's a previously committed node.
 	node, ok := db.nodes[child]
 	if !ok {
@@ -274,11 +476,11 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) {
 			db.nodes[node.flushNext].flushPrev = node.flushPrev
 		}
 		// Dereference all children and delete the node
-		for hash := range node.children {
+		for _, hash := range node.childs() {
 			db.dereference(hash, child)
 		}
 		delete(db.nodes, child)
-		db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
+		db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
 	}
 }

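The garbage collector now walks childs() rather than the children map alone: external references live in the map, while children embedded in the collapsed node are recovered by traversal. A self-contained toy of that traversal; the types here are local stand-ins mirroring the real ones, not the trie package's:

package main

import "fmt"

type node interface{}
type hashNode string
type rawShortNode struct {
	Key []byte
	Val node
}
type rawFullNode [17]node

// gather mimics gatherChildren: collect every embedded hashNode reference
// (value nodes, omitted here, would simply be skipped).
func gather(n node, out *[]hashNode) {
	switch n := n.(type) {
	case *rawShortNode:
		gather(n.Val, out)
	case rawFullNode:
		for i := 0; i < 16; i++ {
			gather(n[i], out)
		}
	case hashNode:
		*out = append(*out, n)
	case nil:
	}
}

func main() {
	var branch rawFullNode
	branch[1] = hashNode("aaaa")
	branch[7] = &rawShortNode{Key: []byte{3}, Val: hashNode("bbbb")}

	var refs []hashNode
	gather(branch, &refs)
	fmt.Println(refs) // [aaaa bbbb]
}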
@@ -323,7 +525,7 @@ func (db *Database) Cap(limit common.StorageSize) error {
 	for size > limit && oldest != (common.Hash{}) {
 		// Fetch the oldest referenced node and push into the batch
 		node := db.nodes[oldest]
-		if err := batch.Put(oldest[:], node.blob); err != nil {
+		if err := batch.Put(oldest[:], node.rlp()); err != nil {
 			db.lock.RUnlock()
 			return err
 		}
@@ -340,7 +542,7 @@ func (db *Database) Cap(limit common.StorageSize) error {
 		// is the total size, including both the useful cached data (hash -> blob), as
 		// well as the flushlist metadata (2*hash). When flushing items from the cache,
 		// we need to reduce both.
-		size -= common.StorageSize(3*common.HashLength + len(node.blob))
+		size -= common.StorageSize(3*common.HashLength + int(node.size))
 		oldest = node.flushNext
 	}
 	// Flush out any remainder data from the last batch
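A worked example of the accounting above: nodesSize charges each node its hash key plus payload, while a node still waiting in the flush-list also carries two link hashes (flushPrev and flushNext), hence the 3*HashLength term Cap subtracts. The numbers are purely illustrative:

package main

import "fmt"

func main() {
	const hashLength = 32 // common.HashLength

	payload := 120                     // node.size: collapsed RLP bytes
	stored := hashLength + payload     // per-node charge in db.nodesSize
	flushing := 3*hashLength + payload // per-node charge against Cap's limit

	fmt.Println(stored, flushing) // 152 216
}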
@@ -364,7 +566,7 @@ func (db *Database) Cap(limit common.StorageSize) error {
 		delete(db.nodes, db.oldest)
 		db.oldest = node.flushNext

-		db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
+		db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
 	}
 	if db.oldest != (common.Hash{}) {
 		db.nodes[db.oldest].flushPrev = common.Hash{}
@@ -460,12 +662,12 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch) error {
 	if !ok {
 		return nil
 	}
-	for child := range node.children {
+	for _, child := range node.childs() {
 		if err := db.commit(child, batch); err != nil {
 			return err
 		}
 	}
-	if err := batch.Put(hash[:], node.blob); err != nil {
+	if err := batch.Put(hash[:], node.rlp()); err != nil {
 		return err
 	}
 	// If we've reached an optimal batch size, commit and start over
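The commit recursion relies on the standard ethdb batching pattern: accumulate writes and flush whenever the batch crosses ethdb.IdealBatchSize, so one huge trie never builds an unbounded write. A minimal sketch of that pattern in isolation; flush and pairs are hypothetical names, and the in-memory database is again assumed from the same tree:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/ethdb"
)

func flush(db ethdb.Database, pairs map[string][]byte) error {
	batch := db.NewBatch()
	for k, v := range pairs {
		if err := batch.Put([]byte(k), v); err != nil {
			return err
		}
		// If we've reached an optimal batch size, commit and start over
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				return err
			}
			batch.Reset()
		}
	}
	// Flush out any remainder data from the last batch
	return batch.Write()
}

func main() {
	db := ethdb.NewMemDatabase()
	fmt.Println(flush(db, map[string][]byte{"key": []byte("value")}))
}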
@@ -496,11 +698,11 @@ func (db *Database) uncache(hash common.Hash) {
 		db.nodes[node.flushNext].flushPrev = node.flushPrev
 	}
 	// Uncache the node's subtries and remove the node itself too
-	for child := range node.children {
+	for _, child := range node.childs() {
 		db.uncache(child)
 	}
 	delete(db.nodes, hash)
-	db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
+	db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
 }

 // Size returns the current storage size of the memory cache in front of the