From 0f460ad26e864ae8b4c4cf99147c5b57a10f3be9 Mon Sep 17 00:00:00 2001 From: obscuren Date: Wed, 19 Nov 2014 16:21:28 +0100 Subject: [PATCH] Added caching and database interface to trie * Reimplemented caching for trie * Reimplemented resetting and persisting trie --- ethdb/memory_database.go | 4 +++ ptrie/fullnode.go | 9 ++++- ptrie/shortnode.go | 2 +- ptrie/trie.go | 77 +++++++++++++++++++++++++++++++-------- ptrie/trie_test.go | 78 +++++++++++++++++++++++++++++----------- ptrie/valuenode.go | 2 +- 6 files changed, 134 insertions(+), 38 deletions(-) diff --git a/ethdb/memory_database.go b/ethdb/memory_database.go index 459373eeae..48aa830e77 100644 --- a/ethdb/memory_database.go +++ b/ethdb/memory_database.go @@ -23,6 +23,10 @@ func (db *MemDatabase) Put(key []byte, value []byte) { db.db[string(key)] = value } +func (db *MemDatabase) Set(key []byte, value []byte) { + db.Put(key, value) +} + func (db *MemDatabase) Get(key []byte) ([]byte, error) { return db.db[string(key)], nil } diff --git a/ptrie/fullnode.go b/ptrie/fullnode.go index eaa4611b66..7a7f7d22d4 100644 --- a/ptrie/fullnode.go +++ b/ptrie/fullnode.go @@ -18,7 +18,14 @@ func (self *FullNode) Branches() []Node { return self.nodes[:16] } -func (self *FullNode) Copy() Node { return self } +func (self *FullNode) Copy() Node { + nnode := NewFullNode(self.trie) + for i, node := range self.nodes { + nnode.nodes[i] = node + } + + return nnode +} // Returns the length of non-nil nodes func (self *FullNode) Len() (amount int) { diff --git a/ptrie/shortnode.go b/ptrie/shortnode.go index 49319c555d..73ff2914bf 100644 --- a/ptrie/shortnode.go +++ b/ptrie/shortnode.go @@ -17,7 +17,7 @@ func (self *ShortNode) Value() Node { return self.value } func (self *ShortNode) Dirty() bool { return true } -func (self *ShortNode) Copy() Node { return self } +func (self *ShortNode) Copy() Node { return NewShortNode(self.trie, self.key, self.value) } func (self *ShortNode) RlpData() interface{} { return []interface{}{self.key, self.value.Hash()} diff --git a/ptrie/trie.go b/ptrie/trie.go index bb2b3845ad..687126aef8 100644 --- a/ptrie/trie.go +++ b/ptrie/trie.go @@ -2,6 +2,7 @@ package ptrie import ( "bytes" + "container/list" "sync" "github.com/ethereum/go-ethereum/crypto" @@ -14,33 +15,61 @@ type Backend interface { Set([]byte, []byte) } -type Cache map[string][]byte +type Cache struct { + store map[string][]byte + backend Backend +} + +func NewCache(backend Backend) *Cache { + return &Cache{make(map[string][]byte), backend} +} + +func (self *Cache) Get(key []byte) []byte { + data := self.store[string(key)] + if data == nil { + data = self.backend.Get(key) + } + + return data +} + +func (self *Cache) Set(key []byte, data []byte) { + self.store[string(key)] = data +} + +func (self *Cache) Flush() { + for k, v := range self.store { + self.backend.Set([]byte(k), v) + } -func (self Cache) Get(key []byte) []byte { - return self[string(key)] + // This will eventually grow too large. We'd could + // do a make limit on storage and push out not-so-popular nodes. + //self.Reset() } -func (self Cache) Set(key []byte, data []byte) { - self[string(key)] = data + +func (self *Cache) Reset() { + self.store = make(map[string][]byte) } type Trie struct { mu sync.Mutex root Node roothash []byte - backend Backend -} + cache *Cache -func NewEmpty() *Trie { - return &Trie{sync.Mutex{}, nil, nil, make(Cache)} + revisions *list.List } func New(root []byte, backend Backend) *Trie { trie := &Trie{} + trie.revisions = list.New() trie.roothash = root - trie.backend = backend + trie.cache = NewCache(backend) - value := ethutil.NewValueFromBytes(trie.backend.Get(root)) - trie.root = trie.mknode(value) + if root != nil { + value := ethutil.NewValueFromBytes(trie.cache.Get(root)) + trie.root = trie.mknode(value) + } return trie } @@ -64,10 +93,28 @@ func (self *Trie) Hash() []byte { hash = crypto.Sha3(ethutil.Encode(self.root)) } - self.roothash = hash + if !bytes.Equal(hash, self.roothash) { + self.revisions.PushBack(self.roothash) + self.roothash = hash + } return hash } +func (self *Trie) Commit() { + // Hash first + self.Hash() + + self.cache.Flush() +} + +func (self *Trie) Reset() { + self.cache.Reset() + + revision := self.revisions.Remove(self.revisions.Back()).([]byte) + self.roothash = revision + value := ethutil.NewValueFromBytes(self.cache.Get(self.roothash)) + self.root = self.mknode(value) +} func (self *Trie) UpdateString(key, value string) Node { return self.Update([]byte(key), []byte(value)) } func (self *Trie) Update(key, value []byte) Node { @@ -272,7 +319,7 @@ func (self *Trie) mknode(value *ethutil.Value) Node { func (self *Trie) trans(node Node) Node { switch node := node.(type) { case *HashNode: - value := ethutil.NewValueFromBytes(self.backend.Get(node.key)) + value := ethutil.NewValueFromBytes(self.cache.Get(node.key)) return self.mknode(value) default: return node @@ -283,7 +330,7 @@ func (self *Trie) store(node Node) interface{} { data := ethutil.Encode(node) if len(data) >= 32 { key := crypto.Sha3(data) - self.backend.Set(key, data) + self.cache.Set(key, data) return key } diff --git a/ptrie/trie_test.go b/ptrie/trie_test.go index 6af6e1b406..478f59c602 100644 --- a/ptrie/trie_test.go +++ b/ptrie/trie_test.go @@ -8,6 +8,16 @@ import ( "github.com/ethereum/go-ethereum/ethutil" ) +type Db map[string][]byte + +func (self Db) Get(k []byte) []byte { return self[string(k)] } +func (self Db) Set(k, v []byte) { self[string(k)] = v } + +// Used for testing +func NewEmpty() *Trie { + return New(nil, make(Db)) +} + func TestInsert(t *testing.T) { trie := NewEmpty() @@ -91,7 +101,7 @@ func TestReplication(t *testing.T) { } trie.Hash() - trie2 := New(trie.roothash, trie.backend) + trie2 := New(trie.roothash, trie.cache) if string(trie2.GetString("horse")) != "stallion" { t.Error("expected to have harse => stallion") } @@ -104,37 +114,32 @@ func TestReplication(t *testing.T) { } -func BenchmarkGets(b *testing.B) { +func TestReset(t *testing.T) { trie := NewEmpty() vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, {"horse", "stallion"}, - {"shaman", "horse"}, - {"doge", "coin"}, - {"ether", ""}, - {"dog", "puppy"}, - {"shaman", ""}, - {"somethingveryoddindeedthis is", "myothernodedata"}, } for _, val := range vals { trie.UpdateString(val.k, val.v) } + trie.Commit() - b.ResetTimer() - for i := 0; i < b.N; i++ { - trie.Get([]byte("horse")) - } -} + before := ethutil.CopyBytes(trie.roothash) + trie.UpdateString("should", "revert") + trie.Hash() + // Should have no effect + trie.Hash() + trie.Hash() + // ### -func BenchmarkUpdate(b *testing.B) { - trie := NewEmpty() + trie.Reset() + after := ethutil.CopyBytes(trie.roothash) - b.ResetTimer() - for i := 0; i < b.N; i++ { - trie.UpdateString(fmt.Sprintf("aaaaaaaaa%d", i), "value") + if !bytes.Equal(before, after) { + t.Errorf("expected roots to be equal. %x - %x", before, after) } - trie.Hash() } // Not actual test @@ -150,8 +155,41 @@ func TestOutput(t *testing.T) { fmt.Println("############################## FULL ################################") fmt.Println(trie.root) - trie2 := New(trie.roothash, trie.backend) + trie2 := New(trie.roothash, trie.cache) trie2.GetString(base + "20") fmt.Println("############################## SMALL ################################") fmt.Println(trie2.root) } + +func BenchmarkGets(b *testing.B) { + trie := NewEmpty() + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"ether", ""}, + {"dog", "puppy"}, + {"shaman", ""}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.UpdateString(val.k, val.v) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + trie.Get([]byte("horse")) + } +} + +func BenchmarkUpdate(b *testing.B) { + trie := NewEmpty() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + trie.UpdateString(fmt.Sprintf("aaaaaaaaa%d", i), "value") + } + trie.Hash() +} diff --git a/ptrie/valuenode.go b/ptrie/valuenode.go index c226621a70..c593eb6c60 100644 --- a/ptrie/valuenode.go +++ b/ptrie/valuenode.go @@ -8,6 +8,6 @@ type ValueNode struct { func (self *ValueNode) Value() Node { return self } // Best not to call :-) func (self *ValueNode) Val() []byte { return self.data } func (self *ValueNode) Dirty() bool { return true } -func (self *ValueNode) Copy() Node { return self } +func (self *ValueNode) Copy() Node { return &ValueNode{self.trie, self.data} } func (self *ValueNode) RlpData() interface{} { return self.data } func (self *ValueNode) Hash() interface{} { return self.data }