@ -23,10 +23,11 @@
package discover
import (
"crypto/rand"
crand "crypto/rand"
"encoding/binary"
"errors"
"fmt"
mrand "math/rand"
"net"
"sort"
"sync"
@ -35,29 +36,45 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p/netutil"
)
const (
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
hashBits = len ( common . Hash { } ) * 8
nBuckets = hashBits + 1 // Number of buckets
maxBondingPingPongs = 16
maxFindnodeFailures = 5
autoRefreshInterval = 1 * time . Hour
seedCount = 30
seedMaxAge = 5 * 24 * time . Hour
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
maxReplacements = 10 // Size of per-bucket replacement list
// We keep buckets for the upper 1/15 of distances because
// it's very unlikely we'll ever encounter a node that's closer.
hashBits = len ( common . Hash { } ) * 8
nBuckets = hashBits / 15 // Number of buckets
bucketMinDistance = hashBits - nBuckets // Log distance of closest bucket
// IP address limits.
bucketIPLimit , bucketSubnet = 2 , 24 // at most 2 addresses from the same /24
tableIPLimit , tableSubnet = 10 , 24
maxBondingPingPongs = 16 // Limit on the number of concurrent ping/pong interactions
maxFindnodeFailures = 5 // Nodes exceeding this limit are dropped
refreshInterval = 30 * time . Minute
revalidateInterval = 10 * time . Second
copyNodesInterval = 30 * time . Second
seedMinTableTime = 5 * time . Minute
seedCount = 30
seedMaxAge = 5 * 24 * time . Hour
)
type Table struct {
mutex sync . Mutex // protects buckets, their content, and nursery
mutex sync . Mutex // protects buckets, bucke t content, nursery, rand
buckets [ nBuckets ] * bucket // index of known nodes by distance
nursery [ ] * Node // bootstrap nodes
db * nodeDB // database of known nodes
rand * mrand . Rand // source of randomness, periodically reseeded
ips netutil . DistinctNetSet
db * nodeDB // database of known nodes
refreshReq chan chan struct { }
initDone chan struct { }
closeReq chan struct { }
closed chan struct { }
@ -89,9 +106,13 @@ type transport interface {
// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct { entries [ ] * Node }
type bucket struct {
entries [ ] * Node // live entries, sorted by time of last contact
replacements [ ] * Node // recently seen nodes to be used if revalidation fails
ips netutil . DistinctNetSet
}
func newTable ( t transport , ourID NodeID , ourAddr * net . UDPAddr , nodeDBPath string ) ( * Table , error ) {
func newTable ( t transport , ourID NodeID , ourAddr * net . UDPAddr , nodeDBPath string , bootnodes [ ] * Node ) ( * Table , error ) {
// If no node database was given, use an in-memory one
db , err := newNodeDB ( nodeDBPath , Version , ourID )
if err != nil {
@ -104,19 +125,42 @@ func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string
bonding : make ( map [ NodeID ] * bondproc ) ,
bondslots : make ( chan struct { } , maxBondingPingPongs ) ,
refreshReq : make ( chan chan struct { } ) ,
initDone : make ( chan struct { } ) ,
closeReq : make ( chan struct { } ) ,
closed : make ( chan struct { } ) ,
rand : mrand . New ( mrand . NewSource ( 0 ) ) ,
ips : netutil . DistinctNetSet { Subnet : tableSubnet , Limit : tableIPLimit } ,
}
if err := tab . setFallbackNodes ( bootnodes ) ; err != nil {
return nil , err
}
for i := 0 ; i < cap ( tab . bondslots ) ; i ++ {
tab . bondslots <- struct { } { }
}
for i := range tab . buckets {
tab . buckets [ i ] = new ( bucket )
tab . buckets [ i ] = & bucket {
ips : netutil . DistinctNetSet { Subnet : bucketSubnet , Limit : bucketIPLimit } ,
}
}
go tab . refreshLoop ( )
tab . seedRand ( )
tab . loadSeedNodes ( false )
// Start the background expiration goroutine after loading seeds so that the search for
// seed nodes also considers older nodes that would otherwise be removed by the
// expiration.
tab . db . ensureExpirer ( )
go tab . loop ( )
return tab , nil
}
func ( tab * Table ) seedRand ( ) {
var b [ 8 ] byte
crand . Read ( b [ : ] )
tab . mutex . Lock ( )
tab . rand . Seed ( int64 ( binary . BigEndian . Uint64 ( b [ : ] ) ) )
tab . mutex . Unlock ( )
}
// Self returns the local node.
// The returned node should not be modified by the caller.
func ( tab * Table ) Self ( ) * Node {
@ -127,9 +171,12 @@ func (tab *Table) Self() *Node {
// table. It will not write the same node more than once. The nodes in
// the slice are copies and can be modified by the caller.
func ( tab * Table ) ReadRandomNodes ( buf [ ] * Node ) ( n int ) {
if ! tab . isInitDone ( ) {
return 0
}
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
// TODO: tree-based buckets would help here
// Find all non-empty buckets and get a fresh slice of their entries.
var buckets [ ] [ ] * Node
for _ , b := range tab . buckets {
@ -141,8 +188,8 @@ func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
return 0
}
// Shuffle the buckets.
for i := uint32 ( len ( buckets ) ) - 1 ; i > 0 ; i -- {
j := randUint ( i )
for i := len ( buckets ) - 1 ; i > 0 ; i -- {
j := tab . rand . Intn ( len ( buckets ) )
buckets [ i ] , buckets [ j ] = buckets [ j ] , buckets [ i ]
}
// Move head of each bucket into buf, removing buckets that become empty.
@ -161,15 +208,6 @@ func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
return i + 1
}
func randUint ( max uint32 ) uint32 {
if max == 0 {
return 0
}
var b [ 4 ] byte
rand . Read ( b [ : ] )
return binary . BigEndian . Uint32 ( b [ : ] ) % max
}
// Close terminates the network listener and flushes the node database.
func ( tab * Table ) Close ( ) {
select {
@ -180,16 +218,15 @@ func (tab *Table) Close() {
}
}
// S etFallbackNodes sets the initial points of contact. These nodes
// s etFallbackNodes sets the initial points of contact. These nodes
// are used to connect to the network if the table is empty and there
// are no known nodes in the database.
func ( tab * Table ) S etFallbackNodes( nodes [ ] * Node ) error {
func ( tab * Table ) s etFallbackNodes( nodes [ ] * Node ) error {
for _ , n := range nodes {
if err := n . validateComplete ( ) ; err != nil {
return fmt . Errorf ( "bad bootstrap/fallback node %q (%v)" , n , err )
}
}
tab . mutex . Lock ( )
tab . nursery = make ( [ ] * Node , 0 , len ( nodes ) )
for _ , n := range nodes {
cpy := * n
@ -198,11 +235,19 @@ func (tab *Table) SetFallbackNodes(nodes []*Node) error {
cpy . sha = crypto . Keccak256Hash ( n . ID [ : ] )
tab . nursery = append ( tab . nursery , & cpy )
}
tab . mutex . Unlock ( )
tab . refresh ( )
return nil
}
// isInitDone returns whether the table's initial seeding procedure has completed.
func ( tab * Table ) isInitDone ( ) bool {
select {
case <- tab . initDone :
return true
default :
return false
}
}
// Resolve searches for a specific node with the given ID.
// It returns nil if the node could not be found.
func ( tab * Table ) Resolve ( targetID NodeID ) * Node {
@ -314,33 +359,49 @@ func (tab *Table) refresh() <-chan struct{} {
return done
}
// refreshLoop schedules doRefresh runs and coordinates shutdown.
func ( tab * Table ) refreshL oop( ) {
// loop schedules refresh, revalidate runs and coordinates shutdown.
func ( tab * Table ) l oop( ) {
var (
timer = time . NewTicker ( autoRefreshInterval )
waiting [ ] chan struct { } // accumulates waiting callers while doRefresh runs
done chan struct { } // where doRefresh reports completion
revalidate = time . NewTimer ( tab . nextRevalidateTime ( ) )
refresh = time . NewTicker ( refreshInterval )
copyNodes = time . NewTicker ( copyNodesInterval )
revalidateDone = make ( chan struct { } )
refreshDone = make ( chan struct { } ) // where doRefresh reports completion
waiting = [ ] chan struct { } { tab . initDone } // holds waiting callers while doRefresh runs
)
defer refresh . Stop ( )
defer revalidate . Stop ( )
defer copyNodes . Stop ( )
// Start initial refresh.
go tab . doRefresh ( refreshDone )
loop :
for {
select {
case <- timer . C :
if done == nil {
done = make ( chan struct { } )
go tab . doRefresh ( done )
case <- refresh . C :
tab . seedRand ( )
if refreshDone == nil {
refreshDone = make ( chan struct { } )
go tab . doRefresh ( refreshDone )
}
case req := <- tab . refreshReq :
waiting = append ( waiting , req )
if d one == nil {
d one = make ( chan struct { } )
go tab . doRefresh ( d one)
if refreshD one == nil {
refreshD one = make ( chan struct { } )
go tab . doRefresh ( refreshD one)
}
case <- d one:
case <- refreshD one:
for _ , ch := range waiting {
close ( ch )
}
waiting = nil
done = nil
waiting , refreshDone = nil , nil
case <- revalidate . C :
go tab . doRevalidate ( revalidateDone )
case <- revalidateDone :
revalidate . Reset ( tab . nextRevalidateTime ( ) )
case <- copyNodes . C :
go tab . copyBondedNodes ( )
case <- tab . closeReq :
break loop
}
@ -349,8 +410,8 @@ loop:
if tab . net != nil {
tab . net . close ( )
}
if d one != nil {
<- d one
if refreshD one != nil {
<- refreshD one
}
for _ , ch := range waiting {
close ( ch )
@ -365,38 +426,109 @@ loop:
func ( tab * Table ) doRefresh ( done chan struct { } ) {
defer close ( done )
// Load nodes from the database and insert
// them. This should yield a few previously seen nodes that are
// (hopefully) still alive.
tab . loadSeedNodes ( true )
// Run self lookup to discover new neighbor nodes.
tab . lookup ( tab . self . ID , false )
// The Kademlia paper specifies that the bucket refresh should
// perform a lookup in the least recently used bucket. We cannot
// adhere to this because the findnode target is a 512bit value
// (not hash-sized) and it is not easily possible to generate a
// sha3 preimage that falls into a chosen bucket.
// We perform a lookup with a random target instead.
var target NodeID
rand . Read ( target [ : ] )
result := tab . lookup ( target , false )
if len ( result ) > 0 {
return
// We perform a few lookups with a random target instead.
for i := 0 ; i < 3 ; i ++ {
var target NodeID
crand . Read ( target [ : ] )
tab . lookup ( target , false )
}
}
// The table is empty. Load nodes from the database and insert
// them. This should yield a few previously seen nodes that are
// (hopefully) still alive.
func ( tab * Table ) loadSeedNodes ( bond bool ) {
seeds := tab . db . querySeeds ( seedCount , seedMaxAge )
seeds = tab . bondall ( append ( seeds , tab . nursery ... ) )
seeds = append ( seeds , tab . nursery ... )
if bond {
seeds = tab . bondall ( seeds )
}
for i := range seeds {
seed := seeds [ i ]
age := log . Lazy { Fn : func ( ) interface { } { return time . Since ( tab . db . lastPong ( seed . ID ) ) } }
log . Debug ( "Found seed node in database" , "id" , seed . ID , "addr" , seed . addr ( ) , "age" , age )
tab . add ( seed )
}
}
// doRevalidate checks that the last node in a random bucket is still live
// and replaces or deletes the node if it isn't.
func ( tab * Table ) doRevalidate ( done chan <- struct { } ) {
defer func ( ) { done <- struct { } { } } ( )
if len ( seeds ) == 0 {
log . Debug ( "No discv4 seed nodes found" )
last , bi := tab . nodeToRevalidate ( )
if last == nil {
// No non-empty bucket found.
return
}
// Ping the selected node and wait for a pong.
err := tab . ping ( last . ID , last . addr ( ) )
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
b := tab . buckets [ bi ]
if err == nil {
// The node responded, move it to the front.
log . Debug ( "Revalidated node" , "b" , bi , "id" , last . ID )
b . bump ( last )
return
}
// No reply received, pick a replacement or delete the node if there aren't
// any replacements.
if r := tab . replace ( b , last ) ; r != nil {
log . Debug ( "Replaced dead node" , "b" , bi , "id" , last . ID , "ip" , last . IP , "r" , r . ID , "rip" , r . IP )
} else {
log . Debug ( "Removed dead node" , "b" , bi , "id" , last . ID , "ip" , last . IP )
}
for _ , n := range seeds {
age := log . Lazy { Fn : func ( ) time . Duration { return time . Since ( tab . db . lastPong ( n . ID ) ) } }
log . Trace ( "Found seed node in database" , "id" , n . ID , "addr" , n . addr ( ) , "age" , age )
}
// nodeToRevalidate returns the last node in a random, non-empty bucket.
func ( tab * Table ) nodeToRevalidate ( ) ( n * Node , bi int ) {
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
for _ , bi = range tab . rand . Perm ( len ( tab . buckets ) ) {
b := tab . buckets [ bi ]
if len ( b . entries ) > 0 {
last := b . entries [ len ( b . entries ) - 1 ]
return last , bi
}
}
return nil , 0
}
func ( tab * Table ) nextRevalidateTime ( ) time . Duration {
tab . mutex . Lock ( )
tab . stuff ( seeds )
tab . mutex . Unlock ( )
defer tab . mutex . Unlock ( )
// Finally, do a self lookup to fill up the buckets.
tab . lookup ( tab . self . ID , false )
return time . Duration ( tab . rand . Int63n ( int64 ( revalidateInterval ) ) )
}
// copyBondedNodes adds nodes from the table to the database if they have been in the table
// longer then minTableTime.
func ( tab * Table ) copyBondedNodes ( ) {
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
now := time . Now ( )
for _ , b := range tab . buckets {
for _ , n := range b . entries {
if now . Sub ( n . addedAt ) >= seedMinTableTime {
tab . db . updateNode ( n )
}
}
}
}
// closest returns the n nodes in the table that are closest to the
@ -459,15 +591,14 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
if id == tab . self . ID {
return nil , errors . New ( "is self" )
}
// Retrieve a previously known node and any recent findnode failures
node , fails := tab . db . node ( id ) , 0
if node != nil {
fails = tab . db . findFails ( id )
if pinged && ! tab . isInitDone ( ) {
return nil , errors . New ( "still initializing" )
}
// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
var result error
// Start bonding if we haven't seen this node for a while or if it failed findnode too often.
node , fails := tab . db . node ( id ) , tab . db . findFails ( id )
age := time . Since ( tab . db . lastPong ( id ) )
if node == nil || fails > 0 || age > nodeDBNodeExpiration {
var result error
if fails > 0 || age > nodeDBNodeExpiration {
log . Trace ( "Starting bonding ping/pong" , "id" , id , "known" , node != nil , "failcount" , fails , "age" , age )
tab . bondmu . Lock ( )
@ -494,10 +625,10 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
node = w . n
}
}
// Add the node to the table even if the bonding ping/pong
// fails. It will be relaced quickly if it continues to be
// unresponsive.
if node != nil {
// Add the node to the table even if the bonding ping/pong
// fails. It will be relaced quickly if it continues to be
// unresponsive.
tab . add ( node )
tab . db . updateFindFails ( id , 0 )
}
@ -522,7 +653,6 @@ func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAdd
}
// Bonding succeeded, update the node database.
w . n = NewNode ( id , addr . IP , uint16 ( addr . Port ) , tcpPort )
tab . db . updateNode ( w . n )
close ( w . done )
}
@ -534,16 +664,18 @@ func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
return err
}
tab . db . updateLastPong ( id , time . Now ( ) )
// Start the background expiration goroutine after the first
// successful communication. Subsequent calls have no effect if it
// is already running. We do this here instead of somewhere else
// so that the search for seed nodes also considers older nodes
// that would otherwise be removed by the expiration.
tab . db . ensureExpirer ( )
return nil
}
// bucket returns the bucket for the given node ID hash.
func ( tab * Table ) bucket ( sha common . Hash ) * bucket {
d := logdist ( tab . self . sha , sha )
if d <= bucketMinDistance {
return tab . buckets [ 0 ]
}
return tab . buckets [ d - bucketMinDistance - 1 ]
}
// add attempts to add the given node its corresponding bucket. If the
// bucket has space available, adding the node succeeds immediately.
// Otherwise, the node is added if the least recently active node in
@ -551,57 +683,29 @@ func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
//
// The caller must not hold tab.mutex.
func ( tab * Table ) add ( new * Node ) {
b := tab . buckets [ logdist ( tab . self . sha , new . sha ) ]
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
if b . bump ( new ) {
return
}
var oldest * Node
if len ( b . entries ) == bucketSize {
oldest = b . entries [ bucketSize - 1 ]
if oldest . contested {
// The node is already being replaced, don't attempt
// to replace it.
return
}
oldest . contested = true
// Let go of the mutex so other goroutines can access
// the table while we ping the least recently active node.
tab . mutex . Unlock ( )
err := tab . ping ( oldest . ID , oldest . addr ( ) )
tab . mutex . Lock ( )
oldest . contested = false
if err == nil {
// The node responded, don't replace it.
return
}
}
added := b . replace ( new , oldest )
if added && tab . nodeAddedHook != nil {
tab . nodeAddedHook ( new )
b := tab . bucket ( new . sha )
if ! tab . bumpOrAdd ( b , new ) {
// Node is not in table. Add it to the replacement list.
tab . addReplacement ( b , new )
}
}
// stuff adds nodes the table to the end of their corresponding bucket
// if the bucket is not full. The caller must hold tab.mutex.
// if the bucket is not full. The caller must not hold tab.mutex.
func ( tab * Table ) stuff ( nodes [ ] * Node ) {
outer :
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
for _ , n := range nodes {
if n . ID == tab . self . ID {
continue // don't add self
}
bucket := tab . buckets [ logdist ( tab . self . sha , n . sha ) ]
for i := range bucket . entries {
if bucket . entries [ i ] . ID == n . ID {
continue outer // already in bucket
}
}
if len ( bucket . entries ) < bucketSize {
bucket . entries = append ( bucket . entries , n )
if tab . nodeAddedHook != nil {
tab . nodeAddedHook ( n )
}
b := tab . bucket ( n . sha )
if len ( b . entries ) < bucketSize {
tab . bumpOrAdd ( b , n )
}
}
}
@ -611,36 +715,72 @@ outer:
func ( tab * Table ) delete ( node * Node ) {
tab . mutex . Lock ( )
defer tab . mutex . Unlock ( )
bucket := tab . buckets [ logdist ( tab . self . sha , node . sha ) ]
for i := range bucket . entries {
if bucket . entries [ i ] . ID == node . ID {
bucket . entries = append ( bucket . entries [ : i ] , bucket . entries [ i + 1 : ] ... )
return
}
}
tab . deleteInBucket ( tab . bucket ( node . sha ) , node )
}
func ( b * bucket ) replace ( n * Node , last * Node ) bool {
// Don't add if b already contains n.
for i := range b . entries {
if b . entries [ i ] . ID == n . ID {
return false
}
func ( tab * Table ) addIP ( b * bucket , ip net . IP ) bool {
if netutil . IsLAN ( ip ) {
return true
}
// Replace last if it is still the last entry or just add n if b
// isn't full. If is no longer the last entry, it has either been
// replaced with someone else or became active.
if len ( b . entries ) == bucketSize && ( last == nil || b . entries [ bucketSize - 1 ] . ID != last . ID ) {
if ! tab . ips . Add ( ip ) {
log . Debug ( "IP exceeds table limit" , "ip" , ip )
return false
}
if len ( b . entries ) < bucketSize {
b . entries = append ( b . entries , nil )
if ! b . ips . Add ( ip ) {
log . Debug ( "IP exceeds bucket limit" , "ip" , ip )
tab . ips . Remove ( ip )
return false
}
copy ( b . entries [ 1 : ] , b . entries )
b . entries [ 0 ] = n
return true
}
func ( tab * Table ) removeIP ( b * bucket , ip net . IP ) {
if netutil . IsLAN ( ip ) {
return
}
tab . ips . Remove ( ip )
b . ips . Remove ( ip )
}
func ( tab * Table ) addReplacement ( b * bucket , n * Node ) {
for _ , e := range b . replacements {
if e . ID == n . ID {
return // already in list
}
}
if ! tab . addIP ( b , n . IP ) {
return
}
var removed * Node
b . replacements , removed = pushNode ( b . replacements , n , maxReplacements )
if removed != nil {
tab . removeIP ( b , removed . IP )
}
}
// replace removes n from the replacement list and replaces 'last' with it if it is the
// last entry in the bucket. If 'last' isn't the last entry, it has either been replaced
// with someone else or became active.
func ( tab * Table ) replace ( b * bucket , last * Node ) * Node {
if len ( b . entries ) >= 0 && b . entries [ len ( b . entries ) - 1 ] . ID != last . ID {
// Entry has moved, don't replace it.
return nil
}
// Still the last entry.
if len ( b . replacements ) == 0 {
tab . deleteInBucket ( b , last )
return nil
}
r := b . replacements [ tab . rand . Intn ( len ( b . replacements ) ) ]
b . replacements = deleteNode ( b . replacements , r )
b . entries [ len ( b . entries ) - 1 ] = r
tab . removeIP ( b , last . IP )
return r
}
// bump moves the given node to the front of the bucket entry list
// if it is contained in that list.
func ( b * bucket ) bump ( n * Node ) bool {
for i := range b . entries {
if b . entries [ i ] . ID == n . ID {
@ -653,6 +793,50 @@ func (b *bucket) bump(n *Node) bool {
return false
}
// bumpOrAdd moves n to the front of the bucket entry list or adds it if the list isn't
// full. The return value is true if n is in the bucket.
func ( tab * Table ) bumpOrAdd ( b * bucket , n * Node ) bool {
if b . bump ( n ) {
return true
}
if len ( b . entries ) >= bucketSize || ! tab . addIP ( b , n . IP ) {
return false
}
b . entries , _ = pushNode ( b . entries , n , bucketSize )
b . replacements = deleteNode ( b . replacements , n )
n . addedAt = time . Now ( )
if tab . nodeAddedHook != nil {
tab . nodeAddedHook ( n )
}
return true
}
func ( tab * Table ) deleteInBucket ( b * bucket , n * Node ) {
b . entries = deleteNode ( b . entries , n )
tab . removeIP ( b , n . IP )
}
// pushNode adds n to the front of list, keeping at most max items.
func pushNode ( list [ ] * Node , n * Node , max int ) ( [ ] * Node , * Node ) {
if len ( list ) < max {
list = append ( list , nil )
}
removed := list [ len ( list ) - 1 ]
copy ( list [ 1 : ] , list )
list [ 0 ] = n
return list , removed
}
// deleteNode removes n from list.
func deleteNode ( list [ ] * Node , n * Node ) [ ] * Node {
for i := range list {
if list [ i ] . ID == n . ID {
return append ( list [ : i ] , list [ i + 1 : ] ... )
}
}
return list
}
// nodesByDistance is a list of nodes, ordered by
// distance to target.
type nodesByDistance struct {