From 13292ee897e345045fbfab3bda23a77589a271c1 Mon Sep 17 00:00:00 2001 From: Ferenc Szabo Date: Wed, 30 Jan 2019 16:01:42 +0100 Subject: [PATCH] p2p/simulations: encapsulate Node.Up field so we avoid data races The Node.Up field was accessed concurrently without "proper" locking. There was a lock on Network and that was used sometimes to access the field. Other times the locking was missed and we had a data race. For example: https://github.com/ethereum/go-ethereum/pull/18464 The case above was solved, but there were still intermittent/hard to reproduce races. So let's solve the issue permanently. resolves: ethersphere/go-ethereum#1146 --- p2p/simulations/events.go | 2 +- p2p/simulations/http_test.go | 18 ++++---- p2p/simulations/mocker_test.go | 9 ++-- p2p/simulations/network.go | 51 +++++++++++++-------- swarm/network/simulation/node.go | 4 +- swarm/network/simulation/node_test.go | 18 ++++---- swarm/network/simulation/service.go | 2 +- swarm/network/simulation/simulation_test.go | 4 +- swarm/network/simulations/overlay_test.go | 2 +- 9 files changed, 63 insertions(+), 47 deletions(-) diff --git a/p2p/simulations/events.go b/p2p/simulations/events.go index 9b2a990e07..984c2e088f 100644 --- a/p2p/simulations/events.go +++ b/p2p/simulations/events.go @@ -100,7 +100,7 @@ func ControlEvent(v interface{}) *Event { func (e *Event) String() string { switch e.Type { case EventTypeNode: - return fmt.Sprintf(" id: %s up: %t", e.Node.ID().TerminalString(), e.Node.Up) + return fmt.Sprintf(" id: %s up: %t", e.Node.ID().TerminalString(), e.Node.Up()) case EventTypeConn: return fmt.Sprintf(" nodes: %s->%s up: %t", e.Conn.One.TerminalString(), e.Conn.Other.TerminalString(), e.Conn.Up) case EventTypeMsg: diff --git a/p2p/simulations/http_test.go b/p2p/simulations/http_test.go index c0a5acb3d8..ed43c0ed76 100644 --- a/p2p/simulations/http_test.go +++ b/p2p/simulations/http_test.go @@ -421,14 +421,15 @@ type expectEvents struct { } func (t *expectEvents) nodeEvent(id string, up bool) *Event { + node := Node{ + Config: &adapters.NodeConfig{ + ID: enode.HexID(id), + }, + up: up, + } return &Event{ Type: EventTypeNode, - Node: &Node{ - Config: &adapters.NodeConfig{ - ID: enode.HexID(id), - }, - Up: up, - }, + Node: &node, } } @@ -480,6 +481,7 @@ loop: } func (t *expectEvents) expect(events ...*Event) { + t.Helper() timeout := time.After(10 * time.Second) i := 0 for { @@ -501,8 +503,8 @@ func (t *expectEvents) expect(events ...*Event) { if event.Node.ID() != expected.Node.ID() { t.Fatalf("expected node event %d to have id %q, got %q", i, expected.Node.ID().TerminalString(), event.Node.ID().TerminalString()) } - if event.Node.Up != expected.Node.Up { - t.Fatalf("expected node event %d to have up=%t, got up=%t", i, expected.Node.Up, event.Node.Up) + if event.Node.Up() != expected.Node.Up() { + t.Fatalf("expected node event %d to have up=%t, got up=%t", i, expected.Node.Up(), event.Node.Up()) } case EventTypeConn: diff --git a/p2p/simulations/mocker_test.go b/p2p/simulations/mocker_test.go index 192be1732c..069040257e 100644 --- a/p2p/simulations/mocker_test.go +++ b/p2p/simulations/mocker_test.go @@ -90,15 +90,12 @@ func TestMocker(t *testing.T) { for { select { case event := <-events: - //if the event is a node Up event only - if event.Node != nil && event.Node.Up { + if isNodeUp(event) { //add the correspondent node ID to the map nodemap[event.Node.Config.ID] = true //this means all nodes got a nodeUp event, so we can continue the test if len(nodemap) == nodeCount { nodesComplete = true - //wait for 3s as the mocker will need time to connect the nodes - //time.Sleep( 3 *time.Second) } } else if event.Conn != nil && nodesComplete { connCount += 1 @@ -169,3 +166,7 @@ func TestMocker(t *testing.T) { t.Fatalf("Expected empty list of nodes, got: %d", len(nodesInfo)) } } + +func isNodeUp(event *Event) bool { + return event.Node != nil && event.Node.Up() +} diff --git a/p2p/simulations/network.go b/p2p/simulations/network.go index 6edcefc532..e7f20ef974 100644 --- a/p2p/simulations/network.go +++ b/p2p/simulations/network.go @@ -136,7 +136,7 @@ func (net *Network) Config() *NetworkConfig { // StartAll starts all nodes in the network func (net *Network) StartAll() error { for _, node := range net.Nodes { - if node.Up { + if node.Up() { continue } if err := net.Start(node.ID()); err != nil { @@ -149,7 +149,7 @@ func (net *Network) StartAll() error { // StopAll stops all nodes in the network func (net *Network) StopAll() error { for _, node := range net.Nodes { - if !node.Up { + if !node.Up() { continue } if err := net.Stop(node.ID()); err != nil { @@ -174,7 +174,7 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte) net.lock.Unlock() return fmt.Errorf("node %v does not exist", id) } - if node.Up { + if node.Up() { net.lock.Unlock() return fmt.Errorf("node %v already up", id) } @@ -184,7 +184,7 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte) log.Warn("Node startup failed", "id", id, "err", err) return err } - node.Up = true + node.SetUp(true) log.Info("Started node", "id", id) ev := NewEvent(node) net.lock.Unlock() @@ -219,7 +219,7 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub if node == nil { return } - node.Up = false + node.SetUp(false) ev := NewEvent(node) net.events.Send(ev) }() @@ -263,17 +263,17 @@ func (net *Network) Stop(id enode.ID) error { net.lock.Unlock() return fmt.Errorf("node %v does not exist", id) } - if !node.Up { + if !node.Up() { net.lock.Unlock() return fmt.Errorf("node %v already down", id) } - node.Up = false + node.SetUp(false) net.lock.Unlock() err := node.Stop() if err != nil { net.lock.Lock() - node.Up = true + node.SetUp(true) net.lock.Unlock() return err } @@ -430,7 +430,7 @@ func (net *Network) GetRandomUpNode(excludeIDs ...enode.ID) *Node { func (net *Network) getUpNodeIDs() (ids []enode.ID) { for _, node := range net.Nodes { - if node.Up { + if node.Up() { ids = append(ids, node.ID()) } } @@ -446,7 +446,7 @@ func (net *Network) GetRandomDownNode(excludeIDs ...enode.ID) *Node { func (net *Network) getDownNodeIDs() (ids []enode.ID) { for _, node := range net.GetNodes() { - if !node.Up { + if !node.Up() { ids = append(ids, node.ID()) } } @@ -595,8 +595,21 @@ type Node struct { // Config if the config used to created the node Config *adapters.NodeConfig `json:"config"` - // Up tracks whether or not the node is running - Up bool `json:"up"` + // up tracks whether or not the node is running + up bool `json:"up"` + upMu sync.RWMutex `json:"-"` +} + +func (n *Node) Up() bool { + n.upMu.RLock() + defer n.upMu.RUnlock() + return n.up +} + +func (n *Node) SetUp(up bool) { + n.upMu.Lock() + defer n.upMu.Unlock() + n.up = up } // ID returns the ID of the node @@ -630,7 +643,7 @@ func (n *Node) MarshalJSON() ([]byte, error) { }{ Info: n.NodeInfo(), Config: n.Config, - Up: n.Up, + Up: n.Up(), }) } @@ -653,10 +666,10 @@ type Conn struct { // nodesUp returns whether both nodes are currently up func (c *Conn) nodesUp() error { - if !c.one.Up { + if !c.one.Up() { return fmt.Errorf("one %v is not up", c.One) } - if !c.other.Up { + if !c.other.Up() { return fmt.Errorf("other %v is not up", c.Other) } return nil @@ -728,7 +741,7 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn } for i, node := range net.Nodes { snap.Nodes[i] = NodeSnapshot{Node: *node} - if !node.Up { + if !node.Up() { continue } snapshots, err := node.Snapshots() @@ -783,7 +796,7 @@ func (net *Network) Load(snap *Snapshot) error { if _, err := net.NewNodeWithConfig(n.Node.Config); err != nil { return err } - if !n.Node.Up { + if !n.Node.Up() { continue } if err := net.startWithSnapshots(n.Node.Config.ID, n.Snapshots); err != nil { @@ -855,7 +868,7 @@ func (net *Network) Load(snap *Snapshot) error { // Start connecting. for _, conn := range snap.Conns { - if !net.GetNode(conn.One).Up || !net.GetNode(conn.Other).Up { + if !net.GetNode(conn.One).Up() || !net.GetNode(conn.Other).Up() { //in this case, at least one of the nodes of a connection is not up, //so it would result in the snapshot `Load` to fail continue @@ -909,7 +922,7 @@ func (net *Network) executeControlEvent(event *Event) { } func (net *Network) executeNodeEvent(e *Event) error { - if !e.Node.Up { + if !e.Node.Up() { return net.Stop(e.Node.ID()) } diff --git a/swarm/network/simulation/node.go b/swarm/network/simulation/node.go index 08eb835247..24afe51a41 100644 --- a/swarm/network/simulation/node.go +++ b/swarm/network/simulation/node.go @@ -44,7 +44,7 @@ func (s *Simulation) NodeIDs() (ids []enode.ID) { func (s *Simulation) UpNodeIDs() (ids []enode.ID) { nodes := s.Net.GetNodes() for _, node := range nodes { - if node.Up { + if node.Up() { ids = append(ids, node.ID()) } } @@ -55,7 +55,7 @@ func (s *Simulation) UpNodeIDs() (ids []enode.ID) { func (s *Simulation) DownNodeIDs() (ids []enode.ID) { nodes := s.Net.GetNodes() for _, node := range nodes { - if !node.Up { + if !node.Up() { ids = append(ids, node.ID()) } } diff --git a/swarm/network/simulation/node_test.go b/swarm/network/simulation/node_test.go index e6970b1d62..bae5afb260 100644 --- a/swarm/network/simulation/node_test.go +++ b/swarm/network/simulation/node_test.go @@ -54,7 +54,7 @@ func TestUpDownNodeIDs(t *testing.T) { gotIDs = sim.UpNodeIDs() for _, id := range gotIDs { - if !sim.Net.GetNode(id).Up { + if !sim.Net.GetNode(id).Up() { t.Errorf("node %s should not be down", id) } } @@ -66,7 +66,7 @@ func TestUpDownNodeIDs(t *testing.T) { gotIDs = sim.DownNodeIDs() for _, id := range gotIDs { - if sim.Net.GetNode(id).Up { + if sim.Net.GetNode(id).Up() { t.Errorf("node %s should not be up", id) } } @@ -112,7 +112,7 @@ func TestAddNode(t *testing.T) { t.Fatal("node not found") } - if !n.Up { + if !n.Up() { t.Error("node not started") } } @@ -327,7 +327,7 @@ func TestStartStopNode(t *testing.T) { if n == nil { t.Fatal("node not found") } - if !n.Up { + if !n.Up() { t.Error("node not started") } @@ -335,7 +335,7 @@ func TestStartStopNode(t *testing.T) { if err != nil { t.Fatal(err) } - if n.Up { + if n.Up() { t.Error("node not stopped") } @@ -345,7 +345,7 @@ func TestStartStopNode(t *testing.T) { if err != nil { t.Fatal(err) } - if !n.Up { + if !n.Up() { t.Error("node not started") } } @@ -368,7 +368,7 @@ func TestStartStopRandomNode(t *testing.T) { if n == nil { t.Fatal("node not found") } - if n.Up { + if n.Up() { t.Error("node not stopped") } @@ -408,7 +408,7 @@ func TestStartStopRandomNodes(t *testing.T) { if n == nil { t.Fatal("node not found") } - if n.Up { + if n.Up() { t.Error("node not stopped") } } @@ -425,7 +425,7 @@ func TestStartStopRandomNodes(t *testing.T) { if n == nil { t.Fatal("node not found") } - if !n.Up { + if !n.Up() { t.Error("node not started") } } diff --git a/swarm/network/simulation/service.go b/swarm/network/simulation/service.go index 7dd4dc6d82..0ac8149a94 100644 --- a/swarm/network/simulation/service.go +++ b/swarm/network/simulation/service.go @@ -52,7 +52,7 @@ func (s *Simulation) Services(name string) (services map[enode.ID]node.Service) nodes := s.Net.GetNodes() services = make(map[enode.ID]node.Service) for _, node := range nodes { - if !node.Up { + if !node.Up() { continue } simNode, ok := node.Node.(*adapters.SimNode) diff --git a/swarm/network/simulation/simulation_test.go b/swarm/network/simulation/simulation_test.go index f837f93824..1d0338f593 100644 --- a/swarm/network/simulation/simulation_test.go +++ b/swarm/network/simulation/simulation_test.go @@ -124,7 +124,7 @@ func TestClose(t *testing.T) { var upNodeCount int for _, n := range sim.Net.GetNodes() { - if n.Up { + if n.Up() { upNodeCount++ } } @@ -140,7 +140,7 @@ func TestClose(t *testing.T) { upNodeCount = 0 for _, n := range sim.Net.GetNodes() { - if n.Up { + if n.Up() { upNodeCount++ } } diff --git a/swarm/network/simulations/overlay_test.go b/swarm/network/simulations/overlay_test.go index e56b646503..0667188613 100644 --- a/swarm/network/simulations/overlay_test.go +++ b/swarm/network/simulations/overlay_test.go @@ -178,7 +178,7 @@ func watchSimEvents(net *simulations.Network, ctx context.Context, trigger chan case ev := <-events: //only catch node up events if ev.Type == simulations.EventTypeNode { - if ev.Node.Up { + if ev.Node.Up() { log.Debug("got node up event", "event", ev, "node", ev.Node.Config.ID) select { case trigger <- ev.Node.Config.ID: