| 1301 | const tickDur = 100 * time.Millisecond /* tickDur is the period of the ticker in Run; each fire calls n.Raft().Tick(). */ |
| 1302 | |
| 1303 | func (n *node) Run() { |
| 1304 | defer n.closer.Done() // CLOSER:1 |
| 1305 | |
| 1306 | // lastLead is for detecting leadership changes |
| 1307 | // |
| 1308 | // etcd has a similar mechanism for tracking leader changes, with their |
| 1309 | // raftReadyHandler.getLead() function that returns the previous leader |
| 1310 | lastLead := uint64(math.MaxUint64) |
| 1311 | |
| 1312 | firstRun := true |
| 1313 | var leader bool |
| 1314 | // See also our configuration of HeartbeatTick and ElectionTick. |
| 1315 | // Previously we used 20ms ticks, but they would overload the Raft tick channel, causing |
| 1316 | // "tick missed to fire" logs. Etcd uses 100ms and they haven't seen those issues. |
| 1317 | // Additionally, using 100ms for ticks does not cause proposals to slow down, because they get |
| 1318 | // sent out asap and don't rely on ticks. So, setting this to 100ms instead of 20ms is a NOOP. |
| 1319 | ticker := time.Tick(tickDur) |
| 1320 | |
| 1321 | done := make(chan struct{}) |
| 1322 | go n.checkpointAndClose(done) |
| 1323 | go n.ReportRaftComms() |
| 1324 | |
| 1325 | if !x.WorkerConfig.HardSync { |
| 1326 | closer := z.NewCloser(2) |
| 1327 | defer closer.SignalAndWait() |
| 1328 | go x.StoreSync(n.Store, closer) |
| 1329 | go x.StoreSync(pstore, closer) |
| 1330 | } |
| 1331 | |
| 1332 | applied, err := n.Store.Checkpoint() |
| 1333 | if err != nil { |
| 1334 | glog.Errorf("While trying to find raft progress: %v", err) |
| 1335 | } else { |
| 1336 | glog.Infof("Found Raft progress: %d", applied) |
| 1337 | } |
| 1338 | |
| 1339 | var timer x.Timer |
| 1340 | for { |
| 1341 | select { |
| 1342 | case <-done: |
| 1343 | // We use done channel here instead of closer.HasBeenClosed so that we can transfer |
| 1344 | // leadership in a goroutine. The push to n.applyCh happens in this loop, so the close |
| 1345 | // should happen here too. Otherwise, race condition between push and close happens. |
| 1346 | close(n.applyCh) |
| 1347 | glog.Infoln("Raft node done.") |
| 1348 | return |
| 1349 | |
| 1350 | // Slow ticker can't be placed here because figuring out checkpoints and snapshots takes |
| 1351 | // time and if the leader does not send heartbeats out during this time, the followers |
| 1352 | // start an election process. And that election process would just continue to happen |
| 1353 | // indefinitely because checkpoints and snapshots are being calculated indefinitely. |
| 1354 | case <-ticker: |
| 1355 | n.Raft().Tick() |
| 1356 | |
| 1357 | case rd := <-n.Raft().Ready(): |
| 1358 | // TODO(Aman): Based on the code here https://github.com/etcd-io/etcd/tree/raft/v3.5.9/raft, |
| 1359 | // n.SaveToStorage should be called first before doing anything else. |
| 1360 | |