InitAndStartNode gets called after having at least one membership sync with the cluster.
()
| 1939 | |
| 1940 | // InitAndStartNode gets called after having at least one membership sync with the cluster. |
| 1941 | func (n *node) InitAndStartNode() { |
| 1942 | x.Check(initProposalKey(n.Id)) |
| 1943 | _, restart, err := n.PastLife() |
| 1944 | x.Check(err) |
| 1945 | |
| 1946 | _, hasPeer := groups().MyPeer() |
| 1947 | if !restart && hasPeer { |
| 1948 | // The node has other peers, it might have crashed after joining the cluster and before |
| 1949 | // writing a snapshot. Check with the leader whether it is part of the cluster. Consider this a |
| 1950 | // restart if it is part of the cluster, else start a new node. |
| 1951 | for { |
| 1952 | if restart, err = n.isMember(); err == nil { |
| 1953 | break |
| 1954 | } |
| 1955 | glog.Errorf("Error while calling hasPeer: %v. Retrying...\n", err) |
| 1956 | time.Sleep(time.Second) |
| 1957 | } |
| 1958 | } |
| 1959 | |
| 1960 | if n.RaftContext.IsLearner && !hasPeer { |
| 1961 | glog.Fatal("Cannot start a learner node without peer alpha nodes") |
| 1962 | } |
| 1963 | |
| 1964 | if restart { |
| 1965 | glog.Infof("Restarting node for group: %d\n", n.gid) |
| 1966 | sp, err := n.Store.Snapshot() |
| 1967 | x.Checkf(err, "Unable to get existing snapshot") |
| 1968 | if !raft.IsEmptySnap(sp) { |
| 1969 | // It is important that we pick up the conf state here. |
| 1970 | // Otherwise, we'll lose the store conf state, and it would get |
| 1971 | // overwritten with an empty state when a new snapshot is taken. |
| 1972 | // This causes a node to just hang on restart, because it finds a |
| 1973 | // zero-member Raft group. |
| 1974 | n.SetConfState(&sp.Metadata.ConfState) |
| 1975 | |
| 1976 | // TODO: Making connections here seems unnecessary, evaluate. |
| 1977 | members := groups().members(n.gid) |
| 1978 | for _, id := range sp.Metadata.ConfState.Voters { |
| 1979 | m, ok := members[id] |
| 1980 | if ok { |
| 1981 | n.Connect(id, m.Addr) |
| 1982 | } |
| 1983 | } |
| 1984 | for _, id := range sp.Metadata.ConfState.Learners { |
| 1985 | m, ok := members[id] |
| 1986 | if ok { |
| 1987 | n.Connect(id, m.Addr) |
| 1988 | } |
| 1989 | } |
| 1990 | } |
| 1991 | n.SetRaft(raft.RestartNode(n.Cfg)) |
| 1992 | glog.V(2).Infoln("Restart node complete") |
| 1993 | |
| 1994 | } else { |
| 1995 | glog.Infof("New Node for group: %d\n", n.gid) |
| 1996 | if _, hasPeer := groups().MyPeer(); hasPeer { |
| 1997 | // Get snapshot before joining peers as it can take time to retrieve it and we don't |
| 1998 | // want the quorum to be inactive when it happens. |
no test coverage detected