progress.go 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package raft
  15. import "fmt"
  16. const (
  17. ProgressStateProbe ProgressStateType = iota
  18. ProgressStateReplicate
  19. ProgressStateSnapshot
  20. )
  21. type ProgressStateType uint64
  22. var prstmap = [...]string{
  23. "ProgressStateProbe",
  24. "ProgressStateReplicate",
  25. "ProgressStateSnapshot",
  26. }
  27. func (st ProgressStateType) String() string { return prstmap[uint64(st)] }
// Progress represents a follower's progress in the view of the leader. The
// leader maintains the progress of every follower and sends entries to each
// follower based on its progress.
type Progress struct {
	// Match is the largest index passed to maybeUpdate so far. Next is the
	// index the leader intends to send from next (presumably the follower's
	// expected next log index; confirm against the sending code).
	Match, Next uint64

	// State defines how the leader should interact with the follower.
	//
	// When in ProgressStateProbe, the leader sends at most one replication
	// message per heartbeat interval. It also probes the actual progress of
	// the follower.
	//
	// When in ProgressStateReplicate, the leader optimistically increases
	// Next to the latest entry sent after sending a replication message.
	// This is an optimized state for fast replication of log entries to the
	// follower.
	//
	// When in ProgressStateSnapshot, the leader should have sent out a
	// snapshot before and stops sending any replication message.
	State ProgressStateType

	// Paused is used in ProgressStateProbe.
	// When Paused is true, raft should pause sending replication messages
	// to this peer.
	Paused bool

	// PendingSnapshot is used in ProgressStateSnapshot.
	// If there is a pending snapshot, PendingSnapshot is set to the index of
	// that snapshot. While PendingSnapshot is set, the replication process
	// of this Progress is paused: raft will not resend the snapshot until
	// the pending one is reported as failed.
	PendingSnapshot uint64

	// RecentActive is true if the progress is recently active. Receiving any
	// message from the corresponding follower indicates the progress is
	// active. RecentActive can be reset to false after an election timeout.
	RecentActive bool

	// ins is a sliding window for the inflight messages.
	// Each inflight message contains one or more log entries.
	// The max number of entries per message is defined in raft config as
	// MaxSizePerMsg. Thus inflights effectively limits both the number of
	// inflight messages and the bandwidth each Progress can use.
	// When inflights is full, no more messages should be sent.
	// When a leader sends out a message, the index of the last entry should
	// be added to inflights. The index MUST be added into inflights in
	// order.
	// When a leader receives a reply, the previous inflights should be freed
	// by calling inflights.freeTo with the index of the last received entry.
	ins *inflights

	// IsLearner is true if this progress is tracked for a learner.
	IsLearner bool
}
  73. func (pr *Progress) resetState(state ProgressStateType) {
  74. pr.Paused = false
  75. pr.PendingSnapshot = 0
  76. pr.State = state
  77. pr.ins.reset()
  78. }
  79. func (pr *Progress) becomeProbe() {
  80. // If the original state is ProgressStateSnapshot, progress knows that
  81. // the pending snapshot has been sent to this peer successfully, then
  82. // probes from pendingSnapshot + 1.
  83. if pr.State == ProgressStateSnapshot {
  84. pendingSnapshot := pr.PendingSnapshot
  85. pr.resetState(ProgressStateProbe)
  86. pr.Next = max(pr.Match+1, pendingSnapshot+1)
  87. } else {
  88. pr.resetState(ProgressStateProbe)
  89. pr.Next = pr.Match + 1
  90. }
  91. }
  92. func (pr *Progress) becomeReplicate() {
  93. pr.resetState(ProgressStateReplicate)
  94. pr.Next = pr.Match + 1
  95. }
// becomeSnapshot switches the progress to ProgressStateSnapshot and records
// snapshoti as the index of the pending snapshot.
func (pr *Progress) becomeSnapshot(snapshoti uint64) {
	// resetState clears PendingSnapshot, so the new value must be stored
	// after it runs.
	pr.resetState(ProgressStateSnapshot)
	pr.PendingSnapshot = snapshoti
}
  100. // maybeUpdate returns false if the given n index comes from an outdated message.
  101. // Otherwise it updates the progress and returns true.
  102. func (pr *Progress) maybeUpdate(n uint64) bool {
  103. var updated bool
  104. if pr.Match < n {
  105. pr.Match = n
  106. updated = true
  107. pr.resume()
  108. }
  109. if pr.Next < n+1 {
  110. pr.Next = n + 1
  111. }
  112. return updated
  113. }
  114. func (pr *Progress) optimisticUpdate(n uint64) { pr.Next = n + 1 }
  115. // maybeDecrTo returns false if the given to index comes from an out of order message.
  116. // Otherwise it decreases the progress next index to min(rejected, last) and returns true.
  117. func (pr *Progress) maybeDecrTo(rejected, last uint64) bool {
  118. if pr.State == ProgressStateReplicate {
  119. // the rejection must be stale if the progress has matched and "rejected"
  120. // is smaller than "match".
  121. if rejected <= pr.Match {
  122. return false
  123. }
  124. // directly decrease next to match + 1
  125. pr.Next = pr.Match + 1
  126. return true
  127. }
  128. // the rejection must be stale if "rejected" does not match next - 1
  129. if pr.Next-1 != rejected {
  130. return false
  131. }
  132. if pr.Next = min(rejected, last+1); pr.Next < 1 {
  133. pr.Next = 1
  134. }
  135. pr.resume()
  136. return true
  137. }
  138. func (pr *Progress) pause() { pr.Paused = true }
  139. func (pr *Progress) resume() { pr.Paused = false }
  140. // IsPaused returns whether sending log entries to this node has been
  141. // paused. A node may be paused because it has rejected recent
  142. // MsgApps, is currently waiting for a snapshot, or has reached the
  143. // MaxInflightMsgs limit.
  144. func (pr *Progress) IsPaused() bool {
  145. switch pr.State {
  146. case ProgressStateProbe:
  147. return pr.Paused
  148. case ProgressStateReplicate:
  149. return pr.ins.full()
  150. case ProgressStateSnapshot:
  151. return true
  152. default:
  153. panic("unexpected state")
  154. }
  155. }
  156. func (pr *Progress) snapshotFailure() { pr.PendingSnapshot = 0 }
  157. // needSnapshotAbort returns true if snapshot progress's Match
  158. // is equal or higher than the pendingSnapshot.
  159. func (pr *Progress) needSnapshotAbort() bool {
  160. return pr.State == ProgressStateSnapshot && pr.Match >= pr.PendingSnapshot
  161. }
  162. func (pr *Progress) String() string {
  163. return fmt.Sprintf("next = %d, match = %d, state = %s, waiting = %v, pendingSnapshot = %d", pr.Next, pr.Match, pr.State, pr.IsPaused(), pr.PendingSnapshot)
  164. }
// inflights is a bounded window of in-flight message indexes, stored in a
// circular buffer that is grown on demand (see growBuf) up to size slots.
type inflights struct {
	// the starting index in the buffer
	start int
	// number of inflights in the buffer
	count int

	// the size of the buffer, i.e. the maximum number of inflights that may
	// be tracked at once; full() compares count against it
	size int

	// buffer contains the index of the last entry
	// inside one message. Its length may be smaller than size until add
	// has grown it.
	buffer []uint64
}
  176. func newInflights(size int) *inflights {
  177. return &inflights{
  178. size: size,
  179. }
  180. }
  181. // add adds an inflight into inflights
  182. func (in *inflights) add(inflight uint64) {
  183. if in.full() {
  184. panic("cannot add into a full inflights")
  185. }
  186. next := in.start + in.count
  187. size := in.size
  188. if next >= size {
  189. next -= size
  190. }
  191. if next >= len(in.buffer) {
  192. in.growBuf()
  193. }
  194. in.buffer[next] = inflight
  195. in.count++
  196. }
  197. // grow the inflight buffer by doubling up to inflights.size. We grow on demand
  198. // instead of preallocating to inflights.size to handle systems which have
  199. // thousands of Raft groups per process.
  200. func (in *inflights) growBuf() {
  201. newSize := len(in.buffer) * 2
  202. if newSize == 0 {
  203. newSize = 1
  204. } else if newSize > in.size {
  205. newSize = in.size
  206. }
  207. newBuffer := make([]uint64, newSize)
  208. copy(newBuffer, in.buffer)
  209. in.buffer = newBuffer
  210. }
  211. // freeTo frees the inflights smaller or equal to the given `to` flight.
  212. func (in *inflights) freeTo(to uint64) {
  213. if in.count == 0 || to < in.buffer[in.start] {
  214. // out of the left side of the window
  215. return
  216. }
  217. idx := in.start
  218. var i int
  219. for i = 0; i < in.count; i++ {
  220. if to < in.buffer[idx] { // found the first large inflight
  221. break
  222. }
  223. // increase index and maybe rotate
  224. size := in.size
  225. if idx++; idx >= size {
  226. idx -= size
  227. }
  228. }
  229. // free i inflights and set new start index
  230. in.count -= i
  231. in.start = idx
  232. if in.count == 0 {
  233. // inflights is empty, reset the start index so that we don't grow the
  234. // buffer unnecessarily.
  235. in.start = 0
  236. }
  237. }
  238. func (in *inflights) freeFirstOne() { in.freeTo(in.buffer[in.start]) }
  239. // full returns true if the inflights is full.
  240. func (in *inflights) full() bool {
  241. return in.count == in.size
  242. }
  243. // resets frees all inflights.
  244. func (in *inflights) reset() {
  245. in.count = 0
  246. in.start = 0
  247. }