decoder.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package wal
  15. import (
  16. "bufio"
  17. "encoding/binary"
  18. "hash"
  19. "io"
  20. "sync"
  21. "github.com/coreos/etcd/pkg/crc"
  22. "github.com/coreos/etcd/pkg/pbutil"
  23. "github.com/coreos/etcd/raft/raftpb"
  24. "github.com/coreos/etcd/wal/walpb"
  25. )
  26. const minSectorSize = 512
  27. // frameSizeBytes is frame size in bytes, including record size and padding size.
  28. const frameSizeBytes = 8
// decoder reads WAL records sequentially from a list of buffered readers,
// tracking the offset of the last valid record and a running checksum.
type decoder struct {
	// mu is held by decode for the duration of a single record decode.
	mu sync.Mutex
	// brs are the readers still to be consumed; records are read from brs[0],
	// which is dropped once it reports EOF or a zero length field.
	brs []*bufio.Reader

	// lastValidOff file offset following the last valid decoded record
	// (reset to 0 whenever decoding moves on to the next reader)
	lastValidOff int64
	// crc is the running checksum; it is fed the Data of every non-crcType
	// record and can be replaced through updateCRC.
	crc hash.Hash32
}
  36. func newDecoder(r ...io.Reader) *decoder {
  37. readers := make([]*bufio.Reader, len(r))
  38. for i := range r {
  39. readers[i] = bufio.NewReader(r[i])
  40. }
  41. return &decoder{
  42. brs: readers,
  43. crc: crc.New(0, crcTable),
  44. }
  45. }
// decode resets rec and then decodes the next record into it while holding
// d.mu. The returned error is whatever decodeRecord returns; io.EOF means all
// readers have been exhausted.
func (d *decoder) decode(rec *walpb.Record) error {
	// rec is cleared before the lock is taken; it belongs to the caller.
	rec.Reset()
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.decodeRecord(rec)
}
  52. func (d *decoder) decodeRecord(rec *walpb.Record) error {
  53. if len(d.brs) == 0 {
  54. return io.EOF
  55. }
  56. l, err := readInt64(d.brs[0])
  57. if err == io.EOF || (err == nil && l == 0) {
  58. // hit end of file or preallocated space
  59. d.brs = d.brs[1:]
  60. if len(d.brs) == 0 {
  61. return io.EOF
  62. }
  63. d.lastValidOff = 0
  64. return d.decodeRecord(rec)
  65. }
  66. if err != nil {
  67. return err
  68. }
  69. recBytes, padBytes := decodeFrameSize(l)
  70. data := make([]byte, recBytes+padBytes)
  71. if _, err = io.ReadFull(d.brs[0], data); err != nil {
  72. // ReadFull returns io.EOF only if no bytes were read
  73. // the decoder should treat this as an ErrUnexpectedEOF instead.
  74. if err == io.EOF {
  75. err = io.ErrUnexpectedEOF
  76. }
  77. return err
  78. }
  79. if err := rec.Unmarshal(data[:recBytes]); err != nil {
  80. if d.isTornEntry(data) {
  81. return io.ErrUnexpectedEOF
  82. }
  83. return err
  84. }
  85. // skip crc checking if the record type is crcType
  86. if rec.Type != crcType {
  87. d.crc.Write(rec.Data)
  88. if err := rec.Validate(d.crc.Sum32()); err != nil {
  89. if d.isTornEntry(data) {
  90. return io.ErrUnexpectedEOF
  91. }
  92. return err
  93. }
  94. }
  95. // record decoded as valid; point last valid offset to end of record
  96. d.lastValidOff += frameSizeBytes + recBytes + padBytes
  97. return nil
  98. }
  99. func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) {
  100. // the record size is stored in the lower 56 bits of the 64-bit length
  101. recBytes = int64(uint64(lenField) & ^(uint64(0xff) << 56))
  102. // non-zero padding is indicated by set MSb / a negative length
  103. if lenField < 0 {
  104. // padding is stored in lower 3 bits of length MSB
  105. padBytes = int64((uint64(lenField) >> 56) & 0x7)
  106. }
  107. return recBytes, padBytes
  108. }
  109. // isTornEntry determines whether the last entry of the WAL was partially written
  110. // and corrupted because of a torn write.
  111. func (d *decoder) isTornEntry(data []byte) bool {
  112. if len(d.brs) != 1 {
  113. return false
  114. }
  115. fileOff := d.lastValidOff + frameSizeBytes
  116. curOff := 0
  117. chunks := [][]byte{}
  118. // split data on sector boundaries
  119. for curOff < len(data) {
  120. chunkLen := int(minSectorSize - (fileOff % minSectorSize))
  121. if chunkLen > len(data)-curOff {
  122. chunkLen = len(data) - curOff
  123. }
  124. chunks = append(chunks, data[curOff:curOff+chunkLen])
  125. fileOff += int64(chunkLen)
  126. curOff += chunkLen
  127. }
  128. // if any data for a sector chunk is all 0, it's a torn write
  129. for _, sect := range chunks {
  130. isZero := true
  131. for _, v := range sect {
  132. if v != 0 {
  133. isZero = false
  134. break
  135. }
  136. }
  137. if isZero {
  138. return true
  139. }
  140. }
  141. return false
  142. }
// updateCRC replaces the decoder's checksum with a new one built by
// crc.New from prevCrc and crcTable. It does not take d.mu.
func (d *decoder) updateCRC(prevCrc uint32) {
	d.crc = crc.New(prevCrc, crcTable)
}
// lastCRC returns the current value of the decoder's checksum. It does not
// take d.mu.
func (d *decoder) lastCRC() uint32 {
	return d.crc.Sum32()
}
// lastOffset returns lastValidOff, the offset following the last valid
// decoded record in the current reader. It does not take d.mu.
func (d *decoder) lastOffset() int64 { return d.lastValidOff }
// mustUnmarshalEntry decodes d into a raftpb.Entry via pbutil.MustUnmarshal.
// NOTE(review): presumably panics on malformed input, going by the helper's
// name — confirm against pbutil.
func mustUnmarshalEntry(d []byte) raftpb.Entry {
	var e raftpb.Entry
	pbutil.MustUnmarshal(&e, d)
	return e
}
// mustUnmarshalState decodes d into a raftpb.HardState via
// pbutil.MustUnmarshal.
// NOTE(review): presumably panics on malformed input, going by the helper's
// name — confirm against pbutil.
func mustUnmarshalState(d []byte) raftpb.HardState {
	var s raftpb.HardState
	pbutil.MustUnmarshal(&s, d)
	return s
}
  160. func readInt64(r io.Reader) (int64, error) {
  161. var n int64
  162. err := binary.Read(r, binary.LittleEndian, &n)
  163. return n, err
  164. }