/**
 * Reed-Solomon Coding over 8-bit values.
 *
 * Copyright 2015, Klaus Post
 * Copyright 2015, Backblaze, Inc.
 */

// Package reedsolomon enables Erasure Coding in Go
//
// For usage and examples, see https://github.com/klauspost/reedsolomon
//
package reedsolomon

import (
	"bytes"
	"errors"
	"io"
	"runtime"
	"sync"

	"github.com/klauspost/cpuid"
)

// Encoder is an interface to encode Reed-Solomon parity sets for your data.
type Encoder interface {
	// Encode parity for a set of data shards.
	// Input is 'shards' containing data shards followed by parity shards.
	// The number of shards must match the number given to New().
	// Each shard is a byte array, and they must all be the same size.
	// The parity shards will always be overwritten and the data shards
	// will remain the same, so it is safe for you to read from the
	// data shards while this is running.
	Encode(shards [][]byte) error

	// Verify returns true if the parity shards contain correct data.
	// The data is the same format as Encode. No data is modified, so
	// you are allowed to read from data while this is running.
	Verify(shards [][]byte) (bool, error)

	// Reconstruct will recreate the missing shards if possible.
	//
	// Given a list of shards, some of which contain data, fills in the
	// ones that don't have data.
	//
	// The length of the array must be equal to the total number of shards.
	// You indicate that a shard is missing by setting it to nil or zero-length.
	// If a shard is zero-length but has sufficient capacity, that memory will
	// be used, otherwise a new []byte will be allocated.
	//
	// If there are too few shards to reconstruct the missing
	// ones, ErrTooFewShards will be returned.
	//
	// The reconstructed shard set is complete, but integrity is not verified.
	// Use the Verify function to check if the data set is ok.
	Reconstruct(shards [][]byte) error

	// ReconstructData will recreate any missing data shards, if possible.
	//
	// Given a list of shards, some of which contain data, fills in the
	// data shards that don't have data.
	//
	// The length of the array must be equal to Shards.
	// You indicate that a shard is missing by setting it to nil or zero-length.
	// If a shard is zero-length but has sufficient capacity, that memory will
	// be used, otherwise a new []byte will be allocated.
	//
	// If there are too few shards to reconstruct the missing
	// ones, ErrTooFewShards will be returned.
	//
	// As the reconstructed shard set may contain missing parity shards,
	// calling the Verify function is likely to fail.
	ReconstructData(shards [][]byte) error

	// Update is used when a few data shards have changed and their parity
	// needs to be recalculated.
	// 'newDatashards' contains the changed data shards (nil for unchanged shards).
	// 'shards' contains the old data shards (nil is allowed for unchanged shards)
	// followed by the old parity shards.
	// The new parity shards will be placed in shards[DataShards:].
	// Update is useful when DataShards is much larger than ParityShards and only
	// a few data shards have changed: it is faster than Encode and does not need
	// to read all data shards to re-encode.
	Update(shards [][]byte, newDatashards [][]byte) error

	// Split a data slice into the number of shards given to the encoder,
	// and create empty parity shards.
	//
	// The data will be split into equally sized shards.
	// If the data size isn't divisible by the number of shards,
	// the last shard will contain extra zeros.
	//
	// There must be at least 1 byte otherwise ErrShortData will be
	// returned.
	//
	// The data will not be copied, except for the last shard, so you
	// should not modify the data of the input slice afterwards.
	Split(data []byte) ([][]byte, error)

	// Join the shards and write the data segment to dst.
	//
	// Only the data shards are considered.
	// You must supply the exact output size you want.
	// If there are too few shards given, ErrTooFewShards will be returned.
	// If the total data size is less than outSize, ErrShortData will be returned.
	Join(dst io.Writer, shards [][]byte, outSize int) error
}

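// A minimal usage sketch of the interface above (illustrative only; assumes a
// 10+3 shard layout and elides error handling):
//
//	enc, err := New(10, 3)
//	if err != nil {
//		// handle error
//	}
//	shards, _ := enc.Split(data)    // split input into 13 shards
//	_ = enc.Encode(shards)          // fill the 3 parity shards
//	shards[0], shards[4] = nil, nil // simulate two lost shards
//	_ = enc.Reconstruct(shards)     // rebuild them from the remaining 11
//	ok, _ := enc.Verify(shards)     // ok == true when parity matches
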
// reedSolomon contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct it using New().
type reedSolomon struct {
	DataShards   int // Number of data shards, should not be modified.
	ParityShards int // Number of parity shards, should not be modified.
	Shards       int // Total number of shards. Calculated, and should not be modified.
	m            matrix
	tree         inversionTree
	parity       [][]byte
	o            options
}

// ErrInvShardNum will be returned by New, if you attempt to create
// an Encoder where either data or parity shards is zero or less.
var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")

// ErrMaxShardNum will be returned by New, if you attempt to create an
// Encoder where data and parity shards are bigger than the order of
// GF(2^8).
var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")

// buildMatrix creates the matrix to use for encoding, given the
// number of data shards and the number of total shards.
//
// The top square of the matrix is guaranteed to be an identity
// matrix, which means that the data shards are unchanged after
// encoding.
func buildMatrix(dataShards, totalShards int) (matrix, error) {
	// Start with a Vandermonde matrix. This matrix would work,
	// in theory, but doesn't have the property that the data
	// shards are unchanged after encoding.
	vm, err := vandermonde(totalShards, dataShards)
	if err != nil {
		return nil, err
	}

	// Multiply by the inverse of the top square of the matrix.
	// This will make the top square be the identity matrix, but
	// preserve the property that any square subset of rows is
	// invertible.
	top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
	if err != nil {
		return nil, err
	}

	topInv, err := top.Invert()
	if err != nil {
		return nil, err
	}
	return vm.Multiply(topInv)
}

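// Illustration of the construction above (an explanatory sketch, not part of
// the original source): with dataShards = 4 and totalShards = 6, vandermonde
// returns a 6x4 matrix V, and the encoding matrix becomes
//
//	E = V * Inverse(V[0:4, 0:4])
//
// so the top 4 rows of E form the identity matrix (data shards pass through
// unchanged), while every 4x4 subset of rows of E remains invertible, which is
// what allows reconstruction from any 4 surviving shards.
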
// buildMatrixPAR1 creates the matrix to use for encoding according to
// the PARv1 spec, given the number of data shards and the number of
// total shards. Note that the method they use is buggy, and may lead
// to cases where recovery is impossible, even if there are enough
// parity shards.
//
// The top square of the matrix is guaranteed to be an identity
// matrix, which means that the data shards are unchanged after
// encoding.
func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
	result, err := newMatrix(totalShards, dataShards)
	if err != nil {
		return nil, err
	}

	for r, row := range result {
		// The top portion of the matrix is the identity
		// matrix, and the bottom is a transposed Vandermonde
		// matrix starting at 1 instead of 0.
		if r < dataShards {
			result[r][r] = 1
		} else {
			for c := range row {
				result[r][c] = galExp(byte(c+1), r-dataShards)
			}
		}
	}
	return result, nil
}

func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
	result, err := newMatrix(totalShards, dataShards)
	if err != nil {
		return nil, err
	}

	for r, row := range result {
		// The top portion of the matrix is the identity
		// matrix, and the bottom is a transposed Cauchy matrix.
		if r < dataShards {
			result[r][r] = 1
		} else {
			for c := range row {
				result[r][c] = invTable[(byte(r ^ c))]
			}
		}
	}
	return result, nil
}

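// A note on the Cauchy rows above (an explanatory sketch; invTable is defined
// elsewhere in the package and is assumed here to be the GF(2^8) inverse
// table): for a parity row r and data column c the entry is invTable[r^c],
// i.e. the field inverse of r XOR c. Since every parity row index r is at
// least dataShards and every column index c is below dataShards, r^c is never
// zero, so the inverse always exists.
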
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of total shards is 256.
// If no options are supplied, default options are used.
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
	r := reedSolomon{
		DataShards:   dataShards,
		ParityShards: parityShards,
		Shards:       dataShards + parityShards,
		o:            defaultOptions,
	}
	for _, opt := range opts {
		opt(&r.o)
	}
	if dataShards <= 0 || parityShards <= 0 {
		return nil, ErrInvShardNum
	}

	if dataShards+parityShards > 256 {
		return nil, ErrMaxShardNum
	}

	var err error
	switch {
	case r.o.useCauchy:
		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
	case r.o.usePAR1Matrix:
		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
	default:
		r.m, err = buildMatrix(dataShards, r.Shards)
	}
	if err != nil {
		return nil, err
	}

	if r.o.shardSize > 0 {
		cacheSize := cpuid.CPU.Cache.L2
		if cacheSize <= 0 {
			// Set to 128K if undetectable.
			cacheSize = 128 << 10
		}
		p := runtime.NumCPU()

		// 1 input + parity must fit in cache, and we add one more to be safer.
		shards := 1 + parityShards
		g := (r.o.shardSize * shards) / (cacheSize - (cacheSize >> 4))

		if cpuid.CPU.ThreadsPerCore > 1 {
			// If multiple threads per core, make sure they don't contend for cache.
			g *= cpuid.CPU.ThreadsPerCore
		}
		g *= 2
		if g < p {
			g = p
		}

		// Have g be a multiple of p.
		g += p - 1
		g -= g % p

		r.o.maxGoroutines = g
	}

	// Inverted matrices are cached in a tree keyed by the indices
	// of the invalid rows of the data to reconstruct.
	// The inversion root node will have the identity matrix as
	// its inversion matrix because it implies there are no errors
	// with the original data.
	r.tree = newInversionTree(dataShards, parityShards)

	r.parity = make([][]byte, parityShards)
	for i := range r.parity {
		r.parity[i] = r.m[dataShards+i]
	}

	return &r, err
}

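// Example of constructing an encoder with non-default options (a sketch; the
// Option constructors live in options.go, which is not part of this file, so
// the names below are assumptions based on the option fields they would set):
//
//	// Use the Cauchy generator matrix and cap concurrency at 4 goroutines.
//	enc, err := New(17, 3, WithCauchyMatrix(), WithMaxGoroutines(4))
//	if err != nil {
//		// handle error
//	}
//	_ = enc
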
// ErrTooFewShards is returned if too few shards were given to
// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
// if there were too few shards to reconstruct the missing data.
var ErrTooFewShards = errors.New("too few shards given")

// Encode parity for a set of data shards.
// Input is 'shards' containing data shards followed by parity shards.
// The number of shards must match the number given to New.
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
func (r reedSolomon) Encode(shards [][]byte) error {
	if len(shards) != r.Shards {
		return ErrTooFewShards
	}

	err := checkShards(shards, false)
	if err != nil {
		return err
	}

	// Get the slice of output buffers.
	output := shards[r.DataShards:]

	// Do the coding.
	r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
	return nil
}

// ErrInvalidInput is returned if an invalid input parameter is given to Update.
var ErrInvalidInput = errors.New("invalid input")

func (r reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
	if len(shards) != r.Shards {
		return ErrTooFewShards
	}

	if len(newDatashards) != r.DataShards {
		return ErrTooFewShards
	}

	err := checkShards(shards, true)
	if err != nil {
		return err
	}

	err = checkShards(newDatashards, true)
	if err != nil {
		return err
	}

	for i := range newDatashards {
		if newDatashards[i] != nil && shards[i] == nil {
			return ErrInvalidInput
		}
	}
	for _, p := range shards[r.DataShards:] {
		if p == nil {
			return ErrInvalidInput
		}
	}

	shardSize := shardSize(shards)

	// Get the slice of output buffers.
	output := shards[r.DataShards:]

	// Do the coding.
	r.updateParityShards(r.parity, shards[0:r.DataShards], newDatashards[0:r.DataShards], output, r.ParityShards, shardSize)
	return nil
}

func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
		r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
		return
	}

	for c := 0; c < r.DataShards; c++ {
		in := newinputs[c]
		if in == nil {
			continue
		}
		oldin := oldinputs[c]
		// Note: the oldinputs data will be changed; oldin now holds new XOR old.
		sliceXor(in, oldin, r.o.useSSE2)
		for iRow := 0; iRow < outputCount; iRow++ {
			galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
		}
	}
}

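// Why the delta update above is sufficient (an explanatory note, not in the
// original source): parity is a linear function of the data over GF(2^8), so
// for each parity row iRow and each changed data shard c
//
//	newParity[iRow] = oldParity[iRow] XOR matrixRows[iRow][c] * (newData[c] XOR oldData[c])
//
// which is exactly what sliceXor (the delta) followed by galMulSliceXor (the
// multiply-accumulate into the old parity) computes, without reading any of
// the unchanged data shards.
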
func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
	var wg sync.WaitGroup
	do := byteCount / r.o.maxGoroutines
	if do < r.o.minSplitSize {
		do = r.o.minSplitSize
	}
	start := 0
	for start < byteCount {
		if start+do > byteCount {
			do = byteCount - start
		}
		wg.Add(1)
		go func(start, stop int) {
			for c := 0; c < r.DataShards; c++ {
				in := newinputs[c]
				if in == nil {
					continue
				}
				oldin := oldinputs[c]
				// Note: the oldinputs data will be changed; oldin now holds new XOR old.
				sliceXor(in[start:stop], oldin[start:stop], r.o.useSSE2)
				for iRow := 0; iRow < outputCount; iRow++ {
					galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
				}
			}
			wg.Done()
		}(start, start+do)
		start += do
	}
	wg.Wait()
}

// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
	if len(shards) != r.Shards {
		return false, ErrTooFewShards
	}
	err := checkShards(shards, false)
	if err != nil {
		return false, err
	}

	// Slice of buffers being checked.
	toCheck := shards[r.DataShards:]

	// Do the checking.
	return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
}

// Multiplies a subset of rows from a coding matrix by a full set of
// input shards to produce some output shards.
// 'matrixRows' is the rows from the matrix to use.
// 'inputs' is an array of byte arrays, each of which is one input shard.
// The number of inputs used is determined by the length of each matrix row.
// 'outputs' is the byte arrays where the computed shards are stored.
// The number of outputs computed, and the number of matrix rows used,
// is determined by outputCount, which is the number of outputs to compute.
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
	if r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2 {
		r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
		return
	} else if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
		r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
		return
	}
	for c := 0; c < r.DataShards; c++ {
		in := inputs[c]
		for iRow := 0; iRow < outputCount; iRow++ {
			if c == 0 {
				galMulSlice(matrixRows[iRow][c], in, outputs[iRow], &r.o)
			} else {
				galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
			}
		}
	}
}

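// What the loop above computes (an explanatory note, not in the original
// source): for every byte position, each output shard is a GF(2^8)
// matrix-vector product over the input shards,
//
//	outputs[iRow] = SUM over c of matrixRows[iRow][c] * inputs[c]
//
// where '*' is Galois-field multiplication and the sum is XOR. The c == 0 case
// uses galMulSlice to initialize the output, and later columns use
// galMulSliceXor to accumulate into it.
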
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
	var wg sync.WaitGroup
	do := byteCount / r.o.maxGoroutines
	if do < r.o.minSplitSize {
		do = r.o.minSplitSize
	}
	// Make sizes divisible by 32.
	do = (do + 31) & (^31)
	start := 0
	for start < byteCount {
		if start+do > byteCount {
			do = byteCount - start
		}
		wg.Add(1)
		go func(start, stop int) {
			for c := 0; c < r.DataShards; c++ {
				in := inputs[c][start:stop]
				for iRow := 0; iRow < outputCount; iRow++ {
					if c == 0 {
						galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:stop], &r.o)
					} else {
						galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:stop], &r.o)
					}
				}
			}
			wg.Done()
		}(start, start+do)
		start += do
	}
	wg.Wait()
}

// checkSomeShards is mostly the same as codeSomeShards,
// except this will check values and return
// as soon as a difference is found.
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
	}
	outputs := make([][]byte, len(toCheck))
	for i := range outputs {
		outputs[i] = make([]byte, byteCount)
	}
	for c := 0; c < r.DataShards; c++ {
		in := inputs[c]
		for iRow := 0; iRow < outputCount; iRow++ {
			galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
		}
	}

	for i, calc := range outputs {
		if !bytes.Equal(calc, toCheck[i]) {
			return false
		}
	}
	return true
}

func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
	same := true
	var mu sync.RWMutex // For above

	var wg sync.WaitGroup
	do := byteCount / r.o.maxGoroutines
	if do < r.o.minSplitSize {
		do = r.o.minSplitSize
	}
	// Make sizes divisible by 32.
	do = (do + 31) & (^31)
	start := 0
	for start < byteCount {
		if start+do > byteCount {
			do = byteCount - start
		}
		wg.Add(1)
		go func(start, do int) {
			defer wg.Done()
			outputs := make([][]byte, len(toCheck))
			for i := range outputs {
				outputs[i] = make([]byte, do)
			}
			for c := 0; c < r.DataShards; c++ {
				mu.RLock()
				if !same {
					mu.RUnlock()
					return
				}
				mu.RUnlock()
				in := inputs[c][start : start+do]
				for iRow := 0; iRow < outputCount; iRow++ {
					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
				}
			}

			for i, calc := range outputs {
				if !bytes.Equal(calc, toCheck[i][start:start+do]) {
					mu.Lock()
					same = false
					mu.Unlock()
					return
				}
			}
		}(start, do)
		start += do
	}
	wg.Wait()
	return same
}

// ErrShardNoData will be returned if there are no shards,
// or if the length of all shards is zero.
var ErrShardNoData = errors.New("no shard data")

// ErrShardSize is returned if shard length isn't the same for all
// shards.
var ErrShardSize = errors.New("shard sizes do not match")

// checkShards will check if shards are the same size
// or 0, if allowed. An error is returned if this fails.
// An error is also returned if all shards are size 0.
func checkShards(shards [][]byte, nilok bool) error {
	size := shardSize(shards)
	if size == 0 {
		return ErrShardNoData
	}
	for _, shard := range shards {
		if len(shard) != size {
			if len(shard) != 0 || !nilok {
				return ErrShardSize
			}
		}
	}
	return nil
}

// shardSize returns the size of a single shard.
// The first non-zero size is returned,
// or 0 if all shards are size 0.
func shardSize(shards [][]byte) int {
	for _, shard := range shards {
		if len(shard) != 0 {
			return len(shard)
		}
	}
	return 0
}

// Reconstruct will recreate the missing shards, if possible.
//
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil or zero-length.
// If a shard is zero-length but has sufficient capacity, that memory will
// be used, otherwise a new []byte will be allocated.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if the data set is ok.
func (r reedSolomon) Reconstruct(shards [][]byte) error {
	return r.reconstruct(shards, false)
}

// ReconstructData will recreate any missing data shards, if possible.
//
// Given a list of shards, some of which contain data, fills in the
// data shards that don't have data.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil or zero-length.
// If a shard is zero-length but has sufficient capacity, that memory will
// be used, otherwise a new []byte will be allocated.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// As the reconstructed shard set may contain missing parity shards,
// calling the Verify function is likely to fail.
func (r reedSolomon) ReconstructData(shards [][]byte) error {
	return r.reconstruct(shards, true)
}

// reconstruct will recreate the missing data shards, and unless
// dataOnly is true, also the missing parity shards.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
func (r reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
	if len(shards) != r.Shards {
		return ErrTooFewShards
	}
	// Check arguments.
	err := checkShards(shards, true)
	if err != nil {
		return err
	}

	shardSize := shardSize(shards)

	// Quick check: are all of the shards present? If so, there's
	// nothing to do.
	numberPresent := 0
	for i := 0; i < r.Shards; i++ {
		if len(shards[i]) != 0 {
			numberPresent++
		}
	}
	if numberPresent == r.Shards {
		// Cool. All of the shards have data. We don't
		// need to do anything.
		return nil
	}

	// More complete sanity check.
	if numberPresent < r.DataShards {
		return ErrTooFewShards
	}

	// Pull out an array holding just the shards that
	// correspond to the rows of the submatrix. These shards
	// will be the input to the decoding process that re-creates
	// the missing data shards.
	//
	// Also, create an array of indices of the valid rows we do have
	// and the invalid rows we don't have up until we have enough valid rows.
	subShards := make([][]byte, r.DataShards)
	validIndices := make([]int, r.DataShards)
	invalidIndices := make([]int, 0)
	subMatrixRow := 0
	for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
		if len(shards[matrixRow]) != 0 {
			subShards[subMatrixRow] = shards[matrixRow]
			validIndices[subMatrixRow] = matrixRow
			subMatrixRow++
		} else {
			invalidIndices = append(invalidIndices, matrixRow)
		}
	}

	// Attempt to get the cached inverted matrix out of the tree
	// based on the indices of the invalid rows.
	dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)

	// If the inverted matrix isn't cached in the tree yet we must
	// construct it ourselves and insert it into the tree for the
	// future. In this way the inversion tree is lazily loaded.
	if dataDecodeMatrix == nil {
		// Pull out the rows of the matrix that correspond to the
		// shards that we have and build a square matrix. This
		// matrix could be used to generate the shards that we have
		// from the original data.
		subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
		for subMatrixRow, validIndex := range validIndices {
			for c := 0; c < r.DataShards; c++ {
				subMatrix[subMatrixRow][c] = r.m[validIndex][c]
			}
		}
		// Invert the matrix, so we can go from the encoded shards
		// back to the original data. Then pull out the row that
		// generates the shard that we want to decode. Note that
		// since this matrix maps back to the original data, it can
		// be used to create a data shard, but not a parity shard.
		dataDecodeMatrix, err = subMatrix.Invert()
		if err != nil {
			return err
		}

		// Cache the inverted matrix in the tree for future use keyed on the
		// indices of the invalid rows.
		err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
		if err != nil {
			return err
		}
	}

	// Re-create any data shards that were missing.
	//
	// The input to the coding is all of the shards we actually
	// have, and the output is the missing data shards. The computation
	// is done using the special decode matrix we just built.
	outputs := make([][]byte, r.ParityShards)
	matrixRows := make([][]byte, r.ParityShards)
	outputCount := 0

	for iShard := 0; iShard < r.DataShards; iShard++ {
		if len(shards[iShard]) == 0 {
			if cap(shards[iShard]) >= shardSize {
				shards[iShard] = shards[iShard][0:shardSize]
			} else {
				shards[iShard] = make([]byte, shardSize)
			}
			outputs[outputCount] = shards[iShard]
			matrixRows[outputCount] = dataDecodeMatrix[iShard]
			outputCount++
		}
	}
	r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)

	if dataOnly {
		// Exit early if we are only interested in the data shards.
		return nil
	}

	// Now that we have all of the data shards intact, we can
	// compute any of the parity that is missing.
	//
	// The input to the coding is ALL of the data shards, including
	// any that we just calculated. The output is whichever of the
	// parity shards were missing.
	outputCount = 0
	for iShard := r.DataShards; iShard < r.Shards; iShard++ {
		if len(shards[iShard]) == 0 {
			if cap(shards[iShard]) >= shardSize {
				shards[iShard] = shards[iShard][0:shardSize]
			} else {
				shards[iShard] = make([]byte, shardSize)
			}
			outputs[outputCount] = shards[iShard]
			matrixRows[outputCount] = r.parity[iShard-r.DataShards]
			outputCount++
		}
	}
	r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
	return nil
}

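// The algebra behind reconstruct above (an explanatory note, not in the
// original source): if E is the encoding matrix and D the column of data
// shards, every stored shard is a row of E*D. Taking the DataShards surviving
// rows gives a square system subMatrix * D = subShards, and inverting it
// yields
//
//	D = Inverse(subMatrix) * subShards
//
// so the rows of dataDecodeMatrix corresponding to the missing data shards
// regenerate exactly those shards. Any missing parity is then re-encoded from
// the full set of data shards using the original parity rows.
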
// ErrShortData will be returned by Split(), if there isn't enough data
// to fill the number of shards.
var ErrShortData = errors.New("not enough data to fill the number of requested shards")

// Split a data slice into the number of shards given to the encoder,
// and create empty parity shards if necessary.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// There must be at least 1 byte otherwise ErrShortData will be
// returned.
//
// The data will not be copied, except for the last shard, so you
// should not modify the data of the input slice afterwards.
func (r reedSolomon) Split(data []byte) ([][]byte, error) {
	if len(data) == 0 {
		return nil, ErrShortData
	}
	// Calculate number of bytes per data shard.
	perShard := (len(data) + r.DataShards - 1) / r.DataShards

	if cap(data) > len(data) {
		data = data[:cap(data)]
	}

	// Only allocate memory if necessary.
	if len(data) < (r.Shards * perShard) {
		// Pad data to r.Shards*perShard.
		padding := make([]byte, (r.Shards*perShard)-len(data))
		data = append(data, padding...)
	}

	// Split into equal-length shards.
	dst := make([][]byte, r.Shards)
	for i := range dst {
		dst[i] = data[:perShard]
		data = data[perShard:]
	}

	return dst, nil
}

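// A worked example for Split above (illustrative, not in the original source):
// with 4 data + 2 parity shards and a 50-byte input, perShard is
// ceil(50/4) = 13. The input is padded to 6*13 = 78 bytes, so the 4th data
// shard carries 2 trailing zeros and the 2 parity shards start out as
// all-zero 13-byte slices, ready to be filled in by Encode.
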
// ErrReconstructRequired is returned if too few data shards are intact and a
// reconstruction is required before you can successfully join the shards.
var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")

// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
// You must supply the exact output size you want.
//
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
// If one or more required data shards are nil, ErrReconstructRequired will be returned.
func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
	// Do we have enough shards?
	if len(shards) < r.DataShards {
		return ErrTooFewShards
	}
	shards = shards[:r.DataShards]

	// Do we have enough data?
	size := 0
	for _, shard := range shards {
		if shard == nil {
			return ErrReconstructRequired
		}
		size += len(shard)

		// Do we have enough data already?
		if size >= outSize {
			break
		}
	}
	if size < outSize {
		return ErrShortData
	}

	// Copy data to dst.
	write := outSize
	for _, shard := range shards {
		if write < len(shard) {
			_, err := dst.Write(shard[:write])
			return err
		}
		n, err := dst.Write(shard)
		if err != nil {
			return err
		}
		write -= n
	}
	return nil
}

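// A minimal round-trip sketch using Split, Encode, and Join (illustrative
// only; error handling elided, and 'data' stands for the original input):
//
//	enc, _ := New(4, 2)
//	shards, _ := enc.Split(data)
//	_ = enc.Encode(shards)
//	var buf bytes.Buffer
//	_ = enc.Join(&buf, shards, len(data)) // buf now holds the original data;
//	                                      // the padding added by Split is
//	                                      // trimmed away by outSize.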