ec_encoder.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. package erasure_coding
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "github.com/klauspost/reedsolomon"
  7. "github.com/seaweedfs/seaweedfs/weed/glog"
  8. "github.com/seaweedfs/seaweedfs/weed/storage/idx"
  9. "github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  11. "github.com/seaweedfs/seaweedfs/weed/util"
  12. )
const (
	// DataShardsCount is the number of data shards per erasure-coded volume.
	DataShardsCount = 10
	// ParityShardsCount is the number of parity shards; Reed-Solomon
	// reconstruction tolerates losing up to this many shards in total.
	ParityShardsCount = 4
	// TotalShardsCount is the total number of shard files (.ec00 ~ .ec13).
	TotalShardsCount = DataShardsCount + ParityShardsCount
	// MinTotalDisks evaluates to 4; presumably the minimum disk count for
	// spreading shards safely — TODO confirm against placement logic.
	MinTotalDisks = TotalShardsCount/ParityShardsCount + 1
	// ErasureCodingLargeBlockSize is the per-shard block size used while
	// plenty of data remains to encode.
	ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
	// ErasureCodingSmallBlockSize is the per-shard block size used for the
	// tail of the volume.
	ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
)
  21. // WriteSortedFileFromIdx generates .ecx file from existing .idx file
  22. // all keys are sorted in ascending order
  23. func WriteSortedFileFromIdx(baseFileName string, ext string) (e error) {
  24. nm, err := readNeedleMap(baseFileName)
  25. if nm != nil {
  26. defer nm.Close()
  27. }
  28. if err != nil {
  29. return fmt.Errorf("readNeedleMap: %w", err)
  30. }
  31. ecxFile, err := os.OpenFile(baseFileName+ext, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
  32. if err != nil {
  33. return fmt.Errorf("failed to open ecx file: %w", err)
  34. }
  35. defer ecxFile.Close()
  36. err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
  37. bytes := value.ToBytes()
  38. _, writeErr := ecxFile.Write(bytes)
  39. return writeErr
  40. })
  41. if err != nil {
  42. return fmt.Errorf("failed to visit idx file: %w", err)
  43. }
  44. return nil
  45. }
// WriteEcFiles generates .ec00 ~ .ec13 files from the .dat file,
// streaming it with a 256KB buffer per shard.
func WriteEcFiles(baseFileName string) error {
	return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
}
// RebuildEcFiles recreates any missing .ec00 ~ .ec13 shard files from the
// surviving ones and returns the ids of the shards it regenerated.
func RebuildEcFiles(baseFileName string) ([]uint32, error) {
	return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
}
  53. func ToExt(ecIndex int) string {
  54. return fmt.Sprintf(".ec%02d", ecIndex)
  55. }
  56. func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
  57. file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  58. if err != nil {
  59. return fmt.Errorf("failed to open dat file: %w", err)
  60. }
  61. defer file.Close()
  62. fi, err := file.Stat()
  63. if err != nil {
  64. return fmt.Errorf("failed to stat dat file: %w", err)
  65. }
  66. glog.V(0).Infof("encodeDatFile %s.dat size:%d", baseFileName, fi.Size())
  67. err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
  68. if err != nil {
  69. return fmt.Errorf("encodeDatFile: %w", err)
  70. }
  71. return nil
  72. }
// generateMissingEcFiles scans for all TotalShardsCount shard files of
// baseFileName, opens the existing ones for reading, creates the missing
// ones for writing, and reconstructs the missing shards from the present
// ones via rebuildEcFiles. It returns the ids of the shards it generated.
// The bufferSize, largeBlockSize and smallBlockSize parameters are
// currently unused here.
func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
	shardHasData := make([]bool, TotalShardsCount)
	inputFiles := make([]*os.File, TotalShardsCount)
	outputFiles := make([]*os.File, TotalShardsCount)
	// NOTE: the defers inside this loop are intentional — every shard file
	// must stay open until rebuildEcFiles below has finished.
	for shardId := 0; shardId < TotalShardsCount; shardId++ {
		shardFileName := baseFileName + ToExt(shardId)
		if util.FileExists(shardFileName) {
			// existing shard: open as reconstruction input
			shardHasData[shardId] = true
			inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
			if err != nil {
				return nil, err
			}
			defer inputFiles[shardId].Close()
		} else {
			// missing shard: create the file to receive reconstructed data
			outputFiles[shardId], err = os.OpenFile(shardFileName, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
			if err != nil {
				return nil, err
			}
			defer outputFiles[shardId].Close()
			generatedShardIds = append(generatedShardIds, uint32(shardId))
		}
	}
	err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
	if err != nil {
		return nil, fmt.Errorf("rebuildEcFiles: %w", err)
	}
	return
}
  101. func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  102. bufferSize := int64(len(buffers[0]))
  103. if bufferSize == 0 {
  104. glog.Fatal("unexpected zero buffer size")
  105. }
  106. batchCount := blockSize / bufferSize
  107. if blockSize%bufferSize != 0 {
  108. glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
  109. }
  110. for b := int64(0); b < batchCount; b++ {
  111. err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
  112. if err != nil {
  113. return err
  114. }
  115. }
  116. return nil
  117. }
  118. func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
  119. for i := 0; i < TotalShardsCount; i++ {
  120. fname := baseFileName + ToExt(i)
  121. openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
  122. if forRead {
  123. openOption = os.O_RDONLY
  124. }
  125. f, err := os.OpenFile(fname, openOption, 0644)
  126. if err != nil {
  127. return files, fmt.Errorf("failed to open file %s: %v", fname, err)
  128. }
  129. files = append(files, f)
  130. }
  131. return
  132. }
  133. func closeEcFiles(files []*os.File) {
  134. for _, f := range files {
  135. if f != nil {
  136. f.Close()
  137. }
  138. }
  139. }
  140. func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  141. // read data into buffers
  142. for i := 0; i < DataShardsCount; i++ {
  143. n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
  144. if err != nil {
  145. if err != io.EOF {
  146. return err
  147. }
  148. }
  149. if n < len(buffers[i]) {
  150. for t := len(buffers[i]) - 1; t >= n; t-- {
  151. buffers[i][t] = 0
  152. }
  153. }
  154. }
  155. err := enc.Encode(buffers)
  156. if err != nil {
  157. return err
  158. }
  159. for i := 0; i < TotalShardsCount; i++ {
  160. _, err := outputs[i].Write(buffers[i])
  161. if err != nil {
  162. return err
  163. }
  164. }
  165. return nil
  166. }
// encodeDatFile erasure-encodes remainingSize bytes of the .dat file into
// the TotalShardsCount shard files: large blocks (largeBlockSize per shard)
// while enough data remains, then the tail in small blocks (smallBlockSize
// per shard, zero-padded past EOF by the batch reader).
func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
	var processedSize int64
	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
	if err != nil {
		return fmt.Errorf("failed to create encoder: %w", err)
	}
	// one scratch buffer per shard, reused for every batch
	buffers := make([][]byte, TotalShardsCount)
	for i := range buffers {
		buffers[i] = make([]byte, bufferSize)
	}
	outputs, err := openEcFiles(baseFileName, false)
	// deferring before the error check is intentional: openEcFiles may
	// return partially-opened files together with an error, and
	// closeEcFiles tolerates nil entries
	defer closeEcFiles(outputs)
	if err != nil {
		return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
	}
	for remainingSize > largeBlockSize*DataShardsCount {
		err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
		if err != nil {
			return fmt.Errorf("failed to encode large chunk data: %w", err)
		}
		remainingSize -= largeBlockSize * DataShardsCount
		processedSize += largeBlockSize * DataShardsCount
	}
	// encode the remainder in small blocks; the final iteration may read
	// past EOF, which the batch reader pads with zeros
	for remainingSize > 0 {
		err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
		if err != nil {
			return fmt.Errorf("failed to encode small chunk data: %w", err)
		}
		remainingSize -= smallBlockSize * DataShardsCount
		processedSize += smallBlockSize * DataShardsCount
	}
	return nil
}
// rebuildEcFiles reconstructs the missing shards block by block.
// shardHasData flags which shard indexes exist on disk; inputFiles holds
// open readers for the existing shards and outputFiles holds open writers
// for the missing ones (entries for the opposite role are nil).
// NOTE(review): this assumes every existing shard's size is a whole
// multiple of the block read size (encodeDatFile writes shards in 1MB
// multiples) — a trailing partial block would surface as a size-mismatch
// error below; confirm if shards can ever be truncated.
func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
	if err != nil {
		return fmt.Errorf("failed to create encoder: %w", err)
	}
	// allocate read buffers only for the shards that exist
	buffers := make([][]byte, TotalShardsCount)
	for i := range buffers {
		if shardHasData[i] {
			buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
		}
	}
	var startOffset int64
	var inputBufferDataSize int
	for {
		// read the input data from files
		for i := 0; i < TotalShardsCount; i++ {
			if shardHasData[i] {
				n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
				if n == 0 {
					// no more data in the shards: reconstruction complete
					return nil
				}
				// every existing shard must yield the same byte count per block
				if inputBufferDataSize == 0 {
					inputBufferDataSize = n
				}
				if inputBufferDataSize != n {
					return fmt.Errorf("ec shard size expected %d actual %d", inputBufferDataSize, n)
				}
			} else {
				// a nil entry marks the shard as missing so Reconstruct
				// allocates and fills it in
				buffers[i] = nil
			}
		}
		// encode the data
		err = enc.Reconstruct(buffers)
		if err != nil {
			return fmt.Errorf("reconstruct: %w", err)
		}
		// write the data to output files
		for i := 0; i < TotalShardsCount; i++ {
			if !shardHasData[i] {
				n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
				if inputBufferDataSize != n {
					return fmt.Errorf("fail to write to %s", outputFiles[i].Name())
				}
			}
		}
		startOffset += int64(inputBufferDataSize)
	}
}
  248. func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) {
  249. indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
  250. if err != nil {
  251. return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
  252. }
  253. defer indexFile.Close()
  254. cm := needle_map.NewMemDb()
  255. err = idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error {
  256. if !offset.IsZero() && !size.IsDeleted() {
  257. cm.Set(key, offset, size)
  258. } else {
  259. cm.Delete(key)
  260. }
  261. return nil
  262. })
  263. return cm, err
  264. }