volume_grpc_copy.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. package weed_server
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "math"
  7. "os"
  8. "time"
  9. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/backend"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/operation"
  13. "github.com/seaweedfs/seaweedfs/weed/pb"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  15. "github.com/seaweedfs/seaweedfs/weed/storage"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  19. "github.com/seaweedfs/seaweedfs/weed/util"
  20. )
  21. const BufferSizeLimit = 1024 * 1024 * 2
  22. // VolumeCopy copy the .idx .dat .vif files, and mount the volume
  23. func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stream volume_server_pb.VolumeServer_VolumeCopyServer) error {
  24. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  25. if v != nil {
  26. glog.V(0).Infof("volume %d already exists. deleted before copying...", req.VolumeId)
  27. err := vs.store.DeleteVolume(needle.VolumeId(req.VolumeId), false)
  28. if err != nil {
  29. return fmt.Errorf("failed to delete existing volume %d: %v", req.VolumeId, err)
  30. }
  31. glog.V(0).Infof("deleted existing volume %d before copying.", req.VolumeId)
  32. }
  33. // the master will not start compaction for read-only volumes, so it is safe to just copy files directly
  34. // copy .dat and .idx files
  35. // read .idx .dat file size and timestamp
  36. // send .idx file
  37. // send .dat file
  38. // confirm size and timestamp
  39. var volFileInfoResp *volume_server_pb.ReadVolumeFileStatusResponse
  40. var dataBaseFileName, indexBaseFileName, idxFileName, datFileName string
  41. var hasRemoteDatFile bool
  42. err := operation.WithVolumeServerClient(true, pb.ServerAddress(req.SourceDataNode), vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  43. var err error
  44. volFileInfoResp, err = client.ReadVolumeFileStatus(context.Background(),
  45. &volume_server_pb.ReadVolumeFileStatusRequest{
  46. VolumeId: req.VolumeId,
  47. })
  48. if nil != err {
  49. return fmt.Errorf("read volume file status failed, %w", err)
  50. }
  51. diskType := volFileInfoResp.DiskType
  52. if req.DiskType != "" {
  53. diskType = req.DiskType
  54. }
  55. location := vs.store.FindFreeLocation(func(location *storage.DiskLocation) bool {
  56. return location.DiskType == types.ToDiskType(diskType)
  57. })
  58. if location == nil {
  59. return fmt.Errorf("no space left for disk type %s", types.ToDiskType(diskType).ReadableString())
  60. }
  61. dataBaseFileName = storage.VolumeFileName(location.Directory, volFileInfoResp.Collection, int(req.VolumeId))
  62. indexBaseFileName = storage.VolumeFileName(location.IdxDirectory, volFileInfoResp.Collection, int(req.VolumeId))
  63. hasRemoteDatFile = volFileInfoResp.VolumeInfo != nil && len(volFileInfoResp.VolumeInfo.Files) > 0
  64. util.WriteFile(dataBaseFileName+".note", []byte(fmt.Sprintf("copying from %s", req.SourceDataNode)), 0755)
  65. defer func() {
  66. if err != nil {
  67. os.Remove(dataBaseFileName + ".dat")
  68. os.Remove(indexBaseFileName + ".idx")
  69. os.Remove(dataBaseFileName + ".vif")
  70. os.Remove(dataBaseFileName + ".note")
  71. }
  72. }()
  73. var preallocateSize int64
  74. if grpcErr := pb.WithMasterClient(false, vs.GetMaster(context.Background()), vs.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
  75. resp, err := client.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  76. if err != nil {
  77. return fmt.Errorf("get master %s configuration: %v", vs.GetMaster(context.Background()), err)
  78. }
  79. if resp.VolumePreallocate {
  80. preallocateSize = int64(resp.VolumeSizeLimitMB) * (1 << 20)
  81. }
  82. return nil
  83. }); grpcErr != nil {
  84. glog.V(0).Infof("connect to %s: %v", vs.GetMaster(context.Background()), grpcErr)
  85. }
  86. if preallocateSize > 0 && !hasRemoteDatFile {
  87. volumeFile := dataBaseFileName + ".dat"
  88. _, err := backend.CreateVolumeFile(volumeFile, preallocateSize, 0)
  89. if err != nil {
  90. return fmt.Errorf("create volume file %s: %v", volumeFile, err)
  91. }
  92. }
  93. // println("source:", volFileInfoResp.String())
  94. copyResponse := &volume_server_pb.VolumeCopyResponse{}
  95. reportInterval := int64(1024 * 1024 * 128)
  96. nextReportTarget := reportInterval
  97. var modifiedTsNs int64
  98. var sendErr error
  99. var ioBytePerSecond int64
  100. if req.IoBytePerSecond <= 0 {
  101. ioBytePerSecond = vs.compactionBytePerSecond
  102. } else {
  103. ioBytePerSecond = req.IoBytePerSecond
  104. }
  105. throttler := util.NewWriteThrottler(ioBytePerSecond)
  106. if !hasRemoteDatFile {
  107. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, dataBaseFileName, ".dat", false, true, func(processed int64) bool {
  108. if processed > nextReportTarget {
  109. copyResponse.ProcessedBytes = processed
  110. if sendErr = stream.Send(copyResponse); sendErr != nil {
  111. return false
  112. }
  113. nextReportTarget = processed + reportInterval
  114. }
  115. return true
  116. }, throttler); err != nil {
  117. return err
  118. }
  119. if sendErr != nil {
  120. return sendErr
  121. }
  122. if modifiedTsNs > 0 {
  123. os.Chtimes(dataBaseFileName+".dat", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  124. }
  125. }
  126. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.IdxFileSize, indexBaseFileName, ".idx", false, false, nil, throttler); err != nil {
  127. return err
  128. }
  129. if modifiedTsNs > 0 {
  130. os.Chtimes(indexBaseFileName+".idx", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  131. }
  132. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, 1024*1024, dataBaseFileName, ".vif", false, true, nil, throttler); err != nil {
  133. return err
  134. }
  135. if modifiedTsNs > 0 {
  136. os.Chtimes(dataBaseFileName+".vif", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  137. }
  138. os.Remove(dataBaseFileName + ".note")
  139. return nil
  140. })
  141. if err != nil {
  142. return err
  143. }
  144. if dataBaseFileName == "" {
  145. return fmt.Errorf("not found volume %d file", req.VolumeId)
  146. }
  147. idxFileName = indexBaseFileName + ".idx"
  148. datFileName = dataBaseFileName + ".dat"
  149. defer func() {
  150. if err != nil && dataBaseFileName != "" {
  151. os.Remove(idxFileName)
  152. os.Remove(datFileName)
  153. os.Remove(dataBaseFileName + ".vif")
  154. }
  155. }()
  156. if err = checkCopyFiles(volFileInfoResp, hasRemoteDatFile, idxFileName, datFileName); err != nil { // added by panyc16
  157. return err
  158. }
  159. // mount the volume
  160. err = vs.store.MountVolume(needle.VolumeId(req.VolumeId))
  161. if err != nil {
  162. return fmt.Errorf("failed to mount volume %d: %v", req.VolumeId, err)
  163. }
  164. if err = stream.Send(&volume_server_pb.VolumeCopyResponse{
  165. LastAppendAtNs: volFileInfoResp.DatFileTimestampSeconds * uint64(time.Second),
  166. }); err != nil {
  167. glog.Errorf("send response: %v", err)
  168. }
  169. return err
  170. }
  171. func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  172. return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.compactionBytePerSecond))
  173. }
  174. func (vs *VolumeServer) doCopyFileWithThrottler(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc, throttler *util.WriteThrottler) (modifiedTsNs int64, err error) {
  175. copyFileClient, err := client.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
  176. VolumeId: vid,
  177. Ext: ext,
  178. CompactionRevision: compactRevision,
  179. StopOffset: stopOffset,
  180. Collection: collection,
  181. IsEcVolume: isEcVolume,
  182. IgnoreSourceFileNotFound: ignoreSourceFileNotFound,
  183. })
  184. if err != nil {
  185. return modifiedTsNs, fmt.Errorf("failed to start copying volume %d %s file: %v", vid, ext, err)
  186. }
  187. modifiedTsNs, err = writeToFile(copyFileClient, baseFileName+ext, throttler, isAppend, progressFn)
  188. if err != nil {
  189. return modifiedTsNs, fmt.Errorf("failed to copy %s file: %v", baseFileName+ext, err)
  190. }
  191. return modifiedTsNs, nil
  192. }
  193. /*
  194. *
  195. only check the differ of the file size
  196. todo: maybe should check the received count and deleted count of the volume
  197. */
  198. func checkCopyFiles(originFileInf *volume_server_pb.ReadVolumeFileStatusResponse, hasRemoteDatFile bool, idxFileName, datFileName string) error {
  199. stat, err := os.Stat(idxFileName)
  200. if err != nil {
  201. return fmt.Errorf("stat idx file %s failed: %v", idxFileName, err)
  202. }
  203. if originFileInf.IdxFileSize != uint64(stat.Size()) {
  204. return fmt.Errorf("idx file %s size [%v] is not same as origin file size [%v]",
  205. idxFileName, stat.Size(), originFileInf.IdxFileSize)
  206. }
  207. if hasRemoteDatFile {
  208. return nil
  209. }
  210. stat, err = os.Stat(datFileName)
  211. if err != nil {
  212. return fmt.Errorf("get dat file info failed, %w", err)
  213. }
  214. if originFileInf.DatFileSize != uint64(stat.Size()) {
  215. return fmt.Errorf("the dat file size [%v] is not same as origin file size [%v]",
  216. stat.Size(), originFileInf.DatFileSize)
  217. }
  218. return nil
  219. }
  220. func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string, wt *util.WriteThrottler, isAppend bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  221. glog.V(4).Infof("writing to %s", fileName)
  222. flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  223. if isAppend {
  224. flags = os.O_WRONLY | os.O_CREATE
  225. }
  226. dst, err := os.OpenFile(fileName, flags, 0644)
  227. if err != nil {
  228. return modifiedTsNs, nil
  229. }
  230. defer dst.Close()
  231. var progressedBytes int64
  232. for {
  233. resp, receiveErr := client.Recv()
  234. if receiveErr == io.EOF {
  235. break
  236. }
  237. if resp != nil && resp.ModifiedTsNs != 0 {
  238. modifiedTsNs = resp.ModifiedTsNs
  239. }
  240. if receiveErr != nil {
  241. return modifiedTsNs, fmt.Errorf("receiving %s: %v", fileName, receiveErr)
  242. }
  243. dst.Write(resp.FileContent)
  244. progressedBytes += int64(len(resp.FileContent))
  245. if progressFn != nil {
  246. if !progressFn(progressedBytes) {
  247. return modifiedTsNs, fmt.Errorf("interrupted copy operation")
  248. }
  249. }
  250. wt.MaybeSlowdown(int64(len(resp.FileContent)))
  251. }
  252. return modifiedTsNs, nil
  253. }
  254. func (vs *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_server_pb.ReadVolumeFileStatusRequest) (*volume_server_pb.ReadVolumeFileStatusResponse, error) {
  255. resp := &volume_server_pb.ReadVolumeFileStatusResponse{}
  256. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  257. if v == nil {
  258. return nil, fmt.Errorf("not found volume id %d", req.VolumeId)
  259. }
  260. resp.VolumeId = req.VolumeId
  261. datSize, idxSize, modTime := v.FileStat()
  262. resp.DatFileSize = datSize
  263. resp.IdxFileSize = idxSize
  264. resp.DatFileTimestampSeconds = uint64(modTime.Unix())
  265. resp.IdxFileTimestampSeconds = uint64(modTime.Unix())
  266. resp.FileCount = v.FileCount()
  267. resp.CompactionRevision = uint32(v.CompactionRevision)
  268. resp.Collection = v.Collection
  269. resp.DiskType = string(v.DiskType())
  270. resp.VolumeInfo = v.GetVolumeInfo()
  271. resp.Version = uint32(v.Version())
  272. return resp, nil
  273. }
  274. // CopyFile client pulls the volume related file from the source server.
  275. // if req.CompactionRevision != math.MaxUint32, it ensures the compact revision is as expected
  276. // The copying still stop at req.StopOffset, but you can set it to math.MaxUint64 in order to read all data.
  277. func (vs *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream volume_server_pb.VolumeServer_CopyFileServer) error {
  278. var fileName string
  279. if !req.IsEcVolume {
  280. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  281. if v == nil {
  282. return fmt.Errorf("not found volume id %d", req.VolumeId)
  283. }
  284. if uint32(v.CompactionRevision) != req.CompactionRevision && req.CompactionRevision != math.MaxUint32 {
  285. return fmt.Errorf("volume %d is compacted", req.VolumeId)
  286. }
  287. v.SyncToDisk()
  288. fileName = v.FileName(req.Ext)
  289. } else {
  290. baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId)) + req.Ext
  291. for _, location := range vs.store.Locations {
  292. tName := util.Join(location.Directory, baseFileName)
  293. if util.FileExists(tName) {
  294. fileName = tName
  295. }
  296. tName = util.Join(location.IdxDirectory, baseFileName)
  297. if util.FileExists(tName) {
  298. fileName = tName
  299. }
  300. }
  301. if fileName == "" {
  302. if req.IgnoreSourceFileNotFound {
  303. return nil
  304. }
  305. return fmt.Errorf("CopyFile not found ec volume id %d", req.VolumeId)
  306. }
  307. }
  308. bytesToRead := int64(req.StopOffset)
  309. file, err := os.Open(fileName)
  310. if err != nil {
  311. if req.IgnoreSourceFileNotFound && err == os.ErrNotExist {
  312. return nil
  313. }
  314. return err
  315. }
  316. defer file.Close()
  317. fileInfo, err := file.Stat()
  318. if err != nil {
  319. return err
  320. }
  321. fileModTsNs := fileInfo.ModTime().UnixNano()
  322. buffer := make([]byte, BufferSizeLimit)
  323. for bytesToRead > 0 {
  324. bytesread, err := file.Read(buffer)
  325. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead)
  326. if err != nil {
  327. if err != io.EOF {
  328. return err
  329. }
  330. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead, "err", err.Error())
  331. break
  332. }
  333. if int64(bytesread) > bytesToRead {
  334. bytesread = int(bytesToRead)
  335. }
  336. err = stream.Send(&volume_server_pb.CopyFileResponse{
  337. FileContent: buffer[:bytesread],
  338. ModifiedTsNs: fileModTsNs,
  339. })
  340. if err != nil {
  341. // println("sending", bytesread, "bytes err", err.Error())
  342. return err
  343. }
  344. fileModTsNs = 0 // only send once
  345. bytesToRead -= int64(bytesread)
  346. }
  347. return nil
  348. }
  349. // ReceiveFile receives a file stream from client and writes it to storage
  350. func (vs *VolumeServer) ReceiveFile(stream volume_server_pb.VolumeServer_ReceiveFileServer) error {
  351. var fileInfo *volume_server_pb.ReceiveFileInfo
  352. var targetFile *os.File
  353. var filePath string
  354. var bytesWritten uint64
  355. defer func() {
  356. if targetFile != nil {
  357. targetFile.Close()
  358. }
  359. }()
  360. for {
  361. req, err := stream.Recv()
  362. if err == io.EOF {
  363. // Stream completed successfully
  364. if targetFile != nil {
  365. targetFile.Sync()
  366. glog.V(1).Infof("Successfully received file %s (%d bytes)", filePath, bytesWritten)
  367. }
  368. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  369. BytesWritten: bytesWritten,
  370. })
  371. }
  372. if err != nil {
  373. // Clean up on error
  374. if targetFile != nil {
  375. targetFile.Close()
  376. os.Remove(filePath)
  377. }
  378. glog.Errorf("Failed to receive stream: %v", err)
  379. return fmt.Errorf("failed to receive stream: %v", err)
  380. }
  381. switch data := req.Data.(type) {
  382. case *volume_server_pb.ReceiveFileRequest_Info:
  383. // First message contains file info
  384. fileInfo = data.Info
  385. glog.V(1).Infof("ReceiveFile: volume %d, ext %s, collection %s, shard %d, size %d",
  386. fileInfo.VolumeId, fileInfo.Ext, fileInfo.Collection, fileInfo.ShardId, fileInfo.FileSize)
  387. // Create file path based on file info
  388. if fileInfo.IsEcVolume {
  389. // Find storage location for EC shard
  390. var targetLocation *storage.DiskLocation
  391. for _, location := range vs.store.Locations {
  392. if location.DiskType == types.HardDriveType {
  393. targetLocation = location
  394. break
  395. }
  396. }
  397. if targetLocation == nil && len(vs.store.Locations) > 0 {
  398. targetLocation = vs.store.Locations[0] // Fall back to first available location
  399. }
  400. if targetLocation == nil {
  401. glog.Errorf("ReceiveFile: no storage location available")
  402. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  403. Error: "no storage location available",
  404. })
  405. }
  406. // Create EC shard file path
  407. baseFileName := erasure_coding.EcShardBaseFileName(fileInfo.Collection, int(fileInfo.VolumeId))
  408. filePath = util.Join(targetLocation.Directory, baseFileName+fileInfo.Ext)
  409. } else {
  410. // Regular volume file
  411. v := vs.store.GetVolume(needle.VolumeId(fileInfo.VolumeId))
  412. if v == nil {
  413. glog.Errorf("ReceiveFile: volume %d not found", fileInfo.VolumeId)
  414. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  415. Error: fmt.Sprintf("volume %d not found", fileInfo.VolumeId),
  416. })
  417. }
  418. filePath = v.FileName(fileInfo.Ext)
  419. }
  420. // Create target file
  421. targetFile, err = os.Create(filePath)
  422. if err != nil {
  423. glog.Errorf("ReceiveFile: failed to create file %s: %v", filePath, err)
  424. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  425. Error: fmt.Sprintf("failed to create file: %v", err),
  426. })
  427. }
  428. glog.V(1).Infof("ReceiveFile: created target file %s", filePath)
  429. case *volume_server_pb.ReceiveFileRequest_FileContent:
  430. // Subsequent messages contain file content
  431. if targetFile == nil {
  432. glog.Errorf("ReceiveFile: file info must be sent first")
  433. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  434. Error: "file info must be sent first",
  435. })
  436. }
  437. n, err := targetFile.Write(data.FileContent)
  438. if err != nil {
  439. targetFile.Close()
  440. os.Remove(filePath)
  441. glog.Errorf("ReceiveFile: failed to write to file %s: %v", filePath, err)
  442. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  443. Error: fmt.Sprintf("failed to write file: %v", err),
  444. })
  445. }
  446. bytesWritten += uint64(n)
  447. glog.V(2).Infof("ReceiveFile: wrote %d bytes to %s (total: %d)", n, filePath, bytesWritten)
  448. default:
  449. glog.Errorf("ReceiveFile: unknown message type")
  450. return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{
  451. Error: "unknown message type",
  452. })
  453. }
  454. }
  455. }