// read_log_from_disk.go
package logstore

import (
	"context"
	"fmt"
	"math"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"

	"google.golang.org/protobuf/proto"
)
  17. func GenLogOnDiskReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
  18. partitionDir := topic.PartitionDir(t, p)
  19. lookupFileIdFn := filer.LookupFn(filerClient)
  20. eachChunkFn := func(buf []byte, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
  21. for pos := 0; pos+4 < len(buf); {
  22. size := util.BytesToUint32(buf[pos : pos+4])
  23. if pos+4+int(size) > len(buf) {
  24. err = fmt.Errorf("GenLogOnDiskReadFunc: read [%d,%d) from [0,%d)", pos, pos+int(size)+4, len(buf))
  25. return
  26. }
  27. entryData := buf[pos+4 : pos+4+int(size)]
  28. logEntry := &filer_pb.LogEntry{}
  29. if err = proto.Unmarshal(entryData, logEntry); err != nil {
  30. pos += 4 + int(size)
  31. err = fmt.Errorf("unexpected unmarshal mq_pb.Message: %w", err)
  32. return
  33. }
  34. if logEntry.TsNs <= starTsNs {
  35. pos += 4 + int(size)
  36. continue
  37. }
  38. if stopTsNs != 0 && logEntry.TsNs > stopTsNs {
  39. println("stopTsNs", stopTsNs, "logEntry.TsNs", logEntry.TsNs)
  40. return
  41. }
  42. // fmt.Printf(" read logEntry: %v, ts %v\n", string(logEntry.Key), time.Unix(0, logEntry.TsNs).UTC())
  43. if _, err = eachLogEntryFn(logEntry); err != nil {
  44. err = fmt.Errorf("process log entry %v: %w", logEntry, err)
  45. return
  46. }
  47. processedTsNs = logEntry.TsNs
  48. pos += 4 + int(size)
  49. }
  50. return
  51. }
  52. eachFileFn := func(entry *filer_pb.Entry, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
  53. if len(entry.Content) > 0 {
  54. // skip .offset files
  55. return
  56. }
  57. var urlStrings []string
  58. for _, chunk := range entry.Chunks {
  59. if chunk.Size == 0 {
  60. continue
  61. }
  62. if chunk.IsChunkManifest {
  63. glog.Warningf("this should not happen. unexpected chunk manifest in %s/%s", partitionDir, entry.Name)
  64. return
  65. }
  66. urlStrings, err = lookupFileIdFn(context.Background(), chunk.FileId)
  67. if err != nil {
  68. err = fmt.Errorf("lookup %s: %v", chunk.FileId, err)
  69. return
  70. }
  71. if len(urlStrings) == 0 {
  72. err = fmt.Errorf("no url found for %s", chunk.FileId)
  73. return
  74. }
  75. // try one of the urlString until util.Get(urlString) succeeds
  76. var processed bool
  77. for _, urlString := range urlStrings {
  78. // TODO optimization opportunity: reuse the buffer
  79. var data []byte
  80. if data, _, err = util_http.Get(urlString); err == nil {
  81. processed = true
  82. if processedTsNs, err = eachChunkFn(data, eachLogEntryFn, starTsNs, stopTsNs); err != nil {
  83. return
  84. }
  85. break
  86. }
  87. }
  88. if !processed {
  89. err = fmt.Errorf("no data processed for %s %s", entry.Name, chunk.FileId)
  90. return
  91. }
  92. }
  93. return
  94. }
  95. return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
  96. startFileName := startPosition.UTC().Format(topic.TIME_FORMAT)
  97. startTsNs := startPosition.Time.UnixNano()
  98. stopTime := time.Unix(0, stopTsNs)
  99. var processedTsNs int64
  100. err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
  101. return filer_pb.SeaweedList(context.Background(), client, partitionDir, "", func(entry *filer_pb.Entry, isLast bool) error {
  102. if entry.IsDirectory {
  103. return nil
  104. }
  105. if strings.HasSuffix(entry.Name, ".parquet") {
  106. return nil
  107. }
  108. // FIXME: this is a hack to skip the .offset files
  109. if strings.HasSuffix(entry.Name, ".offset") {
  110. return nil
  111. }
  112. if stopTsNs != 0 && entry.Name > stopTime.UTC().Format(topic.TIME_FORMAT) {
  113. isDone = true
  114. return nil
  115. }
  116. if entry.Name < startPosition.UTC().Format(topic.TIME_FORMAT) {
  117. return nil
  118. }
  119. if processedTsNs, err = eachFileFn(entry, eachLogEntryFn, startTsNs, stopTsNs); err != nil {
  120. return err
  121. }
  122. return nil
  123. }, startFileName, true, math.MaxInt32)
  124. })
  125. lastReadPosition = log_buffer.NewMessagePosition(processedTsNs, -2)
  126. return
  127. }
  128. }