store_replicate.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. package topology
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"sort"
	"strconv"
	"strings"
	"time"

	"google.golang.org/grpc"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/security"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"github.com/seaweedfs/seaweedfs/weed/storage"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/util/buffer_pool"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
)
  24. func ReplicatedWrite(ctx context.Context, masterFn operation.GetMasterFn, grpcDialOption grpc.DialOption, s *storage.Store, volumeId needle.VolumeId, n *needle.Needle, r *http.Request, contentMd5 string) (isUnchanged bool, err error) {
  25. //check JWT
  26. jwt := security.GetJwt(r)
  27. // check whether this is a replicated write request
  28. var remoteLocations []operation.Location
  29. if r.FormValue("type") != "replicate" {
  30. // this is the initial request
  31. remoteLocations, err = GetWritableRemoteReplications(s, grpcDialOption, volumeId, masterFn)
  32. if err != nil {
  33. glog.V(0).Infoln(err)
  34. return
  35. }
  36. }
  37. // read fsync value
  38. fsync := false
  39. if r.FormValue("fsync") == "true" {
  40. fsync = true
  41. }
  42. if s.GetVolume(volumeId) != nil {
  43. start := time.Now()
  44. inFlightGauge := stats.VolumeServerInFlightRequestsGauge.WithLabelValues(stats.WriteToLocalDisk)
  45. inFlightGauge.Inc()
  46. defer inFlightGauge.Dec()
  47. isUnchanged, err = s.WriteVolumeNeedle(volumeId, n, true, fsync)
  48. stats.VolumeServerRequestHistogram.WithLabelValues(stats.WriteToLocalDisk).Observe(time.Since(start).Seconds())
  49. if err != nil {
  50. stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorWriteToLocalDisk).Inc()
  51. err = fmt.Errorf("failed to write to local disk: %w", err)
  52. glog.V(0).Infoln(err)
  53. return
  54. }
  55. }
  56. if len(remoteLocations) > 0 { //send to other replica locations
  57. start := time.Now()
  58. inFlightGauge := stats.VolumeServerInFlightRequestsGauge.WithLabelValues(stats.WriteToReplicas)
  59. inFlightGauge.Inc()
  60. defer inFlightGauge.Dec()
  61. err = DistributedOperation(remoteLocations, func(location operation.Location) error {
  62. u := url.URL{
  63. Scheme: "http",
  64. Host: location.Url,
  65. Path: r.URL.Path,
  66. }
  67. q := url.Values{
  68. "type": {"replicate"},
  69. "ttl": {n.Ttl.String()},
  70. }
  71. if n.LastModified > 0 {
  72. q.Set("ts", strconv.FormatUint(n.LastModified, 10))
  73. }
  74. if n.IsChunkedManifest() {
  75. q.Set("cm", "true")
  76. }
  77. u.RawQuery = q.Encode()
  78. pairMap := make(map[string]string)
  79. if n.HasPairs() {
  80. tmpMap := make(map[string]string)
  81. err := json.Unmarshal(n.Pairs, &tmpMap)
  82. if err != nil {
  83. stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorUnmarshalPairs).Inc()
  84. glog.V(0).Infoln("Unmarshal pairs error:", err)
  85. }
  86. for k, v := range tmpMap {
  87. pairMap[needle.PairNamePrefix+k] = v
  88. }
  89. }
  90. bytesBuffer := buffer_pool.SyncPoolGetBuffer()
  91. defer buffer_pool.SyncPoolPutBuffer(bytesBuffer)
  92. // volume server do not know about encryption
  93. // TODO optimize here to compress data only once
  94. uploadOption := &operation.UploadOption{
  95. UploadUrl: u.String(),
  96. Filename: string(n.Name),
  97. Cipher: false,
  98. IsInputCompressed: n.IsCompressed(),
  99. MimeType: string(n.Mime),
  100. PairMap: pairMap,
  101. Jwt: jwt,
  102. Md5: contentMd5,
  103. BytesBuffer: bytesBuffer,
  104. }
  105. uploader, err := operation.NewUploader()
  106. if err != nil {
  107. glog.Errorf("replication-UploadData, err:%v, url:%s", err, u.String())
  108. return err
  109. }
  110. _, err = uploader.UploadData(ctx, n.Data, uploadOption)
  111. if err != nil {
  112. glog.Errorf("replication-UploadData, err:%v, url:%s", err, u.String())
  113. }
  114. return err
  115. })
  116. stats.VolumeServerRequestHistogram.WithLabelValues(stats.WriteToReplicas).Observe(time.Since(start).Seconds())
  117. if err != nil {
  118. stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorWriteToReplicas).Inc()
  119. err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err)
  120. glog.V(0).Infoln(err)
  121. return false, err
  122. }
  123. }
  124. return
  125. }
  126. func ReplicatedDelete(masterFn operation.GetMasterFn, grpcDialOption grpc.DialOption, store *storage.Store, volumeId needle.VolumeId, n *needle.Needle, r *http.Request) (size types.Size, err error) {
  127. //check JWT
  128. jwt := security.GetJwt(r)
  129. var remoteLocations []operation.Location
  130. if r.FormValue("type") != "replicate" {
  131. remoteLocations, err = GetWritableRemoteReplications(store, grpcDialOption, volumeId, masterFn)
  132. if err != nil {
  133. glog.V(0).Infoln(err)
  134. return
  135. }
  136. }
  137. size, err = store.DeleteVolumeNeedle(volumeId, n)
  138. if err != nil {
  139. glog.V(0).Infoln("delete error:", err)
  140. return
  141. }
  142. if len(remoteLocations) > 0 { //send to other replica locations
  143. if err = DistributedOperation(remoteLocations, func(location operation.Location) error {
  144. return util_http.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", string(jwt))
  145. }); err != nil {
  146. size = 0
  147. }
  148. }
  149. return
  150. }
  151. type DistributedOperationResult map[string]error
  152. func (dr DistributedOperationResult) Error() error {
  153. var errs []string
  154. for k, v := range dr {
  155. if v != nil {
  156. errs = append(errs, fmt.Sprintf("[%s]: %v", k, v))
  157. }
  158. }
  159. if len(errs) == 0 {
  160. return nil
  161. }
  162. return errors.New(strings.Join(errs, "\n"))
  163. }
  164. type RemoteResult struct {
  165. Host string
  166. Error error
  167. }
  168. func DistributedOperation(locations []operation.Location, op func(location operation.Location) error) error {
  169. length := len(locations)
  170. results := make(chan RemoteResult)
  171. for _, location := range locations {
  172. go func(location operation.Location, results chan RemoteResult) {
  173. results <- RemoteResult{location.Url, op(location)}
  174. }(location, results)
  175. }
  176. ret := DistributedOperationResult(make(map[string]error))
  177. for i := 0; i < length; i++ {
  178. result := <-results
  179. ret[result.Host] = result.Error
  180. }
  181. return ret.Error()
  182. }
  183. func GetWritableRemoteReplications(s *storage.Store, grpcDialOption grpc.DialOption, volumeId needle.VolumeId, masterFn operation.GetMasterFn) (remoteLocations []operation.Location, err error) {
  184. v := s.GetVolume(volumeId)
  185. if v != nil && v.ReplicaPlacement.GetCopyCount() == 1 {
  186. return
  187. }
  188. // not on local store, or has replications
  189. lookupResult, lookupErr := operation.LookupVolumeId(masterFn, grpcDialOption, volumeId.String())
  190. if lookupErr == nil {
  191. selfUrl := util.JoinHostPort(s.Ip, s.Port)
  192. for _, location := range lookupResult.Locations {
  193. if location.Url != selfUrl {
  194. remoteLocations = append(remoteLocations, location)
  195. }
  196. }
  197. } else {
  198. err = fmt.Errorf("replicating lookup failed for %d: %v", volumeId, lookupErr)
  199. return
  200. }
  201. if v != nil {
  202. // has one local and has remote replications
  203. copyCount := v.ReplicaPlacement.GetCopyCount()
  204. if len(lookupResult.Locations) < copyCount {
  205. err = fmt.Errorf("replicating operations [%d] is less than volume %d replication copy count [%d]",
  206. len(lookupResult.Locations), volumeId, copyCount)
  207. }
  208. }
  209. return
  210. }