upload_content.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. package operation
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "mime"
  9. "mime/multipart"
  10. "net/http"
  11. "net/textproto"
  12. "path/filepath"
  13. "strings"
  14. "sync"
  15. "time"
  16. "github.com/seaweedfs/seaweedfs/weed/util/request_id"
  17. "github.com/valyala/bytebufferpool"
  18. "github.com/seaweedfs/seaweedfs/weed/glog"
  19. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  20. "github.com/seaweedfs/seaweedfs/weed/security"
  21. "github.com/seaweedfs/seaweedfs/weed/stats"
  22. "github.com/seaweedfs/seaweedfs/weed/util"
  23. util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
  24. util_http_client "github.com/seaweedfs/seaweedfs/weed/util/http/client"
  25. )
  26. type UploadOption struct {
  27. UploadUrl string
  28. Filename string
  29. Cipher bool
  30. IsInputCompressed bool
  31. MimeType string
  32. PairMap map[string]string
  33. Jwt security.EncodedJwt
  34. RetryForever bool
  35. Md5 string
  36. BytesBuffer *bytes.Buffer
  37. }
  38. type UploadResult struct {
  39. Name string `json:"name,omitempty"`
  40. Size uint32 `json:"size,omitempty"`
  41. Error string `json:"error,omitempty"`
  42. ETag string `json:"eTag,omitempty"`
  43. CipherKey []byte `json:"cipherKey,omitempty"`
  44. Mime string `json:"mime,omitempty"`
  45. Gzip uint32 `json:"gzip,omitempty"`
  46. ContentMd5 string `json:"contentMd5,omitempty"`
  47. RetryCount int `json:"-"`
  48. }
  49. func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64, tsNs int64) *filer_pb.FileChunk {
  50. fid, _ := filer_pb.ToFileIdObject(fileId)
  51. return &filer_pb.FileChunk{
  52. FileId: fileId,
  53. Offset: offset,
  54. Size: uint64(uploadResult.Size),
  55. ModifiedTsNs: tsNs,
  56. ETag: uploadResult.ContentMd5,
  57. CipherKey: uploadResult.CipherKey,
  58. IsCompressed: uploadResult.Gzip > 0,
  59. Fid: fid,
  60. }
  61. }
  62. // ToPbFileChunkWithSSE creates a FileChunk with SSE metadata
  63. func (uploadResult *UploadResult) ToPbFileChunkWithSSE(fileId string, offset int64, tsNs int64, sseType filer_pb.SSEType, sseMetadata []byte) *filer_pb.FileChunk {
  64. fid, _ := filer_pb.ToFileIdObject(fileId)
  65. chunk := &filer_pb.FileChunk{
  66. FileId: fileId,
  67. Offset: offset,
  68. Size: uint64(uploadResult.Size),
  69. ModifiedTsNs: tsNs,
  70. ETag: uploadResult.ContentMd5,
  71. CipherKey: uploadResult.CipherKey,
  72. IsCompressed: uploadResult.Gzip > 0,
  73. Fid: fid,
  74. }
  75. // Add SSE metadata if provided
  76. chunk.SseType = sseType
  77. if len(sseMetadata) > 0 {
  78. chunk.SseMetadata = sseMetadata
  79. }
  80. return chunk
  81. }
  82. var (
  83. fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
  84. uploader *Uploader
  85. uploaderErr error
  86. once sync.Once
  87. )
  88. // HTTPClient interface for testing
  89. type HTTPClient interface {
  90. Do(req *http.Request) (*http.Response, error)
  91. }
  92. // Uploader
  93. type Uploader struct {
  94. httpClient HTTPClient
  95. }
  96. func NewUploader() (*Uploader, error) {
  97. once.Do(func() {
  98. // With Dial context
  99. var httpClient *util_http_client.HTTPClient
  100. httpClient, uploaderErr = util_http.NewGlobalHttpClient(util_http_client.AddDialContext)
  101. if uploaderErr != nil {
  102. uploaderErr = fmt.Errorf("error initializing the loader: %s", uploaderErr)
  103. }
  104. if httpClient != nil {
  105. uploader = newUploader(httpClient)
  106. }
  107. })
  108. return uploader, uploaderErr
  109. }
  110. func newUploader(httpClient HTTPClient) *Uploader {
  111. return &Uploader{
  112. httpClient: httpClient,
  113. }
  114. }
  115. // UploadWithRetry will retry both assigning volume request and uploading content
  116. // The option parameter does not need to specify UploadUrl and Jwt, which will come from assigning volume.
  117. func (uploader *Uploader) UploadWithRetry(filerClient filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (fileId string, uploadResult *UploadResult, err error, data []byte) {
  118. doUploadFunc := func() error {
  119. var host string
  120. var auth security.EncodedJwt
  121. // grpc assign volume
  122. if grpcAssignErr := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
  123. resp, assignErr := client.AssignVolume(context.Background(), assignRequest)
  124. if assignErr != nil {
  125. glog.V(0).Infof("assign volume failure %v: %v", assignRequest, assignErr)
  126. return assignErr
  127. }
  128. if resp.Error != "" {
  129. return fmt.Errorf("assign volume failure %v: %v", assignRequest, resp.Error)
  130. }
  131. fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth)
  132. loc := resp.Location
  133. host = filerClient.AdjustedUrl(loc)
  134. return nil
  135. }); grpcAssignErr != nil {
  136. return fmt.Errorf("filerGrpcAddress assign volume: %w", grpcAssignErr)
  137. }
  138. uploadOption.UploadUrl = genFileUrlFn(host, fileId)
  139. uploadOption.Jwt = auth
  140. var uploadErr error
  141. uploadResult, uploadErr, data = uploader.doUpload(context.Background(), reader, uploadOption)
  142. return uploadErr
  143. }
  144. if uploadOption.RetryForever {
  145. util.RetryUntil("uploadWithRetryForever", doUploadFunc, func(err error) (shouldContinue bool) {
  146. glog.V(0).Infof("upload content: %v", err)
  147. return true
  148. })
  149. } else {
  150. uploadErrList := []string{"transport", "is read only"}
  151. err = util.MultiRetry("uploadWithRetry", uploadErrList, doUploadFunc)
  152. }
  153. return
  154. }
  155. // Upload sends a POST request to a volume server to upload the content with adjustable compression level
  156. func (uploader *Uploader) UploadData(ctx context.Context, data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  157. uploadResult, err = uploader.retriedUploadData(ctx, data, option)
  158. return
  159. }
  160. // Upload sends a POST request to a volume server to upload the content with fast compression
  161. func (uploader *Uploader) Upload(ctx context.Context, reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  162. uploadResult, err, data = uploader.doUpload(ctx, reader, option)
  163. return
  164. }
  165. func (uploader *Uploader) doUpload(ctx context.Context, reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  166. bytesReader, ok := reader.(*util.BytesReader)
  167. if ok {
  168. data = bytesReader.Bytes
  169. } else {
  170. data, err = io.ReadAll(reader)
  171. if err != nil {
  172. err = fmt.Errorf("read input: %w", err)
  173. return
  174. }
  175. }
  176. uploadResult, uploadErr := uploader.retriedUploadData(ctx, data, option)
  177. return uploadResult, uploadErr, data
  178. }
  179. func (uploader *Uploader) retriedUploadData(ctx context.Context, data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  180. for i := 0; i < 3; i++ {
  181. if i > 0 {
  182. time.Sleep(time.Millisecond * time.Duration(237*(i+1)))
  183. }
  184. uploadResult, err = uploader.doUploadData(ctx, data, option)
  185. if err == nil {
  186. uploadResult.RetryCount = i
  187. return
  188. }
  189. glog.WarningfCtx(ctx, "uploading %d to %s: %v", i, option.UploadUrl, err)
  190. }
  191. return
  192. }
  193. func (uploader *Uploader) doUploadData(ctx context.Context, data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  194. contentIsGzipped := option.IsInputCompressed
  195. shouldGzipNow := false
  196. if !option.IsInputCompressed {
  197. if option.MimeType == "" {
  198. option.MimeType = http.DetectContentType(data)
  199. // println("detect1 mimetype to", MimeType)
  200. if option.MimeType == "application/octet-stream" {
  201. option.MimeType = ""
  202. }
  203. }
  204. if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(option.Filename), option.MimeType); iAmSure && shouldBeCompressed {
  205. shouldGzipNow = true
  206. } else if !iAmSure && option.MimeType == "" && len(data) > 16*1024 {
  207. var compressed []byte
  208. compressed, err = util.GzipData(data[0:128])
  209. if err != nil {
  210. return
  211. }
  212. shouldGzipNow = len(compressed)*10 < 128*9 // can not compress to less than 90%
  213. }
  214. }
  215. var clearDataLen int
  216. // gzip if possible
  217. // this could be double copying
  218. clearDataLen = len(data)
  219. clearData := data
  220. if shouldGzipNow {
  221. compressed, compressErr := util.GzipData(data)
  222. // fmt.Printf("data is compressed from %d ==> %d\n", len(data), len(compressed))
  223. if compressErr == nil {
  224. data = compressed
  225. contentIsGzipped = true
  226. }
  227. } else if option.IsInputCompressed {
  228. // just to get the clear data length
  229. clearData, err = util.DecompressData(data)
  230. if err == nil {
  231. clearDataLen = len(clearData)
  232. }
  233. }
  234. if option.Cipher {
  235. // encrypt(gzip(data))
  236. // encrypt
  237. cipherKey := util.GenCipherKey()
  238. encryptedData, encryptionErr := util.Encrypt(data, cipherKey)
  239. if encryptionErr != nil {
  240. err = fmt.Errorf("encrypt input: %w", encryptionErr)
  241. return
  242. }
  243. // upload data
  244. uploadResult, err = uploader.upload_content(ctx, func(w io.Writer) (err error) {
  245. _, err = w.Write(encryptedData)
  246. return
  247. }, len(encryptedData), &UploadOption{
  248. UploadUrl: option.UploadUrl,
  249. Filename: "",
  250. Cipher: false,
  251. IsInputCompressed: false,
  252. MimeType: "",
  253. PairMap: nil,
  254. Jwt: option.Jwt,
  255. })
  256. if uploadResult == nil {
  257. return
  258. }
  259. uploadResult.Name = option.Filename
  260. uploadResult.Mime = option.MimeType
  261. uploadResult.CipherKey = cipherKey
  262. uploadResult.Size = uint32(clearDataLen)
  263. if contentIsGzipped {
  264. uploadResult.Gzip = 1
  265. }
  266. } else {
  267. // upload data
  268. uploadResult, err = uploader.upload_content(ctx, func(w io.Writer) (err error) {
  269. _, err = w.Write(data)
  270. return
  271. }, len(data), &UploadOption{
  272. UploadUrl: option.UploadUrl,
  273. Filename: option.Filename,
  274. Cipher: false,
  275. IsInputCompressed: contentIsGzipped,
  276. MimeType: option.MimeType,
  277. PairMap: option.PairMap,
  278. Jwt: option.Jwt,
  279. Md5: option.Md5,
  280. BytesBuffer: option.BytesBuffer,
  281. })
  282. if uploadResult == nil {
  283. return
  284. }
  285. uploadResult.Size = uint32(clearDataLen)
  286. if contentIsGzipped {
  287. uploadResult.Gzip = 1
  288. }
  289. }
  290. return uploadResult, err
  291. }
  292. func (uploader *Uploader) upload_content(ctx context.Context, fillBufferFunction func(w io.Writer) error, originalDataSize int, option *UploadOption) (*UploadResult, error) {
  293. var body_writer *multipart.Writer
  294. var reqReader *bytes.Reader
  295. var buf *bytebufferpool.ByteBuffer
  296. if option.BytesBuffer == nil {
  297. buf = GetBuffer()
  298. defer PutBuffer(buf)
  299. body_writer = multipart.NewWriter(buf)
  300. } else {
  301. option.BytesBuffer.Reset()
  302. body_writer = multipart.NewWriter(option.BytesBuffer)
  303. }
  304. h := make(textproto.MIMEHeader)
  305. filename := fileNameEscaper.Replace(option.Filename)
  306. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
  307. h.Set("Idempotency-Key", option.UploadUrl)
  308. if option.MimeType == "" {
  309. option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
  310. }
  311. if option.MimeType != "" {
  312. h.Set("Content-Type", option.MimeType)
  313. }
  314. if option.IsInputCompressed {
  315. h.Set("Content-Encoding", "gzip")
  316. }
  317. if option.Md5 != "" {
  318. h.Set("Content-MD5", option.Md5)
  319. }
  320. file_writer, cp_err := body_writer.CreatePart(h)
  321. if cp_err != nil {
  322. glog.V(0).InfolnCtx(ctx, "error creating form file", cp_err.Error())
  323. return nil, cp_err
  324. }
  325. if err := fillBufferFunction(file_writer); err != nil {
  326. glog.V(0).InfolnCtx(ctx, "error copying data", err)
  327. return nil, err
  328. }
  329. content_type := body_writer.FormDataContentType()
  330. if err := body_writer.Close(); err != nil {
  331. glog.V(0).InfolnCtx(ctx, "error closing body", err)
  332. return nil, err
  333. }
  334. if option.BytesBuffer == nil {
  335. reqReader = bytes.NewReader(buf.Bytes())
  336. } else {
  337. reqReader = bytes.NewReader(option.BytesBuffer.Bytes())
  338. }
  339. req, postErr := http.NewRequest(http.MethodPost, option.UploadUrl, reqReader)
  340. if postErr != nil {
  341. glog.V(1).InfofCtx(ctx, "create upload request %s: %v", option.UploadUrl, postErr)
  342. return nil, fmt.Errorf("create upload request %s: %v", option.UploadUrl, postErr)
  343. }
  344. req.Header.Set("Content-Type", content_type)
  345. for k, v := range option.PairMap {
  346. req.Header.Set(k, v)
  347. }
  348. if option.Jwt != "" {
  349. req.Header.Set("Authorization", "BEARER "+string(option.Jwt))
  350. }
  351. request_id.InjectToRequest(ctx, req)
  352. // print("+")
  353. resp, post_err := uploader.httpClient.Do(req)
  354. defer util_http.CloseResponse(resp)
  355. if post_err != nil {
  356. if strings.Contains(post_err.Error(), "connection reset by peer") ||
  357. strings.Contains(post_err.Error(), "use of closed network connection") {
  358. glog.V(1).InfofCtx(ctx, "repeat error upload request %s: %v", option.UploadUrl, postErr)
  359. stats.FilerHandlerCounter.WithLabelValues(stats.RepeatErrorUploadContent).Inc()
  360. resp, post_err = uploader.httpClient.Do(req)
  361. defer util_http.CloseResponse(resp)
  362. }
  363. }
  364. if post_err != nil {
  365. return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
  366. }
  367. // print("-")
  368. var ret UploadResult
  369. etag := getEtag(resp)
  370. if resp.StatusCode == http.StatusNoContent {
  371. ret.ETag = etag
  372. return &ret, nil
  373. }
  374. resp_body, ra_err := io.ReadAll(resp.Body)
  375. if ra_err != nil {
  376. return nil, fmt.Errorf("read response body %v: %w", option.UploadUrl, ra_err)
  377. }
  378. unmarshal_err := json.Unmarshal(resp_body, &ret)
  379. if unmarshal_err != nil {
  380. glog.ErrorfCtx(ctx, "unmarshal %s: %v", option.UploadUrl, string(resp_body))
  381. return nil, fmt.Errorf("unmarshal %v: %w", option.UploadUrl, unmarshal_err)
  382. }
  383. if ret.Error != "" {
  384. return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
  385. }
  386. ret.ETag = etag
  387. ret.ContentMd5 = resp.Header.Get("Content-MD5")
  388. return &ret, nil
  389. }
  390. func getEtag(r *http.Response) (etag string) {
  391. etag = r.Header.Get("ETag")
  392. if strings.HasPrefix(etag, "\"") && strings.HasSuffix(etag, "\"") {
  393. etag = etag[1 : len(etag)-1]
  394. }
  395. return
  396. }