filer_server_handlers_write_autochunk.go

package weed_server

import (
	"bytes"
	"context"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path"
	"strconv"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/util/constants"
)

func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, contentLength int64, so *operation.StorageOption) {

	// autoChunking can be set at the command-line level or as a query param; the query param overrides the command line
	query := r.URL.Query()

	parsedMaxMB, _ := strconv.ParseInt(query.Get("maxMB"), 10, 32)
	maxMB := int32(parsedMaxMB)
	if maxMB <= 0 && fs.option.MaxMB > 0 {
		maxMB = int32(fs.option.MaxMB)
	}

	chunkSize := 1024 * 1024 * maxMB

	var reply *FilerPostResult
	var err error
	var md5bytes []byte
	if r.Method == http.MethodPost {
		if r.Header.Get("Content-Type") == "" && strings.HasSuffix(r.URL.Path, "/") {
			reply, err = fs.mkdir(ctx, w, r, so)
		} else {
			reply, md5bytes, err = fs.doPostAutoChunk(ctx, w, r, chunkSize, contentLength, so)
		}
	} else {
		reply, md5bytes, err = fs.doPutAutoChunk(ctx, w, r, chunkSize, contentLength, so)
	}
	if err != nil {
		errStr := err.Error()
		switch {
		case errStr == constants.ErrMsgOperationNotPermitted:
			writeJsonError(w, r, http.StatusForbidden, err)
		case strings.HasPrefix(errStr, "read input:") || errStr == io.ErrUnexpectedEOF.Error():
			writeJsonError(w, r, util.HttpStatusCancelled, err)
		case strings.HasSuffix(errStr, "is a file") || strings.HasSuffix(errStr, "already exists"):
			writeJsonError(w, r, http.StatusConflict, err)
		case errStr == constants.ErrMsgBadDigest:
			writeJsonError(w, r, http.StatusBadRequest, err)
		default:
			writeJsonError(w, r, http.StatusInternalServerError, err)
		}
	} else if reply != nil {
		if len(md5bytes) > 0 {
			md5InBase64 := util.Base64Encode(md5bytes)
			w.Header().Set("Content-MD5", md5InBase64)
		}
		writeJsonQuiet(w, r, http.StatusCreated, reply)
	}
}
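
// Example (illustrative; host, port, and paths are assumptions, not from this
// file): the chunking threshold can be overridden per request via the "maxMB"
// query parameter, e.g. against a locally running filer:
//
//	curl -X PUT -T big.dat "http://localhost:8888/dir/big.dat?maxMB=16"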

func (fs *FilerServer) doPostAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, contentLength int64, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {

	multipartReader, multipartReaderErr := r.MultipartReader()
	if multipartReaderErr != nil {
		return nil, nil, multipartReaderErr
	}

	// only the first part of the multipart request is consumed
	part1, part1Err := multipartReader.NextPart()
	if part1Err != nil {
		return nil, nil, part1Err
	}

	fileName := part1.FileName()
	if fileName != "" {
		fileName = path.Base(fileName)
	}
	contentType := part1.Header.Get("Content-Type")
	if contentType == "application/octet-stream" {
		contentType = ""
	}

	if err := fs.checkPermissions(ctx, r, fileName); err != nil {
		return nil, nil, err
	}

	// if the storage option asks for inline storage, keep the content inside the entry itself
	if so.SaveInside {
		buf := bufPool.Get().(*bytes.Buffer)
		buf.Reset()
		buf.ReadFrom(part1)
		filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, nil, nil, 0, buf.Bytes())
		bufPool.Put(buf)
		return
	}

	fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, part1, chunkSize, fileName, contentType, contentLength, so)
	if err != nil {
		return nil, nil, err
	}

	md5bytes = md5Hash.Sum(nil)
	headerMd5 := r.Header.Get("Content-Md5")
	if headerMd5 != "" && !(util.Base64Encode(md5bytes) == headerMd5 || fmt.Sprintf("%x", md5bytes) == headerMd5) {
		fs.filer.DeleteUncommittedChunks(ctx, fileChunks)
		return nil, nil, errors.New(constants.ErrMsgBadDigest)
	}
	filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
	if replyerr != nil {
		fs.filer.DeleteUncommittedChunks(ctx, fileChunks)
	}
	return
}
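
// Illustrative note on the Content-Md5 check above: the header is accepted as
// either base64 of the raw digest (the form RFC 1864 and most S3 clients send)
// or lowercase hex. For example, for the MD5 of "hello world":
//
//	base64: "XrY7u+Ae7tCTyyK7j1rNww=="
//	hex:    "5eb63bbbe01eeed093cb22bb8f5acdc3"
//
// Either spelling passes the comparison; anything else fails with
// ErrMsgBadDigest and the already-uploaded chunks are deleted.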

func (fs *FilerServer) doPutAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, contentLength int64, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {

	fileName := path.Base(r.URL.Path)
	contentType := r.Header.Get("Content-Type")
	if contentType == "application/octet-stream" {
		contentType = ""
	}

	if err := fs.checkPermissions(ctx, r, fileName); err != nil {
		return nil, nil, err
	}

	fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadRequestToChunks(ctx, w, r, r.Body, chunkSize, fileName, contentType, contentLength, so)
	if err != nil {
		return nil, nil, err
	}

	md5bytes = md5Hash.Sum(nil)
	headerMd5 := r.Header.Get("Content-Md5")
	if headerMd5 != "" && !(util.Base64Encode(md5bytes) == headerMd5 || fmt.Sprintf("%x", md5bytes) == headerMd5) {
		fs.filer.DeleteUncommittedChunks(ctx, fileChunks)
		return nil, nil, errors.New(constants.ErrMsgBadDigest)
	}
	filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
	if replyerr != nil {
		fs.filer.DeleteUncommittedChunks(ctx, fileChunks)
	}
	return
}

func isAppend(r *http.Request) bool {
	return r.URL.Query().Get("op") == "append"
}

func skipCheckParentDirEntry(r *http.Request) bool {
	return r.URL.Query().Get("skipCheckParentDir") == "true"
}

func isS3Request(r *http.Request) bool {
	return r.Header.Get(s3_constants.AmzAuthType) != "" || r.Header.Get("X-Amz-Date") != ""
}
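
// Illustrative use of the query parameters checked above (host and paths are
// assumptions):
//
//	curl -X POST -T part2.log "http://localhost:8888/logs/app.log?op=append"
//	curl -X PUT -T a.dat "http://localhost:8888/x/y/a.dat?skipCheckParentDir=true"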

func (fs *FilerServer) checkPermissions(ctx context.Context, r *http.Request, fileName string) error {
	fullPath := fs.fixFilePath(ctx, r, fileName)
	enforced, err := fs.wormEnforcedForEntry(ctx, fullPath)
	if err != nil {
		return err
	} else if enforced {
		// a WORM-enforced file cannot be modified
		return errors.New(constants.ErrMsgOperationNotPermitted)
	}

	return nil
}

func (fs *FilerServer) wormEnforcedForEntry(ctx context.Context, fullPath string) (bool, error) {
	rule := fs.filer.FilerConf.MatchStorageRule(fullPath)
	if !rule.Worm {
		return false, nil
	}

	entry, err := fs.filer.FindEntry(ctx, util.FullPath(fullPath))
	if err != nil {
		if errors.Is(err, filer_pb.ErrNotFound) {
			return false, nil
		}
		return false, err
	}

	// worm is not enforced
	if entry.WORMEnforcedAtTsNs == 0 {
		return false, nil
	}

	// worm will never expire
	if rule.WormRetentionTimeSeconds == 0 {
		return true, nil
	}

	enforcedAt := time.Unix(0, entry.WORMEnforcedAtTsNs)

	// worm is expired
	if time.Since(enforcedAt).Seconds() >= float64(rule.WormRetentionTimeSeconds) {
		return false, nil
	}

	return true, nil
}
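
// Worked example of the retention check above (numbers are hypothetical): with
// rule.WormRetentionTimeSeconds = 3600 and an entry whose WORMEnforcedAtTsNs
// was set two hours ago, time.Since(enforcedAt).Seconds() is about 7200, which
// is >= 3600, so the WORM lock has expired and the entry is writable again.
// Thirty minutes after enforcement it is about 1800 < 3600, and
// checkPermissions rejects the write with ErrMsgOperationNotPermitted.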

func (fs *FilerServer) fixFilePath(ctx context.Context, r *http.Request, fileName string) string {
	// fix the path
	fullPath := r.URL.Path
	if strings.HasSuffix(fullPath, "/") {
		if fileName != "" {
			fullPath += fileName
		}
	} else {
		if fileName != "" {
			if possibleDirEntry, findDirErr := fs.filer.FindEntry(ctx, util.FullPath(fullPath)); findDirErr == nil {
				if possibleDirEntry.IsDirectory() {
					fullPath += "/" + fileName
				}
			}
		}
	}
	return fullPath
}
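
// Illustrative resolutions (paths are hypothetical): uploading "a.txt" to
// "/photos/" yields "/photos/a.txt"; uploading it to "/photos" when "/photos"
// is an existing directory also yields "/photos/a.txt"; otherwise the URL path
// itself is used as the full file path.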

func (fs *FilerServer) saveMetaData(ctx context.Context, r *http.Request, fileName string, contentType string, so *operation.StorageOption, md5bytes []byte, fileChunks []*filer_pb.FileChunk, chunkOffset int64, content []byte) (filerResult *FilerPostResult, replyerr error) {

	// detect file mode
	modeStr := r.URL.Query().Get("mode")
	if modeStr == "" {
		modeStr = "0660"
	}
	mode, err := strconv.ParseUint(modeStr, 8, 32)
	if err != nil {
		glog.ErrorfCtx(ctx, "Invalid mode format: %s, using 0660 by default", modeStr)
		mode = 0660
	}

	// fix the path
	path := fs.fixFilePath(ctx, r, fileName)

	var entry *filer.Entry
	var newChunks []*filer_pb.FileChunk
	var mergedChunks []*filer_pb.FileChunk

	isAppend := isAppend(r)
	isOffsetWrite := len(fileChunks) > 0 && fileChunks[0].Offset > 0
	// when appending or writing at an offset, look up any existing entry first
	if isAppend || isOffsetWrite {
		existingEntry, findErr := fs.filer.FindEntry(ctx, util.FullPath(path))
		if findErr != nil && findErr != filer_pb.ErrNotFound {
			glog.V(0).InfofCtx(ctx, "failed to find %s: %v", path, findErr)
		}
		entry = existingEntry
	}
	if entry != nil {
		entry.Mtime = time.Now()
		entry.Md5 = nil
		// adjust chunk offsets
		if isAppend {
			for _, chunk := range fileChunks {
				chunk.Offset += int64(entry.FileSize)
			}
			entry.FileSize += uint64(chunkOffset)
		}
		newChunks = append(entry.GetChunks(), fileChunks...)

		// TODO
		if len(entry.Content) > 0 {
			replyerr = fmt.Errorf("append to small file is not supported yet")
			return
		}
	} else {
		glog.V(4).InfolnCtx(ctx, "saving", path)
		newChunks = fileChunks
		entry = &filer.Entry{
			FullPath: util.FullPath(path),
			Attr: filer.Attr{
				Mtime:    time.Now(),
				Crtime:   time.Now(),
				Mode:     os.FileMode(mode),
				Uid:      OS_UID,
				Gid:      OS_GID,
				TtlSec:   so.TtlSeconds,
				Mime:     contentType,
				Md5:      md5bytes,
				FileSize: uint64(chunkOffset),
			},
			Content: content,
		}
	}

	// maybe concatenate small chunks into one whole chunk
	mergedChunks, replyerr = fs.maybeMergeChunks(ctx, so, newChunks)
	if replyerr != nil {
		glog.V(0).InfofCtx(ctx, "merge chunks %s: %v", r.RequestURI, replyerr)
		mergedChunks = newChunks
	}

	// maybe manifestize the chunk list: a long list can be stored as manifest chunks
	mergedChunks, replyerr = filer.MaybeManifestize(fs.saveAsChunk(ctx, so), mergedChunks)
	if replyerr != nil {
		glog.V(0).InfofCtx(ctx, "manifestize %s: %v", r.RequestURI, replyerr)
		return
	}

	entry.Chunks = mergedChunks
	if isOffsetWrite {
		entry.Md5 = nil
		entry.FileSize = entry.Size()
	}

	filerResult = &FilerPostResult{
		Name: fileName,
		Size: int64(entry.FileSize),
	}

	entry.Extended = SaveAmzMetaData(r, entry.Extended, false)

	for k, v := range r.Header {
		if len(v) > 0 && len(v[0]) > 0 {
			if strings.HasPrefix(k, needle.PairNamePrefix) || k == "Cache-Control" || k == "Expires" || k == "Content-Disposition" {
				entry.Extended[k] = []byte(v[0])
			}
			if k == "Response-Content-Disposition" {
				entry.Extended["Content-Disposition"] = []byte(v[0])
			}
		}
	}

	// Process SSE metadata headers sent by S3 API and store in entry extended metadata
	if sseIVHeader := r.Header.Get(s3_constants.SeaweedFSSSEIVHeader); sseIVHeader != "" {
		// Decode base64-encoded IV and store in metadata
		if ivData, err := base64.StdEncoding.DecodeString(sseIVHeader); err == nil {
			entry.Extended[s3_constants.SeaweedFSSSEIV] = ivData
			glog.V(4).Infof("Stored SSE-C IV metadata for %s", entry.FullPath)
		} else {
			glog.Errorf("Failed to decode SSE-C IV header for %s: %v", entry.FullPath, err)
		}
	}

	// Store SSE-C algorithm and key MD5 for proper S3 API response headers
	if sseAlgorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); sseAlgorithm != "" {
		entry.Extended[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(sseAlgorithm)
		glog.V(4).Infof("Stored SSE-C algorithm metadata for %s", entry.FullPath)
	}
	if sseKeyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); sseKeyMD5 != "" {
		entry.Extended[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(sseKeyMD5)
		glog.V(4).Infof("Stored SSE-C key MD5 metadata for %s", entry.FullPath)
	}

	if sseKMSHeader := r.Header.Get(s3_constants.SeaweedFSSSEKMSKeyHeader); sseKMSHeader != "" {
		// Decode base64-encoded KMS metadata and store
		if kmsData, err := base64.StdEncoding.DecodeString(sseKMSHeader); err == nil {
			entry.Extended[s3_constants.SeaweedFSSSEKMSKey] = kmsData
			glog.V(4).Infof("Stored SSE-KMS metadata for %s", entry.FullPath)
		} else {
			glog.Errorf("Failed to decode SSE-KMS metadata header for %s: %v", entry.FullPath, err)
		}
	}

	dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil, skipCheckParentDirEntry(r), so.MaxFileNameLength)
	// In test_bucket_listv2_delimiter_basic, the valid object key is the parent folder
	if dbErr != nil && strings.HasSuffix(dbErr.Error(), " is a file") && isS3Request(r) {
		dbErr = fs.filer.CreateEntry(ctx, entry, false, false, nil, true, so.MaxFileNameLength)
	}
	if dbErr != nil {
		replyerr = dbErr
		filerResult.Error = dbErr.Error()
		glog.V(0).InfofCtx(ctx, "failed to write %s to filer server: %v", path, dbErr)
	}

	return filerResult, replyerr
}
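
// Illustrative request against the header handling above (host and header
// values are assumptions; needle.PairNamePrefix is the "Seaweed-" prefix):
// a prefixed header and Cache-Control are persisted into entry.Extended, and
// the "mode" query parameter sets the permission bits:
//
//	curl -X PUT -T a.html \
//	  -H "Seaweed-flag: pinned" -H "Cache-Control: max-age=3600" \
//	  "http://localhost:8888/www/a.html?mode=0644"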

func (fs *FilerServer) saveAsChunk(ctx context.Context, so *operation.StorageOption) filer.SaveDataAsChunkFunctionType {
	return func(reader io.Reader, name string, offset int64, tsNs int64) (*filer_pb.FileChunk, error) {
		var fileId string
		var uploadResult *operation.UploadResult

		err := util.Retry("saveAsChunk", func() error {
			// assign one file id for one chunk
			assignedFileId, urlLocation, auth, assignErr := fs.assignNewFileInfo(ctx, so)
			if assignErr != nil {
				return assignErr
			}

			fileId = assignedFileId

			// upload the chunk to the volume server
			uploadOption := &operation.UploadOption{
				UploadUrl:         urlLocation,
				Filename:          name,
				Cipher:            fs.option.Cipher,
				IsInputCompressed: false,
				MimeType:          "",
				PairMap:           nil,
				Jwt:               auth,
			}

			uploader, uploaderErr := operation.NewUploader()
			if uploaderErr != nil {
				return uploaderErr
			}

			var uploadErr error
			uploadResult, uploadErr, _ = uploader.Upload(ctx, reader, uploadOption)
			if uploadErr != nil {
				return uploadErr
			}
			return nil
		})
		if err != nil {
			return nil, err
		}

		return uploadResult.ToPbFileChunk(fileId, offset, tsNs), nil
	}
}
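
// A minimal sketch of the assign-then-upload flow wrapped in util.Retry above
// (file id, location, and size are made-up values): the master assigns a file
// id such as "3,01637037d6" together with a volume server location such as
// "127.0.0.1:8080"; the chunk bytes are uploaded to that location, and
// ToPbFileChunk records roughly {fileId: "3,01637037d6", offset: offset,
// size: <uploaded byte count>, modifiedTsNs: tsNs} in the entry's chunk list.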

func (fs *FilerServer) mkdir(ctx context.Context, w http.ResponseWriter, r *http.Request, so *operation.StorageOption) (filerResult *FilerPostResult, replyerr error) {

	// detect file mode
	modeStr := r.URL.Query().Get("mode")
	if modeStr == "" {
		modeStr = "0660"
	}
	mode, err := strconv.ParseUint(modeStr, 8, 32)
	if err != nil {
		glog.ErrorfCtx(ctx, "Invalid mode format: %s, using 0660 by default", modeStr)
		mode = 0660
	}

	// fix the path
	path := r.URL.Path
	if strings.HasSuffix(path, "/") {
		path = path[:len(path)-1]
	}

	existingEntry, err := fs.filer.FindEntry(ctx, util.FullPath(path))
	if err == nil && existingEntry != nil {
		replyerr = fmt.Errorf("dir %s already exists", path)
		return
	}

	glog.V(4).InfolnCtx(ctx, "mkdir", path)
	entry := &filer.Entry{
		FullPath: util.FullPath(path),
		Attr: filer.Attr{
			Mtime:  time.Now(),
			Crtime: time.Now(),
			Mode:   os.FileMode(mode) | os.ModeDir,
			Uid:    OS_UID,
			Gid:    OS_GID,
			TtlSec: so.TtlSeconds,
		},
	}

	filerResult = &FilerPostResult{
		Name: util.FullPath(path).Name(),
	}

	if dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil, false, so.MaxFileNameLength); dbErr != nil {
		replyerr = dbErr
		filerResult.Error = dbErr.Error()
		glog.V(0).InfofCtx(ctx, "failed to create dir %s on filer server: %v", path, dbErr)
	}
	return filerResult, replyerr
}
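
// Illustrative mkdir request (host assumed): per autoChunk, a POST with no
// Content-Type header to a path ending in "/" creates a directory; the
// optional "mode" query parameter sets its permission bits:
//
//	curl -X POST "http://localhost:8888/new/dir/?mode=0755"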

func SaveAmzMetaData(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte) {

	metadata = make(map[string][]byte)
	if !isReplace {
		for k, v := range existing {
			metadata[k] = v
		}
	}

	if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" {
		metadata[s3_constants.AmzStorageClass] = []byte(sc)
	}
	if ce := r.Header.Get("Content-Encoding"); ce != "" {
		metadata["Content-Encoding"] = []byte(ce)
	}

	if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" {
		// Use url.ParseQuery for robust parsing and automatic URL decoding
		parsedTags, err := url.ParseQuery(tags)
		if err != nil {
			glog.Errorf("Failed to parse S3 tags '%s': %v", tags, err)
		} else {
			for key, values := range parsedTags {
				// According to the S3 spec, if a key is provided multiple times, the last value is used.
				// A tag value can be an empty string but not nil.
				value := ""
				if len(values) > 0 {
					value = values[len(values)-1]
				}
				metadata[s3_constants.AmzObjectTagging+"-"+key] = []byte(value)
			}
		}
	}

	for header, values := range r.Header {
		if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) {
			for _, value := range values {
				metadata[header] = []byte(value)
			}
		}
	}

	// Handle SSE-C headers
	if algorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); algorithm != "" {
		metadata[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(algorithm)
	}
	if keyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); keyMD5 != "" {
		// Store as-is; the SSE-C key MD5 is base64-encoded and case-sensitive
		metadata[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(keyMD5)
	}

	// acp-owner
	acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey)
	if len(acpOwner) > 0 {
		metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner)
	}

	// acp-grants
	acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey)
	if len(acpGrants) > 0 {
		metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants)
	}

	return
}
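
// Illustrative tagging request against the parsing above (the header value is
// an assumption): an X-Amz-Tagging header of "color=red&size=" is parsed by
// url.ParseQuery into {"color": ["red"], "size": [""]} and stored as the
// extended-attribute keys "X-Amz-Tagging-color" => "red" and
// "X-Amz-Tagging-size" => "" (an empty tag value is preserved).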