//go:build ydb
// +build ydb

package ydb

import (
	"context"
	"fmt"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	environ "github.com/ydb-platform/ydb-go-sdk-auth-environ"
	"github.com/ydb-platform/ydb-go-sdk/v3"
	"github.com/ydb-platform/ydb-go-sdk/v3/query"
	"github.com/ydb-platform/ydb-go-sdk/v3/table"
	"github.com/ydb-platform/ydb-go-sdk/v3/table/options"
	"github.com/ydb-platform/ydb-go-sdk/v3/table/types"
)

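// Defaults applied in Initialize when the corresponding configuration keys
// are unset. defaultDialTimeOut is in seconds, defaultPartitionSizeMb in
// megabytes, and defaultMaxListChunk is the page size for directory listings.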
const (
	defaultDialTimeOut            = 10
	defaultPartitionBySizeEnabled = true
	defaultPartitionSizeMb        = 200
	defaultPartitionByLoadEnabled = true
	defaultMinPartitionsCount     = 5
	defaultMaxPartitionsCount     = 1000
	defaultMaxListChunk           = 2000
)

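// Shared transaction-control options: roQC runs read statements in an online
// read-only transaction, rwQC runs mutations with the SDK's default
// serializable read-write transaction control.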
var (
	roQC = query.WithTxControl(query.OnlineReadOnlyTxControl())
	rwQC = query.WithTxControl(query.DefaultTxControl())
)

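// YdbStore implements filer.FilerStore on top of YDB. All entries live in a
// single table (abstract_sql.DEFAULT_TABLE) under tablePathPrefix; with
// SupportBucketTable enabled, each bucket gets its own copy of that table
// under tablePathPrefix/<bucket>, and dbs caches which bucket tables exist.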
type YdbStore struct {
	DB                     *ydb.Driver
	dirBuckets             string
	tablePathPrefix        string
	SupportBucketTable     bool
	partitionBySizeEnabled options.FeatureFlag
	partitionSizeMb        uint64
	partitionByLoadEnabled options.FeatureFlag
	minPartitionsCount     uint64
	maxPartitionsCount     uint64
	maxListChunk           int
	dbs                    map[string]bool
	dbsLock                sync.Mutex
}

func init() {
	filer.Stores = append(filer.Stores, &YdbStore{})
}

func (store *YdbStore) GetName() string {
	return "ydb"
}

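// Initialize wires the store into the filer configuration. All keys are read
// relative to prefix (e.g. "ydb."), with partitioning and listing options
// falling back to the defaults above.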
func (store *YdbStore) Initialize(configuration util.Configuration, prefix string) (err error) {
	configuration.SetDefault(prefix+"partitionBySizeEnabled", defaultPartitionBySizeEnabled)
	configuration.SetDefault(prefix+"partitionSizeMb", defaultPartitionSizeMb)
	configuration.SetDefault(prefix+"partitionByLoadEnabled", defaultPartitionByLoadEnabled)
	configuration.SetDefault(prefix+"minPartitionsCount", defaultMinPartitionsCount)
	configuration.SetDefault(prefix+"maxPartitionsCount", defaultMaxPartitionsCount)
	configuration.SetDefault(prefix+"maxListChunk", defaultMaxListChunk)
	return store.initialize(
		configuration.GetString("filer.options.buckets_folder"),
		configuration.GetString(prefix+"dsn"),
		configuration.GetString(prefix+"prefix"),
		configuration.GetBool(prefix+"useBucketPrefix"),
		configuration.GetInt(prefix+"dialTimeOut"),
		configuration.GetInt(prefix+"poolSizeLimit"),
		configuration.GetBool(prefix+"partitionBySizeEnabled"),
		uint64(configuration.GetInt(prefix+"partitionSizeMb")),
		configuration.GetBool(prefix+"partitionByLoadEnabled"),
		uint64(configuration.GetInt(prefix+"minPartitionsCount")),
		uint64(configuration.GetInt(prefix+"maxPartitionsCount")),
		configuration.GetInt(prefix+"maxListChunk"),
	)
}

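// initialize opens the YDB driver with credentials taken from the environment
// (see ydb-go-sdk-auth-environ), falling back to the YDB_CONNECTION_STRING
// environment variable when dsn is empty, and then creates the required
// tables.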
func (store *YdbStore) initialize(dirBuckets string, dsn string, tablePathPrefix string, useBucketPrefix bool, dialTimeOut int, poolSizeLimit int, partitionBySizeEnabled bool, partitionSizeMb uint64, partitionByLoadEnabled bool, minPartitionsCount uint64, maxPartitionsCount uint64, maxListChunk int) (err error) {
	store.dirBuckets = dirBuckets
	store.SupportBucketTable = useBucketPrefix
	if partitionBySizeEnabled {
		store.partitionBySizeEnabled = options.FeatureEnabled
	} else {
		store.partitionBySizeEnabled = options.FeatureDisabled
	}
	if partitionByLoadEnabled {
		store.partitionByLoadEnabled = options.FeatureEnabled
	} else {
		store.partitionByLoadEnabled = options.FeatureDisabled
	}
	store.partitionSizeMb = partitionSizeMb
	store.minPartitionsCount = minPartitionsCount
	store.maxPartitionsCount = maxPartitionsCount
	store.maxListChunk = maxListChunk
	if store.SupportBucketTable {
		glog.V(0).Infof("enabled BucketPrefix")
	}
	store.dbs = make(map[string]bool)
	ctx := context.Background()
	if dialTimeOut == 0 {
		dialTimeOut = defaultDialTimeOut
	}
	opts := []ydb.Option{
		ydb.WithDialTimeout(time.Duration(dialTimeOut) * time.Second),
		environ.WithEnvironCredentials(),
	}
	if poolSizeLimit > 0 {
		opts = append(opts, ydb.WithSessionPoolSizeLimit(poolSizeLimit))
	}
	if dsn == "" {
		dsn = os.Getenv("YDB_CONNECTION_STRING")
	}
	store.DB, err = ydb.Open(ctx, dsn, opts...)
	if err != nil {
		return fmt.Errorf("can not connect to %s: %w", dsn, err)
	}
	store.tablePathPrefix = path.Join(store.DB.Name(), tablePathPrefix)
	if err := store.ensureTables(ctx); err != nil {
		return err
	}
	return nil
}

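// doTxOrDB executes one YQL statement, either on the transaction stored in
// ctx under the "tx" key or, failing that, through the query client's Do
// retryer with the given transaction control. processResultFunc, if non-nil,
// must fully consume the result before it is closed here. Note that
// BeginTransaction stores a table.Transaction under the same key; unless that
// value also implements query.Transaction, the transactional branch is
// skipped and the statement runs standalone.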
func (store *YdbStore) doTxOrDB(ctx context.Context, q *string, params *table.QueryParameters, ts query.ExecuteOption, processResultFunc func(res query.Result) error) (err error) {
	var res query.Result
	if tx, ok := ctx.Value("tx").(query.Transaction); ok {
		res, err = tx.Query(ctx, *q, query.WithParameters(params))
		if err != nil {
			return fmt.Errorf("execute transaction: %w", err)
		}
	} else {
		err = store.DB.Query().Do(ctx, func(ctx context.Context, s query.Session) (err error) {
			res, err = s.Query(ctx, *q, query.WithParameters(params), ts)
			if err != nil {
				return fmt.Errorf("execute statement: %w", err)
			}
			return nil
		}, query.WithIdempotent())
	}
	if err != nil {
		return err
	}
	if res != nil {
		defer func() { _ = res.Close(ctx) }()
		if processResultFunc != nil {
			if err = processResultFunc(res); err != nil {
				return fmt.Errorf("process result: %w", err)
			}
		}
	}
	return err
}

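// insertOrUpdateEntry encodes the entry's attributes and chunks (gzipping
// large chunk lists), then UPSERTs the row keyed by directory hash and name,
// so a single statement covers both insert and update.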
func (store *YdbStore) insertOrUpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
	dir, name := entry.FullPath.DirAndName()
	meta, err := entry.EncodeAttributesAndChunks()
	if err != nil {
		return fmt.Errorf("encode %s: %v", entry.FullPath, err)
	}
	if len(entry.GetChunks()) > filer.CountEntryChunksForGzip {
		meta = util.MaybeGzipData(meta)
	}
	tablePathPrefix, shortDir := store.getPrefix(ctx, &dir)
	fileMeta := FileMeta{util.HashStringToLong(dir), name, *shortDir, meta}
	return store.doTxOrDB(ctx, withPragma(tablePathPrefix, upsertQuery), fileMeta.queryParameters(entry.TtlSec), rwQC, nil)
}

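// InsertEntry and UpdateEntry share insertOrUpdateEntry: the UPSERT makes the
// two operations identical at the storage layer.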
func (store *YdbStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) {
	return store.insertOrUpdateEntry(ctx, entry)
}

func (store *YdbStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
	return store.insertOrUpdateEntry(ctx, entry)
}

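// FindEntry fetches a single entry by directory and name, returning
// filer_pb.ErrNotFound when no row matches.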
func (store *YdbStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) {
	dir, name := fullpath.DirAndName()
	var data []byte
	entryFound := false
	tablePathPrefix, shortDir := store.getPrefix(ctx, &dir)
	q := withPragma(tablePathPrefix, findQuery)
	queryParams := table.NewQueryParameters(
		table.ValueParam("$dir_hash", types.Int64Value(util.HashStringToLong(*shortDir))),
		table.ValueParam("$directory", types.UTF8Value(*shortDir)),
		table.ValueParam("$name", types.UTF8Value(name)))
	err = store.doTxOrDB(ctx, q, queryParams, roQC, func(res query.Result) error {
		for rs, err := range res.ResultSets(ctx) {
			if err != nil {
				return err
			}
			for row, err := range rs.Rows(ctx) {
				if err != nil {
					return err
				}
				if scanErr := row.Scan(&data); scanErr != nil {
					return fmt.Errorf("scan %s: %v", fullpath, scanErr)
				}
				entryFound = true
				return nil
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	if !entryFound {
		return nil, filer_pb.ErrNotFound
	}
	entry = &filer.Entry{FullPath: fullpath}
	if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); decodeErr != nil {
		return nil, fmt.Errorf("decode %s: %v", fullpath, decodeErr)
	}
	return entry, nil
}

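// DeleteEntry removes one entry, addressed the same way FindEntry looks it up.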
func (store *YdbStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) (err error) {
	dir, name := fullpath.DirAndName()
	tablePathPrefix, shortDir := store.getPrefix(ctx, &dir)
	q := withPragma(tablePathPrefix, deleteQuery)
	glog.V(4).InfofCtx(ctx, "DeleteEntry %s, tablePathPrefix %s, shortDir %s", fullpath, *tablePathPrefix, *shortDir)
	queryParams := table.NewQueryParameters(
		table.ValueParam("$dir_hash", types.Int64Value(util.HashStringToLong(*shortDir))),
		table.ValueParam("$directory", types.UTF8Value(*shortDir)),
		table.ValueParam("$name", types.UTF8Value(name)))
	return store.doTxOrDB(ctx, q, queryParams, rwQC, nil)
}

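// DeleteFolderChildren deletes all rows whose directory column equals the
// folder path, i.e. the folder's direct children.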
func (store *YdbStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) (err error) {
	dir := string(fullpath)
	tablePathPrefix, shortDir := store.getPrefix(ctx, &dir)
	q := withPragma(tablePathPrefix, deleteFolderChildrenQuery)
	queryParams := table.NewQueryParameters(
		table.ValueParam("$dir_hash", types.Int64Value(util.HashStringToLong(*shortDir))),
		table.ValueParam("$directory", types.UTF8Value(*shortDir)))
	return store.doTxOrDB(ctx, q, queryParams, rwQC, nil)
}

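// ListDirectoryEntries lists without a name prefix; it simply delegates to
// ListDirectoryPrefixedEntries.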
func (store *YdbStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
	return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
}

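// ListDirectoryPrefixedEntries pages through a directory in chunks of at most
// maxListChunk rows, using the inclusive query for the first chunk when
// includeStartFile is set and the exclusive one afterwards. Paging stops when
// limit entries have been delivered, the callback stops iteration, or a chunk
// comes back shorter than requested.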
func (store *YdbStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
	dir := string(dirPath)
	tablePathPrefix, shortDir := store.getPrefix(ctx, &dir)
	baseInclusive := withPragma(tablePathPrefix, listInclusiveDirectoryQuery)
	baseExclusive := withPragma(tablePathPrefix, listDirectoryQuery)
	var entryCount int64
	var prevFetchedLessThanChunk bool
	for entryCount < limit {
		if prevFetchedLessThanChunk {
			break
		}
		var q *string
		if entryCount == 0 && includeStartFile {
			q = baseInclusive
		} else {
			q = baseExclusive
		}
		rest := limit - entryCount
		chunkLimit := rest
		if chunkLimit > int64(store.maxListChunk) {
			chunkLimit = int64(store.maxListChunk)
		}
		var rowCount int64
		params := table.NewQueryParameters(
			table.ValueParam("$dir_hash", types.Int64Value(util.HashStringToLong(*shortDir))),
			table.ValueParam("$directory", types.UTF8Value(*shortDir)),
			table.ValueParam("$start_name", types.UTF8Value(startFileName)),
			table.ValueParam("$prefix", types.UTF8Value(prefix+"%")),
			table.ValueParam("$limit", types.Uint64Value(uint64(chunkLimit))),
		)
		err := store.doTxOrDB(ctx, q, params, roQC, func(res query.Result) error {
			for rs, err := range res.ResultSets(ctx) {
				if err != nil {
					return err
				}
				for row, err := range rs.Rows(ctx) {
					if err != nil {
						return err
					}
					var name string
					var data []byte
					if scanErr := row.Scan(&name, &data); scanErr != nil {
						return fmt.Errorf("scan %s: %w", dir, scanErr)
					}
					lastFileName = name
					entry := &filer.Entry{FullPath: util.NewFullPath(dir, name)}
					if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); decodeErr != nil {
						return fmt.Errorf("decode entry %s: %w", entry.FullPath, decodeErr)
					}
					if !eachEntryFunc(entry) {
						return nil
					}
					rowCount++
					entryCount++
					startFileName = lastFileName
					if entryCount >= limit {
						return nil
					}
				}
			}
			return nil
		})
		if err != nil {
			return lastFileName, err
		}
		if rowCount < chunkLimit {
			prevFetchedLessThanChunk = true
		}
	}
	return lastFileName, nil
}

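// BeginTransaction starts a serializable read-write table transaction and
// stores it in the returned context under the "tx" key, where
// CommitTransaction and RollbackTransaction expect to find it.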
func (store *YdbStore) BeginTransaction(ctx context.Context) (context.Context, error) {
	session, err := store.DB.Table().CreateSession(ctx)
	if err != nil {
		return ctx, err
	}
	tx, err := session.BeginTransaction(ctx, table.TxSettings(table.WithSerializableReadWrite()))
	if err != nil {
		return ctx, err
	}
	return context.WithValue(ctx, "tx", tx), nil
}

func (store *YdbStore) CommitTransaction(ctx context.Context) error {
	if tx, ok := ctx.Value("tx").(table.Transaction); ok {
		_, err := tx.CommitTx(ctx)
		return err
	}
	return nil
}

func (store *YdbStore) RollbackTransaction(ctx context.Context) error {
	if tx, ok := ctx.Value("tx").(table.Transaction); ok {
		return tx.Rollback(ctx)
	}
	return nil
}

func (store *YdbStore) Shutdown() {
	_ = store.DB.Close(context.Background())
}

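// The filer can drop a whole bucket at once when each bucket has its own
// table, so row-by-row deletion is skipped.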
var _ filer.BucketAware = (*YdbStore)(nil)

func (store *YdbStore) CanDropWholeBucket() bool {
	return store.SupportBucketTable
}

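// OnBucketCreation eagerly creates the bucket's table and marks it as known
// in the dbs cache.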
func (store *YdbStore) OnBucketCreation(bucket string) {
	if !store.SupportBucketTable {
		return
	}
	prefix := path.Join(store.tablePathPrefix, bucket)
	store.dbsLock.Lock()
	defer store.dbsLock.Unlock()
	if err := store.createTable(context.Background(), prefix); err != nil {
		glog.Errorf("createTable %s: %v", prefix, err)
	}
	if store.dbs == nil {
		return
	}
	store.dbs[bucket] = true
}

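// OnBucketDeletion drops the bucket's table, removes its scheme directory,
// and evicts the bucket from the dbs cache.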
func (store *YdbStore) OnBucketDeletion(bucket string) {
	if !store.SupportBucketTable {
		return
	}
	store.dbsLock.Lock()
	defer store.dbsLock.Unlock()
	prefix := path.Join(store.tablePathPrefix, bucket)
	glog.V(4).Infof("deleting table %s", prefix)
	if err := store.deleteTable(context.Background(), prefix); err != nil {
		glog.Errorf("deleteTable %s: %v", prefix, err)
	}
	if err := store.DB.Scheme().RemoveDirectory(context.Background(), prefix); err != nil {
		glog.Errorf("remove directory %s: %v", prefix, err)
	}
	if store.dbs == nil {
		return
	}
	delete(store.dbs, bucket)
}

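// createTable and deleteTable manage a bucket's copy of the entry table;
// createTableOptions (defined elsewhere in this package) applies the
// partitioning settings captured in initialize.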
func (store *YdbStore) createTable(ctx context.Context, prefix string) error {
	return store.DB.Table().Do(ctx, func(ctx context.Context, s table.Session) error {
		return s.CreateTable(ctx, path.Join(prefix, abstract_sql.DEFAULT_TABLE), store.createTableOptions()...)
	})
}

func (store *YdbStore) deleteTable(ctx context.Context, prefix string) error {
	if !store.SupportBucketTable {
		return nil
	}
	if err := store.DB.Table().Do(ctx, func(ctx context.Context, s table.Session) error {
		return s.DropTable(ctx, path.Join(prefix, abstract_sql.DEFAULT_TABLE))
	}); err != nil {
		return err
	}
	glog.V(4).InfofCtx(ctx, "deleted table %s", prefix)
	return nil
}

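// getPrefix picks the table path prefix for a directory. Paths under
// dirBuckets are mapped to their bucket's table, with existence verified via
// DescribeTable and cached in dbs; everything else uses the base prefix. As
// written, shortDir is always the caller's dir, returned unmodified.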
func (store *YdbStore) getPrefix(ctx context.Context, dir *string) (tablePathPrefix *string, shortDir *string) {
	tablePathPrefix = &store.tablePathPrefix
	shortDir = dir
	if !store.SupportBucketTable {
		return
	}
	prefixBuckets := store.dirBuckets + "/"
	glog.V(4).InfofCtx(ctx, "dir: %s, prefixBuckets: %s", *dir, prefixBuckets)
	if strings.HasPrefix(*dir, prefixBuckets) {
		// detect bucket
		bucketAndDir := (*dir)[len(prefixBuckets):]
		glog.V(4).InfofCtx(ctx, "bucketAndDir: %s", bucketAndDir)
		var bucket string
		if t := strings.Index(bucketAndDir, "/"); t > 0 {
			bucket = bucketAndDir[:t]
		} else if t < 0 {
			bucket = bucketAndDir
		}
		if bucket == "" {
			return
		}
		store.dbsLock.Lock()
		defer store.dbsLock.Unlock()
		if _, found := store.dbs[bucket]; !found {
			glog.V(4).InfofCtx(ctx, "bucket %q not in cache, verifying existence via DescribeTable", bucket)
			tablePath := path.Join(store.tablePathPrefix, bucket, abstract_sql.DEFAULT_TABLE)
			err2 := store.DB.Table().Do(ctx, func(ctx context.Context, s table.Session) error {
				_, err3 := s.DescribeTable(ctx, tablePath)
				return err3
			})
			if err2 != nil {
				glog.V(4).InfofCtx(ctx, "bucket %q not found (DescribeTable %s failed)", bucket, tablePath)
				return
			}
			glog.V(4).InfofCtx(ctx, "bucket %q exists, adding to cache", bucket)
			store.dbs[bucket] = true
		}
		bucketPrefix := path.Join(store.tablePathPrefix, bucket)
		tablePathPrefix = &bucketPrefix
	}
	return
}

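// ensureTables creates the base entry table and, when bucket support is on,
// a table for every bucket already recorded in dbs. Failure to create the
// base table aborts initialization; per-bucket failures are only logged.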
func (store *YdbStore) ensureTables(ctx context.Context) error {
	prefixFull := store.tablePathPrefix
	glog.V(4).InfofCtx(ctx, "creating base table %s", prefixFull)
	baseTable := path.Join(prefixFull, abstract_sql.DEFAULT_TABLE)
	if err := store.DB.Table().Do(ctx, func(ctx context.Context, s table.Session) error {
		return s.CreateTable(ctx, baseTable, store.createTableOptions()...)
	}); err != nil {
		return fmt.Errorf("failed to create base table %s: %v", baseTable, err)
	}
	glog.V(4).InfofCtx(ctx, "creating bucket tables")
	if store.SupportBucketTable {
		store.dbsLock.Lock()
		defer store.dbsLock.Unlock()
		for bucket := range store.dbs {
			glog.V(4).InfofCtx(ctx, "creating bucket table %s", bucket)
			bucketTable := path.Join(prefixFull, bucket, abstract_sql.DEFAULT_TABLE)
			if err := store.DB.Table().Do(ctx, func(ctx context.Context, s table.Session) error {
				return s.CreateTable(ctx, bucketTable, store.createTableOptions()...)
			}); err != nil {
				glog.ErrorfCtx(ctx, "failed to create bucket table %s: %v", bucketTable, err)
			}
		}
	}
	return nil
}