package needle_map

/* CompactMap is an in-memory map of needle indices, optimized for memory usage.
 *
 * It's implemented as a map of sorted index segments, which are in turn accessed through binary
 * search. This guarantees a best-case scenario (ordered inserts/updates) of O(1) and a worst-case
 * scenario of O(log n) runtime, with memory usage unaffected by insert ordering.
 *
 * Note that even at O(log n), the clock time for both reads and writes is very low, so CompactMap
 * will seldom bottleneck index operations.
 */

import (
	"fmt"
	"math"
	"slices"
	"sort"
	"sync"

	"github.com/seaweedfs/seaweedfs/weed/storage/types"
)

const (
	MaxCompactKey    = math.MaxUint16
	SegmentChunkSize = 50000 // should be <= MaxCompactKey
)
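
// A full needle ID decomposes into a Chunk (key / SegmentChunkSize) and a CompactKey
// (key % SegmentChunkSize). Because SegmentChunkSize <= MaxCompactKey, the in-segment
// key always fits in a uint16, which is what keeps per-entry memory usage low.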
type CompactKey uint16
type CompactOffset [types.OffsetSize]byte
type CompactNeedleValue struct {
	key    CompactKey
	offset CompactOffset
	size   types.Size
}
type Chunk uint64

type CompactMapSegment struct {
	list     []CompactNeedleValue
	chunk    Chunk
	firstKey CompactKey
	lastKey  CompactKey
}

type CompactMap struct {
	sync.RWMutex
	segments map[Chunk]*CompactMapSegment
}
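
// Key converts a CompactKey back into a full needle ID, given the chunk it belongs to.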
func (ck CompactKey) Key(chunk Chunk) types.NeedleId {
	return (types.NeedleId(SegmentChunkSize) * types.NeedleId(chunk)) + types.NeedleId(ck)
}
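
// OffsetToCompact packs an Offset into its compact, fixed-size byte representation.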
func OffsetToCompact(offset types.Offset) CompactOffset {
	var co CompactOffset
	types.OffsetToBytes(co[:], offset)
	return co
}
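
// Offset unpacks a CompactOffset back into a regular Offset.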
func (co CompactOffset) Offset() types.Offset {
	return types.BytesToOffset(co[:])
}
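
// NeedleValue expands a CompactNeedleValue into a full NeedleValue, given the chunk it belongs to.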
func (cnv CompactNeedleValue) NeedleValue(chunk Chunk) NeedleValue {
	return NeedleValue{
		Key:    cnv.key.Key(chunk),
		Offset: cnv.offset.Offset(),
		Size:   cnv.size,
	}
}
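
// newCompactMapSegment returns an empty segment for the given chunk. firstKey is initialized
// to MaxCompactKey and lastKey to 0, so the first insert establishes the actual bounds.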
func newCompactMapSegment(chunk Chunk) *CompactMapSegment {
	return &CompactMapSegment{
		list:     []CompactNeedleValue{},
		chunk:    chunk,
		firstKey: MaxCompactKey,
		lastKey:  0,
	}
}

func (cs *CompactMapSegment) len() int {
	return len(cs.list)
}

func (cs *CompactMapSegment) cap() int {
	return cap(cs.list)
}
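
// compactKey converts a full needle ID into its in-segment CompactKey.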
func (cs *CompactMapSegment) compactKey(key types.NeedleId) CompactKey {
	return CompactKey(key - (types.NeedleId(SegmentChunkSize) * types.NeedleId(cs.chunk)))
}

// bsearchKey returns the CompactNeedleValue index for a given ID key.
// If the key is not found, it returns the index where it should be inserted instead.
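// The first/last key bounds are checked before falling back to a full binary search,
// so in-order lookups and appends resolve in O(1).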
func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) {
	ck := cs.compactKey(key)

	switch {
	case len(cs.list) == 0:
		return 0, false
	case ck == cs.firstKey:
		return 0, true
	case ck <= cs.firstKey:
		return 0, false
	case ck == cs.lastKey:
		return len(cs.list) - 1, true
	case ck > cs.lastKey:
		return len(cs.list), false
	}

	i := sort.Search(len(cs.list), func(i int) bool {
		return cs.list[i].key >= ck
	})
	return i, cs.list[i].key == ck
}

// set inserts/updates a CompactNeedleValue.
// If the operation is an update, returns the overwritten value's previous offset and size.
func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) {
	i, found := cs.bsearchKey(key)

	if found {
		// update
		o := cs.list[i].offset.Offset()
		oldOffset.OffsetLower = o.OffsetLower
		oldOffset.OffsetHigher = o.OffsetHigher
		oldSize = cs.list[i].size

		o.OffsetLower = offset.OffsetLower
		o.OffsetHigher = offset.OffsetHigher
		cs.list[i].offset = OffsetToCompact(o)
		cs.list[i].size = size
		return
	}

	// insert
	if len(cs.list) >= SegmentChunkSize {
		panic(fmt.Sprintf("attempted to write more than %d entries on CompactMapSegment %p!!!", SegmentChunkSize, cs))
	}
	if len(cs.list) == SegmentChunkSize-1 {
		// if we max out our segment storage, pin its capacity to minimize memory usage
		nl := make([]CompactNeedleValue, SegmentChunkSize, SegmentChunkSize)
		copy(nl, cs.list[:i])
		copy(nl[i+1:], cs.list[i:])
		cs.list = nl
	} else {
		cs.list = append(cs.list, CompactNeedleValue{})
		copy(cs.list[i+1:], cs.list[i:])
	}

	ck := cs.compactKey(key)
	cs.list[i] = CompactNeedleValue{
		key:    ck,
		offset: OffsetToCompact(offset),
		size:   size,
	}
	if ck < cs.firstKey {
		cs.firstKey = ck
	}
	if ck > cs.lastKey {
		cs.lastKey = ck
	}

	return
}

// get seeks a map entry by key. Returns an entry pointer, with a boolean specifying if the entry was found.
func (cs *CompactMapSegment) get(key types.NeedleId) (*CompactNeedleValue, bool) {
	if i, found := cs.bsearchKey(key); found {
		return &cs.list[i], true
	}

	return nil, false
}

// delete deletes a map entry by key. Returns the entry's previous Size, if available.
func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size {
	if i, found := cs.bsearchKey(key); found {
		if cs.list[i].size > 0 && cs.list[i].size.IsValid() {
			ret := cs.list[i].size
			cs.list[i].size = -cs.list[i].size // a negative size marks the entry as deleted
			return ret
		}
	}

	return types.Size(0)
}
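
// NewCompactMap returns an empty CompactMap.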
func NewCompactMap() *CompactMap {
	return &CompactMap{
		segments: map[Chunk]*CompactMapSegment{},
	}
}
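
// Len returns the total number of entries across all segments.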
func (cm *CompactMap) Len() int {
	l := 0
	for _, s := range cm.segments {
		l += s.len()
	}
	return l
}
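
// Cap returns the total allocated capacity across all segments.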
func (cm *CompactMap) Cap() int {
	c := 0
	for _, s := range cm.segments {
		c += s.cap()
	}
	return c
}
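
// String summarizes the map's entry count, capacity, segment count and storage efficiency.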
func (cm *CompactMap) String() string {
	if cm.Len() == 0 {
		return "empty"
	}

	return fmt.Sprintf(
		"%d/%d elements on %d segments, %.02f%% efficiency",
		cm.Len(), cm.Cap(), len(cm.segments),
		float64(100)*float64(cm.Len())/float64(cm.Cap()))
}
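
// segmentForKey returns the segment for a given key, creating it if it doesn't exist yet.
// It may modify cm.segments, so callers must hold the write lock.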
func (cm *CompactMap) segmentForKey(key types.NeedleId) *CompactMapSegment {
	chunk := Chunk(key / SegmentChunkSize)
	if cs, ok := cm.segments[chunk]; ok {
		return cs
	}

	cs := newCompactMapSegment(chunk)
	cm.segments[chunk] = cs
	return cs
}

// Set inserts/updates a NeedleValue.
// If the operation is an update, returns the overwritten value's previous offset and size.
func (cm *CompactMap) Set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) {
	// Set mutates both the segment index and segment contents, so take the write lock.
	cm.Lock()
	defer cm.Unlock()

	cs := cm.segmentForKey(key)
	return cs.set(key, offset, size)
}

// Get seeks a map entry by key. Returns an entry pointer, with a boolean specifying if the entry was found.
func (cm *CompactMap) Get(key types.NeedleId) (*NeedleValue, bool) {
	cm.RLock()
	defer cm.RUnlock()

	// Look the segment up directly rather than through segmentForKey(), so a read for a
	// missing chunk doesn't allocate a segment while only holding the read lock.
	cs, ok := cm.segments[Chunk(key/SegmentChunkSize)]
	if !ok {
		return nil, false
	}
	if cnv, found := cs.get(key); found {
		nv := cnv.NeedleValue(cs.chunk)
		return &nv, true
	}

	return nil, false
}

// Delete deletes a map entry by key. Returns the entry's previous Size, if available.
func (cm *CompactMap) Delete(key types.NeedleId) types.Size {
	// Delete mutates segment contents (and may create a segment), so take the write lock.
	cm.Lock()
	defer cm.Unlock()

	cs := cm.segmentForKey(key)
	return cs.delete(key)
}

// AscendingVisit runs a function on all entries, in ascending key order. Returns any errors hit while visiting.
func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error {
	cm.RLock()
	defer cm.RUnlock()

	chunks := []Chunk{}
	for c := range cm.segments {
		chunks = append(chunks, c)
	}
	slices.Sort(chunks)

	for _, c := range chunks {
		cs := cm.segments[c]
		for _, cnv := range cs.list {
			nv := cnv.NeedleValue(cs.chunk)
			if err := visit(nv); err != nil {
				return err
			}
		}
	}
	return nil
}
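
// exampleCompactMapUsage is an illustrative sketch (not part of the original API) showing how
// Set, Get, Delete and AscendingVisit fit together. Keys and sizes are arbitrary examples, and
// offsets are left at their zero value purely as placeholders.
func exampleCompactMapUsage() {
	m := NewCompactMap()

	// Sequential keys land in the same segment and take the O(1) ordered-insert fast path.
	for i := 0; i < 10; i++ {
		m.Set(types.NeedleId(i), types.Offset{}, types.Size(100+i))
	}

	if nv, found := m.Get(types.NeedleId(3)); found {
		fmt.Println("key 3 has size", nv.Size)
	}

	// Delete marks the entry's size as negative rather than removing it from the segment.
	m.Delete(types.NeedleId(4))

	// Walk all entries in ascending key order.
	_ = m.AscendingVisit(func(nv NeedleValue) error {
		fmt.Println(nv.Key, nv.Size)
		return nil
	})

	fmt.Println(m.String())
}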