compact_map.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. package needle_map
  2. /* CompactMap is an in-memory map of needle indeces, optimized for memory usage.
  3. *
  4. * It's implemented as a map of sorted indeces segments, which are in turn accessed through binary
  5. * search. This guarantees a best-case scenario (ordered inserts/updates) of O(1) and a worst case
  6. * scenario of O(log n) runtime, with memory usage unaffected by insert ordering.
  7. *
  8. * Note that even at O(log n), the clock time for both reads and writes is very low, so CompactMap
  9. * will seldom bottleneck index operations.
  10. */
  11. import (
  12. "fmt"
  13. "math"
  14. "slices"
  15. "sort"
  16. "sync"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  18. )
  19. const (
  20. MaxCompactKey = math.MaxUint16
  21. SegmentChunkSize = 50000 // should be <= MaxCompactKey
  22. )
  23. type CompactKey uint16
  24. type CompactOffset [types.OffsetSize]byte
  25. type CompactNeedleValue struct {
  26. key CompactKey
  27. offset CompactOffset
  28. size types.Size
  29. }
  30. type Chunk uint64
  31. type CompactMapSegment struct {
  32. list []CompactNeedleValue
  33. chunk Chunk
  34. firstKey CompactKey
  35. lastKey CompactKey
  36. }
  37. type CompactMap struct {
  38. sync.RWMutex
  39. segments map[Chunk]*CompactMapSegment
  40. }
  41. func (ck CompactKey) Key(chunk Chunk) types.NeedleId {
  42. return (types.NeedleId(SegmentChunkSize) * types.NeedleId(chunk)) + types.NeedleId(ck)
  43. }
  44. func OffsetToCompact(offset types.Offset) CompactOffset {
  45. var co CompactOffset
  46. types.OffsetToBytes(co[:], offset)
  47. return co
  48. }
  49. func (co CompactOffset) Offset() types.Offset {
  50. return types.BytesToOffset(co[:])
  51. }
  52. func (cnv CompactNeedleValue) NeedleValue(chunk Chunk) NeedleValue {
  53. return NeedleValue{
  54. Key: cnv.key.Key(chunk),
  55. Offset: cnv.offset.Offset(),
  56. Size: cnv.size,
  57. }
  58. }
  59. func newCompactMapSegment(chunk Chunk) *CompactMapSegment {
  60. return &CompactMapSegment{
  61. list: []CompactNeedleValue{},
  62. chunk: chunk,
  63. firstKey: MaxCompactKey,
  64. lastKey: 0,
  65. }
  66. }
  67. func (cs *CompactMapSegment) len() int {
  68. return len(cs.list)
  69. }
  70. func (cs *CompactMapSegment) cap() int {
  71. return cap(cs.list)
  72. }
  73. func (cs *CompactMapSegment) compactKey(key types.NeedleId) CompactKey {
  74. return CompactKey(key - (types.NeedleId(SegmentChunkSize) * types.NeedleId(cs.chunk)))
  75. }
  76. // bsearchKey returns the CompactNeedleValue index for a given ID key.
  77. // If the key is not found, it returns the index where it should be inserted instead.
  78. func (cs *CompactMapSegment) bsearchKey(key types.NeedleId) (int, bool) {
  79. ck := cs.compactKey(key)
  80. switch {
  81. case len(cs.list) == 0:
  82. return 0, false
  83. case ck == cs.firstKey:
  84. return 0, true
  85. case ck <= cs.firstKey:
  86. return 0, false
  87. case ck == cs.lastKey:
  88. return len(cs.list) - 1, true
  89. case ck > cs.lastKey:
  90. return len(cs.list), false
  91. }
  92. i := sort.Search(len(cs.list), func(i int) bool {
  93. return cs.list[i].key >= ck
  94. })
  95. return i, cs.list[i].key == ck
  96. }
  97. // set inserts/updates a CompactNeedleValue.
  98. // If the operation is an update, returns the overwritten value's previous offset and size.
  99. func (cs *CompactMapSegment) set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) {
  100. i, found := cs.bsearchKey(key)
  101. if found {
  102. // update
  103. o := cs.list[i].offset.Offset()
  104. oldOffset.OffsetLower = o.OffsetLower
  105. oldOffset.OffsetHigher = o.OffsetHigher
  106. oldSize = cs.list[i].size
  107. o.OffsetLower = offset.OffsetLower
  108. o.OffsetHigher = offset.OffsetHigher
  109. cs.list[i].offset = OffsetToCompact(o)
  110. cs.list[i].size = size
  111. return
  112. }
  113. // insert
  114. if len(cs.list) >= SegmentChunkSize {
  115. panic(fmt.Sprintf("attempted to write more than %d entries on CompactMapSegment %p!!!", SegmentChunkSize, cs))
  116. }
  117. if len(cs.list) == SegmentChunkSize-1 {
  118. // if we max out our segment storage, pin its capacity to minimize memory usage
  119. nl := make([]CompactNeedleValue, SegmentChunkSize, SegmentChunkSize)
  120. copy(nl, cs.list[:i])
  121. copy(nl[i+1:], cs.list[i:])
  122. cs.list = nl
  123. } else {
  124. cs.list = append(cs.list, CompactNeedleValue{})
  125. copy(cs.list[i+1:], cs.list[i:])
  126. }
  127. ck := cs.compactKey(key)
  128. cs.list[i] = CompactNeedleValue{
  129. key: ck,
  130. offset: OffsetToCompact(offset),
  131. size: size,
  132. }
  133. if ck < cs.firstKey {
  134. cs.firstKey = ck
  135. }
  136. if ck > cs.lastKey {
  137. cs.lastKey = ck
  138. }
  139. return
  140. }
  141. // get seeks a map entry by key. Returns an entry pointer, with a boolean specifiying if the entry was found.
  142. func (cs *CompactMapSegment) get(key types.NeedleId) (*CompactNeedleValue, bool) {
  143. if i, found := cs.bsearchKey(key); found {
  144. return &cs.list[i], true
  145. }
  146. return nil, false
  147. }
  148. // delete deletes a map entry by key. Returns the entries' previous Size, if available.
  149. func (cs *CompactMapSegment) delete(key types.NeedleId) types.Size {
  150. if i, found := cs.bsearchKey(key); found {
  151. if cs.list[i].size > 0 && cs.list[i].size.IsValid() {
  152. ret := cs.list[i].size
  153. cs.list[i].size = -cs.list[i].size
  154. return ret
  155. }
  156. }
  157. return types.Size(0)
  158. }
  159. func NewCompactMap() *CompactMap {
  160. return &CompactMap{
  161. segments: map[Chunk]*CompactMapSegment{},
  162. }
  163. }
  164. func (cm *CompactMap) Len() int {
  165. l := 0
  166. for _, s := range cm.segments {
  167. l += s.len()
  168. }
  169. return l
  170. }
  171. func (cm *CompactMap) Cap() int {
  172. c := 0
  173. for _, s := range cm.segments {
  174. c += s.cap()
  175. }
  176. return c
  177. }
  178. func (cm *CompactMap) String() string {
  179. if cm.Len() == 0 {
  180. return "empty"
  181. }
  182. return fmt.Sprintf(
  183. "%d/%d elements on %d segments, %.02f%% efficiency",
  184. cm.Len(), cm.Cap(), len(cm.segments),
  185. float64(100)*float64(cm.Len())/float64(cm.Cap()))
  186. }
  187. func (cm *CompactMap) segmentForKey(key types.NeedleId) *CompactMapSegment {
  188. chunk := Chunk(key / SegmentChunkSize)
  189. if cs, ok := cm.segments[chunk]; ok {
  190. return cs
  191. }
  192. cs := newCompactMapSegment(chunk)
  193. cm.segments[chunk] = cs
  194. return cs
  195. }
  196. // Set inserts/updates a NeedleValue.
  197. // If the operation is an update, returns the overwritten value's previous offset and size.
  198. func (cm *CompactMap) Set(key types.NeedleId, offset types.Offset, size types.Size) (oldOffset types.Offset, oldSize types.Size) {
  199. cm.RLock()
  200. defer cm.RUnlock()
  201. cs := cm.segmentForKey(key)
  202. return cs.set(key, offset, size)
  203. }
  204. // Get seeks a map entry by key. Returns an entry pointer, with a boolean specifiying if the entry was found.
  205. func (cm *CompactMap) Get(key types.NeedleId) (*NeedleValue, bool) {
  206. cm.RLock()
  207. defer cm.RUnlock()
  208. cs := cm.segmentForKey(key)
  209. if cnv, found := cs.get(key); found {
  210. nv := cnv.NeedleValue(cs.chunk)
  211. return &nv, true
  212. }
  213. return nil, false
  214. }
  215. // Delete deletes a map entry by key. Returns the entries' previous Size, if available.
  216. func (cm *CompactMap) Delete(key types.NeedleId) types.Size {
  217. cm.RLock()
  218. defer cm.RUnlock()
  219. cs := cm.segmentForKey(key)
  220. return cs.delete(key)
  221. }
  222. // AscendingVisit runs a function on all entries, in ascending key order. Returns any errors hit while visiting.
  223. func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error {
  224. cm.RLock()
  225. defer cm.RUnlock()
  226. chunks := []Chunk{}
  227. for c := range cm.segments {
  228. chunks = append(chunks, c)
  229. }
  230. slices.Sort(chunks)
  231. for _, c := range chunks {
  232. cs := cm.segments[c]
  233. for _, cnv := range cs.list {
  234. nv := cnv.NeedleValue(cs.chunk)
  235. if err := visit(nv); err != nil {
  236. return err
  237. }
  238. }
  239. }
  240. return nil
  241. }