masterclient.go

package wdclient

import (
    "context"
    "fmt"
    "math/rand"
    "sync"
    "time"

    "google.golang.org/grpc"

    "github.com/seaweedfs/seaweedfs/weed/glog"
    "github.com/seaweedfs/seaweedfs/weed/pb"
    "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
    "github.com/seaweedfs/seaweedfs/weed/stats"
    "github.com/seaweedfs/seaweedfs/weed/util"
    "github.com/seaweedfs/seaweedfs/weed/util/version"
)
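
// MasterClient keeps a streaming connection to the master leader, caches
// volume id locations in the embedded vidMap, and notifies the optional
// OnPeerUpdate callback of cluster membership changes.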
type MasterClient struct {
    FilerGroup string
    clientType string
    clientHost pb.ServerAddress
    rack       string

    currentMaster     pb.ServerAddress
    currentMasterLock sync.RWMutex

    masters        pb.ServerDiscovery
    grpcDialOption grpc.DialOption

    *vidMap
    vidMapCacheSize int

    OnPeerUpdate     func(update *master_pb.ClusterNodeUpdate, startFrom time.Time)
    OnPeerUpdateLock sync.RWMutex
}
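
// NewMasterClient creates a MasterClient for the given cluster, with the
// vidMap history capped at five generations.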
func NewMasterClient(grpcDialOption grpc.DialOption, filerGroup string, clientType string, clientHost pb.ServerAddress, clientDataCenter string, rack string, masters pb.ServerDiscovery) *MasterClient {
    return &MasterClient{
        FilerGroup:      filerGroup,
        clientType:      clientType,
        clientHost:      clientHost,
        rack:            rack,
        masters:         masters,
        grpcDialOption:  grpcDialOption,
        vidMap:          newVidMap(clientDataCenter),
        vidMapCacheSize: 5,
    }
}
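
// SetOnPeerUpdateFn registers the callback invoked for each ClusterNodeUpdate
// received from the master.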
func (mc *MasterClient) SetOnPeerUpdateFn(onPeerUpdate func(update *master_pb.ClusterNodeUpdate, startFrom time.Time)) {
    mc.OnPeerUpdateLock.Lock()
    mc.OnPeerUpdate = onPeerUpdate
    mc.OnPeerUpdateLock.Unlock()
}
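
// GetLookupFileIdFunction returns LookupFileIdWithFallback as the lookup function.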
func (mc *MasterClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
    return mc.LookupFileIdWithFallback
}
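
// LookupFileIdWithFallback resolves a file id to HTTP URLs, first from the
// local vidMap cache, then by asking the master and caching the returned
// locations, with same-data-center URLs ordered first.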
func (mc *MasterClient) LookupFileIdWithFallback(ctx context.Context, fileId string) (fullUrls []string, err error) {
    fullUrls, err = mc.vidMap.LookupFileId(ctx, fileId)
    if err == nil && len(fullUrls) > 0 {
        return
    }
    // Not found locally: ask the master and cache what it returns.
    err = pb.WithMasterClient(false, mc.GetMaster(ctx), mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
        resp, err := client.LookupVolume(ctx, &master_pb.LookupVolumeRequest{
            VolumeOrFileIds: []string{fileId},
        })
        if err != nil {
            return fmt.Errorf("LookupVolume %s failed: %v", fileId, err)
        }
        for vid, vidLocation := range resp.VolumeIdLocations {
            for _, vidLoc := range vidLocation.Locations {
                loc := Location{
                    Url:        vidLoc.Url,
                    PublicUrl:  vidLoc.PublicUrl,
                    GrpcPort:   int(vidLoc.GrpcPort),
                    DataCenter: vidLoc.DataCenter,
                }
                mc.vidMap.addLocation(uint32(vid), loc)
                httpUrl := "http://" + loc.Url + "/" + fileId
                // Prefer locations in the same data center.
                if mc.DataCenter != "" && mc.DataCenter == loc.DataCenter {
                    fullUrls = append([]string{httpUrl}, fullUrls...)
                } else {
                    fullUrls = append(fullUrls, httpUrl)
                }
            }
        }
        return nil
    })
    return
}

func (mc *MasterClient) getCurrentMaster() pb.ServerAddress {
    mc.currentMasterLock.RLock()
    defer mc.currentMasterLock.RUnlock()
    return mc.currentMaster
}

func (mc *MasterClient) setCurrentMaster(master pb.ServerAddress) {
    mc.currentMasterLock.Lock()
    mc.currentMaster = master
    mc.currentMasterLock.Unlock()
}
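
// GetMaster blocks until a master is connected, then returns the current
// master address.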
func (mc *MasterClient) GetMaster(ctx context.Context) pb.ServerAddress {
    mc.WaitUntilConnected(ctx)
    return mc.getCurrentMaster()
}
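
// GetMasters blocks until a master is connected, then returns all known
// master addresses.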
func (mc *MasterClient) GetMasters(ctx context.Context) []pb.ServerAddress {
    mc.WaitUntilConnected(ctx)
    return mc.masters.GetInstances()
}
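
// WaitUntilConnected polls until a current master is known or ctx is canceled.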
func (mc *MasterClient) WaitUntilConnected(ctx context.Context) {
    for {
        select {
        case <-ctx.Done():
            glog.V(0).Infof("Connection wait stopped: %v", ctx.Err())
            return
        default:
            if mc.getCurrentMaster() != "" {
                return
            }
            // Sleep with jitter to avoid tight-looping while disconnected.
            time.Sleep(time.Duration(rand.Int31n(200)) * time.Millisecond)
            print(".")
        }
    }
}
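
// KeepConnectedToMaster runs the reconnect loop: it keeps trying the known
// masters until ctx is canceled.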
func (mc *MasterClient) KeepConnectedToMaster(ctx context.Context) {
    glog.V(1).Infof("%s.%s masterClient bootstraps with masters %v", mc.FilerGroup, mc.clientType, mc.masters)
    for {
        select {
        case <-ctx.Done():
            glog.V(0).Infof("Connection to masters stopped: %v", ctx.Err())
            return
        default:
            mc.tryAllMasters(ctx)
            time.Sleep(time.Second)
        }
    }
}
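
// FindLeaderFromOtherPeers asks the other masters for the current leader and
// returns the first answer, or "" if none of them knows.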
func (mc *MasterClient) FindLeaderFromOtherPeers(myMasterAddress pb.ServerAddress) (leader string) {
    for _, master := range mc.masters.GetInstances() {
        if master == myMasterAddress {
            continue
        }
        if grpcErr := pb.WithMasterClient(false, master, mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
            ctx, cancel := context.WithTimeout(context.Background(), 120*time.Millisecond)
            defer cancel()
            resp, err := client.GetMasterConfiguration(ctx, &master_pb.GetMasterConfigurationRequest{})
            if err != nil {
                return err
            }
            leader = resp.Leader
            return nil
        }); grpcErr != nil {
            glog.V(0).Infof("connect to %s: %v", master, grpcErr)
        }
        if leader != "" {
            glog.V(0).Infof("existing leader is %s", leader)
            return
        }
    }
    glog.V(0).Infof("No existing leader found!")
    return
}
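
// tryAllMasters walks the known masters, following any leader hints returned
// by tryConnectToMaster until ctx is canceled.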
func (mc *MasterClient) tryAllMasters(ctx context.Context) {
    var nextHintedLeader pb.ServerAddress
    mc.masters.RefreshBySrvIfAvailable()
    for _, master := range mc.masters.GetInstances() {
        nextHintedLeader = mc.tryConnectToMaster(ctx, master)
        for nextHintedLeader != "" {
            select {
            case <-ctx.Done():
                glog.V(0).Infof("Connection attempt to all masters stopped: %v", ctx.Err())
                return
            default:
                nextHintedLeader = mc.tryConnectToMaster(ctx, nextHintedLeader)
            }
        }
        mc.setCurrentMaster("")
    }
}
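
// tryConnectToMaster holds a KeepConnected stream open to one master and
// applies the volume location and cluster node updates it sends. If the
// master points at a different leader, that address is returned as a hint.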
func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.ServerAddress) (nextHintedLeader pb.ServerAddress) {
    glog.V(1).Infof("%s.%s masterClient Connecting to master %v", mc.FilerGroup, mc.clientType, master)
    stats.MasterClientConnectCounter.WithLabelValues("total").Inc()
    grpcErr := pb.WithMasterClient(true, master, mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
        ctx, cancel := context.WithCancel(ctx)
        defer cancel()
        stream, err := client.KeepConnected(ctx)
        if err != nil {
            glog.V(1).Infof("%s.%s masterClient failed to keep connected to %s: %v", mc.FilerGroup, mc.clientType, master, err)
            stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToKeepConnected).Inc()
            return err
        }
        if err = stream.Send(&master_pb.KeepConnectedRequest{
            FilerGroup:    mc.FilerGroup,
            DataCenter:    mc.DataCenter,
            Rack:          mc.rack,
            ClientType:    mc.clientType,
            ClientAddress: string(mc.clientHost),
            Version:       version.Version(),
        }); err != nil {
            glog.V(0).Infof("%s.%s masterClient failed to send to %s: %v", mc.FilerGroup, mc.clientType, master, err)
            stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToSend).Inc()
            return err
        }
        glog.V(1).Infof("%s.%s masterClient Connected to %v", mc.FilerGroup, mc.clientType, master)

        resp, err := stream.Recv()
        if err != nil {
            glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
            stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
            return err
        }

        // Check if it is the leader to determine whether to reset the vidMap.
        if resp.VolumeLocation != nil {
            if resp.VolumeLocation.Leader != "" && string(master) != resp.VolumeLocation.Leader {
                glog.V(0).Infof("master %v redirected to leader %v", master, resp.VolumeLocation.Leader)
                nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader)
                stats.MasterClientConnectCounter.WithLabelValues(stats.RedirectedToLeader).Inc()
                return nil
            }
            mc.resetVidMap()
            mc.updateVidMap(resp)
        } else {
            mc.resetVidMap()
        }
        mc.setCurrentMaster(master)

        for {
            resp, err := stream.Recv()
            if err != nil {
                glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
                stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
                return err
            }

            if resp.VolumeLocation != nil {
                // Maybe the leader changed.
                if resp.VolumeLocation.Leader != "" && string(mc.GetMaster(ctx)) != resp.VolumeLocation.Leader {
                    glog.V(0).Infof("currentMaster %v redirected to leader %v", mc.GetMaster(ctx), resp.VolumeLocation.Leader)
                    nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader)
                    stats.MasterClientConnectCounter.WithLabelValues(stats.RedirectedToLeader).Inc()
                    return nil
                }
                mc.updateVidMap(resp)
            }

            if resp.ClusterNodeUpdate != nil {
                update := resp.ClusterNodeUpdate
                mc.OnPeerUpdateLock.RLock()
                if mc.OnPeerUpdate != nil {
                    if update.FilerGroup == mc.FilerGroup {
                        if update.IsAdd {
                            glog.V(0).Infof("+ %s@%s noticed %s.%s %s\n", mc.clientType, mc.clientHost, update.FilerGroup, update.NodeType, update.Address)
                        } else {
                            glog.V(0).Infof("- %s@%s noticed %s.%s %s\n", mc.clientType, mc.clientHost, update.FilerGroup, update.NodeType, update.Address)
                        }
                        stats.MasterClientConnectCounter.WithLabelValues(stats.OnPeerUpdate).Inc()
                        mc.OnPeerUpdate(update, time.Now())
                    }
                }
                mc.OnPeerUpdateLock.RUnlock()
            }

            if err := ctx.Err(); err != nil {
                glog.V(0).Infof("Connection attempt to master stopped: %v", err)
                return err
            }
        }
    })
    if grpcErr != nil {
        stats.MasterClientConnectCounter.WithLabelValues(stats.Failed).Inc()
        glog.V(1).Infof("%s.%s masterClient failed to connect with master %v: %v", mc.FilerGroup, mc.clientType, master, grpcErr)
    }
    return
}
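
// updateVidMap applies the volume additions and deletions carried in a
// KeepConnectedResponse to the local vidMap.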
func (mc *MasterClient) updateVidMap(resp *master_pb.KeepConnectedResponse) {
    if resp.VolumeLocation.IsEmptyUrl() {
        glog.V(0).Infof("updateVidMap ignore short heartbeat: %+v", resp)
        return
    }
    // Process the new volume location.
    loc := Location{
        Url:        resp.VolumeLocation.Url,
        PublicUrl:  resp.VolumeLocation.PublicUrl,
        DataCenter: resp.VolumeLocation.DataCenter,
        GrpcPort:   int(resp.VolumeLocation.GrpcPort),
    }
    for _, newVid := range resp.VolumeLocation.NewVids {
        glog.V(2).Infof("%s.%s: %s masterClient adds volume %d", mc.FilerGroup, mc.clientType, loc.Url, newVid)
        mc.addLocation(newVid, loc)
    }
    for _, deletedVid := range resp.VolumeLocation.DeletedVids {
        glog.V(2).Infof("%s.%s: %s masterClient removes volume %d", mc.FilerGroup, mc.clientType, loc.Url, deletedVid)
        mc.deleteLocation(deletedVid, loc)
    }
    for _, newEcVid := range resp.VolumeLocation.NewEcVids {
        glog.V(2).Infof("%s.%s: %s masterClient adds ec volume %d", mc.FilerGroup, mc.clientType, loc.Url, newEcVid)
        mc.addEcLocation(newEcVid, loc)
    }
    for _, deletedEcVid := range resp.VolumeLocation.DeletedEcVids {
        glog.V(2).Infof("%s.%s: %s masterClient removes ec volume %d", mc.FilerGroup, mc.clientType, loc.Url, deletedEcVid)
        mc.deleteEcLocation(deletedEcVid, loc)
    }
    glog.V(1).Infof("updateVidMap(%s) %s.%s: %s volume add: %d, del: %d, add ec: %d del ec: %d",
        resp.VolumeLocation.DataCenter, mc.FilerGroup, mc.clientType, loc.Url,
        len(resp.VolumeLocation.NewVids), len(resp.VolumeLocation.DeletedVids),
        len(resp.VolumeLocation.NewEcVids), len(resp.VolumeLocation.DeletedEcVids))
}
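
// WithClient runs fn against the current master, wrapped in util.Retry to
// survive transient connection errors.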
func (mc *MasterClient) WithClient(streamingMode bool, fn func(client master_pb.SeaweedClient) error) error {
    getMasterF := func() pb.ServerAddress { return mc.GetMaster(context.Background()) }
    return mc.WithClientCustomGetMaster(getMasterF, streamingMode, fn)
}
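
// WithClientCustomGetMaster is like WithClient, but resolves the master
// address through getMasterF on each attempt.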
func (mc *MasterClient) WithClientCustomGetMaster(getMasterF func() pb.ServerAddress, streamingMode bool, fn func(client master_pb.SeaweedClient) error) error {
    return util.Retry("master grpc", func() error {
        return pb.WithMasterClient(streamingMode, getMasterF(), mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
            return fn(client)
        })
    })
}
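
// resetVidMap swaps in a fresh vidMap and chains the old one behind it as a
// lookup fallback, trimming the chain to vidMapCacheSize generations.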
func (mc *MasterClient) resetVidMap() {
    tail := &vidMap{
        vid2Locations:   mc.vid2Locations,
        ecVid2Locations: mc.ecVid2Locations,
        DataCenter:      mc.DataCenter,
        cache:           mc.cache,
    }

    nvm := newVidMap(mc.DataCenter)
    nvm.cache = tail
    mc.vidMap = nvm

    // Trim the cache chain to at most vidMapCacheSize entries.
    for i := 0; i < mc.vidMapCacheSize && tail.cache != nil; i++ {
        if i == mc.vidMapCacheSize-1 {
            tail.cache = nil
        } else {
            tail = tail.cache
        }
    }
}