test_sample_data_test.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. package engine
  2. import (
  3. "time"
  4. "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
  5. )
  6. // generateSampleHybridData creates sample data that simulates both live and archived messages
  7. // This function is only used for testing and is not included in production builds
  8. func generateSampleHybridData(topicName string, options HybridScanOptions) []HybridScanResult {
  9. now := time.Now().UnixNano()
  10. // Generate different sample data based on topic name
  11. var sampleData []HybridScanResult
  12. switch topicName {
  13. case "user_events":
  14. sampleData = []HybridScanResult{
  15. // Simulated live log data (recent)
  16. // Generate more test data to support LIMIT/OFFSET testing
  17. {
  18. Values: map[string]*schema_pb.Value{
  19. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 82460}},
  20. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 9465}},
  21. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_login"}},
  22. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "10.0.0.1", "live": true}`}},
  23. "status": {Kind: &schema_pb.Value_StringValue{StringValue: "active"}},
  24. "action": {Kind: &schema_pb.Value_StringValue{StringValue: "login"}},
  25. "user_type": {Kind: &schema_pb.Value_StringValue{StringValue: "premium"}},
  26. "amount": {Kind: &schema_pb.Value_DoubleValue{DoubleValue: 43.619326294957126}},
  27. },
  28. Timestamp: now - 300000000000, // 5 minutes ago
  29. Key: []byte("live-user-9465"),
  30. Source: "live_log",
  31. },
  32. {
  33. Values: map[string]*schema_pb.Value{
  34. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 841256}},
  35. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 2336}},
  36. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_action"}},
  37. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"action": "click", "live": true}`}},
  38. "status": {Kind: &schema_pb.Value_StringValue{StringValue: "pending"}},
  39. "action": {Kind: &schema_pb.Value_StringValue{StringValue: "click"}},
  40. "user_type": {Kind: &schema_pb.Value_StringValue{StringValue: "standard"}},
  41. "amount": {Kind: &schema_pb.Value_DoubleValue{DoubleValue: 550.0278410655299}},
  42. },
  43. Timestamp: now - 120000000000, // 2 minutes ago
  44. Key: []byte("live-user-2336"),
  45. Source: "live_log",
  46. },
  47. {
  48. Values: map[string]*schema_pb.Value{
  49. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 55537}},
  50. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 6912}},
  51. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "purchase"}},
  52. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"amount": 25.99, "item": "book"}`}},
  53. },
  54. Timestamp: now - 90000000000, // 1.5 minutes ago
  55. Key: []byte("live-user-6912"),
  56. Source: "live_log",
  57. },
  58. {
  59. Values: map[string]*schema_pb.Value{
  60. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 65143}},
  61. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 5102}},
  62. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "page_view"}},
  63. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"page": "/home", "duration": 30}`}},
  64. },
  65. Timestamp: now - 80000000000, // 80 seconds ago
  66. Key: []byte("live-user-5102"),
  67. Source: "live_log",
  68. },
  69. // Simulated archived Parquet data (older)
  70. {
  71. Values: map[string]*schema_pb.Value{
  72. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 686003}},
  73. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 2759}},
  74. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_login"}},
  75. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1", "archived": true}`}},
  76. },
  77. Timestamp: now - 3600000000000, // 1 hour ago
  78. Key: []byte("archived-user-2759"),
  79. Source: "parquet_archive",
  80. },
  81. {
  82. Values: map[string]*schema_pb.Value{
  83. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 417224}},
  84. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 7810}},
  85. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_logout"}},
  86. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"duration": 1800, "archived": true}`}},
  87. },
  88. Timestamp: now - 1800000000000, // 30 minutes ago
  89. Key: []byte("archived-user-7810"),
  90. Source: "parquet_archive",
  91. },
  92. {
  93. Values: map[string]*schema_pb.Value{
  94. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 424297}},
  95. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 8897}},
  96. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "purchase"}},
  97. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"amount": 45.50, "item": "electronics"}`}},
  98. },
  99. Timestamp: now - 1500000000000, // 25 minutes ago
  100. Key: []byte("archived-user-8897"),
  101. Source: "parquet_archive",
  102. },
  103. {
  104. Values: map[string]*schema_pb.Value{
  105. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 431189}},
  106. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 3400}},
  107. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "signup"}},
  108. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"referral": "google", "plan": "free"}`}},
  109. },
  110. Timestamp: now - 1200000000000, // 20 minutes ago
  111. Key: []byte("archived-user-3400"),
  112. Source: "parquet_archive",
  113. },
  114. {
  115. Values: map[string]*schema_pb.Value{
  116. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 413249}},
  117. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 5175}},
  118. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "update_profile"}},
  119. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"field": "email", "new_value": "user@example.com"}`}},
  120. },
  121. Timestamp: now - 900000000000, // 15 minutes ago
  122. Key: []byte("archived-user-5175"),
  123. Source: "parquet_archive",
  124. },
  125. {
  126. Values: map[string]*schema_pb.Value{
  127. "id": {Kind: &schema_pb.Value_Int64Value{Int64Value: 120612}},
  128. "user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 5429}},
  129. "event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "comment"}},
  130. "data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"post_id": 123, "comment": "Great post!"}`}},
  131. },
  132. Timestamp: now - 600000000000, // 10 minutes ago
  133. Key: []byte("archived-user-5429"),
  134. Source: "parquet_archive",
  135. },
  136. }
  137. case "system_logs":
  138. sampleData = []HybridScanResult{
  139. // Simulated live system logs (recent)
  140. {
  141. Values: map[string]*schema_pb.Value{
  142. "level": {Kind: &schema_pb.Value_StringValue{StringValue: "INFO"}},
  143. "message": {Kind: &schema_pb.Value_StringValue{StringValue: "Live system startup completed"}},
  144. "service": {Kind: &schema_pb.Value_StringValue{StringValue: "auth-service"}},
  145. },
  146. Timestamp: now - 240000000000, // 4 minutes ago
  147. Key: []byte("live-sys-001"),
  148. Source: "live_log",
  149. },
  150. {
  151. Values: map[string]*schema_pb.Value{
  152. "level": {Kind: &schema_pb.Value_StringValue{StringValue: "WARN"}},
  153. "message": {Kind: &schema_pb.Value_StringValue{StringValue: "Live high memory usage detected"}},
  154. "service": {Kind: &schema_pb.Value_StringValue{StringValue: "monitor-service"}},
  155. },
  156. Timestamp: now - 180000000000, // 3 minutes ago
  157. Key: []byte("live-sys-002"),
  158. Source: "live_log",
  159. },
  160. // Simulated archived system logs (older)
  161. {
  162. Values: map[string]*schema_pb.Value{
  163. "level": {Kind: &schema_pb.Value_StringValue{StringValue: "ERROR"}},
  164. "message": {Kind: &schema_pb.Value_StringValue{StringValue: "Archived database connection failed"}},
  165. "service": {Kind: &schema_pb.Value_StringValue{StringValue: "db-service"}},
  166. },
  167. Timestamp: now - 7200000000000, // 2 hours ago
  168. Key: []byte("archived-sys-001"),
  169. Source: "parquet_archive",
  170. },
  171. {
  172. Values: map[string]*schema_pb.Value{
  173. "level": {Kind: &schema_pb.Value_StringValue{StringValue: "INFO"}},
  174. "message": {Kind: &schema_pb.Value_StringValue{StringValue: "Archived batch job completed"}},
  175. "service": {Kind: &schema_pb.Value_StringValue{StringValue: "batch-service"}},
  176. },
  177. Timestamp: now - 3600000000000, // 1 hour ago
  178. Key: []byte("archived-sys-002"),
  179. Source: "parquet_archive",
  180. },
  181. }
  182. default:
  183. // For unknown topics, return empty data
  184. sampleData = []HybridScanResult{}
  185. }
  186. // Apply predicate filtering if specified
  187. if options.Predicate != nil {
  188. var filtered []HybridScanResult
  189. for _, result := range sampleData {
  190. // Convert to RecordValue for predicate testing
  191. recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
  192. for k, v := range result.Values {
  193. recordValue.Fields[k] = v
  194. }
  195. recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
  196. recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
  197. if options.Predicate(recordValue) {
  198. filtered = append(filtered, result)
  199. }
  200. }
  201. sampleData = filtered
  202. }
  203. return sampleData
  204. }