// NOTE: a collapsed line-number gutter (extraction artifact, not code) was removed here.
- package engine
- import (
- "context"
- "encoding/binary"
- "errors"
- "testing"
- "github.com/seaweedfs/seaweedfs/weed/mq/topic"
- "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
- "github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/mock"
- "google.golang.org/protobuf/proto"
- )
// Mock implementations for testing

// MockHybridMessageScanner is a testify-based mock of the hybrid message
// scanner used by the aggregation fast path.
type MockHybridMessageScanner struct {
	mock.Mock
	topic topic.Topic // topic this scanner is bound to (not read by the mocked method below)
}
- func (m *MockHybridMessageScanner) ReadParquetStatistics(partitionPath string) ([]*ParquetFileStats, error) {
- args := m.Called(partitionPath)
- return args.Get(0).([]*ParquetFileStats), args.Error(1)
- }
// MockSQLEngine wraps a minimal real SQLEngine and overlays canned data the
// tests below use in place of real topic discovery and log scanning.
type MockSQLEngine struct {
	*SQLEngine
	mockPartitions         map[string][]string                       // "<namespace>.<topic>" -> partition paths
	mockParquetSourceFiles map[string]map[string]bool                // partition -> set of log files already converted to parquet
	mockLiveLogRowCounts   map[string]int64                          // partition -> live-log row count
	mockColumnStats        map[string]map[string]*ParquetColumnStats // partition -> column -> stats
}
- func NewMockSQLEngine() *MockSQLEngine {
- return &MockSQLEngine{
- SQLEngine: &SQLEngine{
- catalog: &SchemaCatalog{
- databases: make(map[string]*DatabaseInfo),
- currentDatabase: "test",
- },
- },
- mockPartitions: make(map[string][]string),
- mockParquetSourceFiles: make(map[string]map[string]bool),
- mockLiveLogRowCounts: make(map[string]int64),
- mockColumnStats: make(map[string]map[string]*ParquetColumnStats),
- }
- }
- func (m *MockSQLEngine) discoverTopicPartitions(namespace, topicName string) ([]string, error) {
- key := namespace + "." + topicName
- if partitions, exists := m.mockPartitions[key]; exists {
- return partitions, nil
- }
- return []string{"partition-1", "partition-2"}, nil
- }
- func (m *MockSQLEngine) extractParquetSourceFiles(fileStats []*ParquetFileStats) map[string]bool {
- if len(fileStats) == 0 {
- return make(map[string]bool)
- }
- return map[string]bool{"converted-log-1": true}
- }
- func (m *MockSQLEngine) countLiveLogRowsExcludingParquetSources(ctx context.Context, partition string, parquetSources map[string]bool) (int64, error) {
- if count, exists := m.mockLiveLogRowCounts[partition]; exists {
- return count, nil
- }
- return 25, nil
- }
- func (m *MockSQLEngine) computeLiveLogMinMax(partition, column string, parquetSources map[string]bool) (interface{}, interface{}, error) {
- switch column {
- case "id":
- return int64(1), int64(50), nil
- case "value":
- return 10.5, 99.9, nil
- default:
- return nil, nil, nil
- }
- }
// getSystemColumnGlobalMin returns a fixed global minimum for system
// columns, ignoring the supplied per-file stats.
func (m *MockSQLEngine) getSystemColumnGlobalMin(column string, allFileStats map[string][]*ParquetFileStats) interface{} {
	return int64(1000000000)
}
// getSystemColumnGlobalMax returns a fixed global maximum for system
// columns, ignoring the supplied per-file stats.
func (m *MockSQLEngine) getSystemColumnGlobalMax(column string, allFileStats map[string][]*ParquetFileStats) interface{} {
	return int64(2000000000)
}
- func createMockColumnStats(column string, minVal, maxVal interface{}) *ParquetColumnStats {
- return &ParquetColumnStats{
- ColumnName: column,
- MinValue: convertToSchemaValue(minVal),
- MaxValue: convertToSchemaValue(maxVal),
- NullCount: 0,
- }
- }
- func convertToSchemaValue(val interface{}) *schema_pb.Value {
- switch v := val.(type) {
- case int64:
- return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: v}}
- case float64:
- return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: v}}
- case string:
- return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: v}}
- }
- return nil
- }
// Test FastPathOptimizer

// TestFastPathOptimizer_DetermineStrategy verifies that COUNT/MAX/MIN (and
// an empty aggregation list) qualify for the fast path while AVG is rejected
// with the unsupported-functions reason.
func TestFastPathOptimizer_DetermineStrategy(t *testing.T) {
	engine := NewMockSQLEngine()
	optimizer := NewFastPathOptimizer(engine.SQLEngine)
	tests := []struct {
		name         string
		aggregations []AggregationSpec
		expected     AggregationStrategy
	}{
		{
			name: "Supported aggregations",
			aggregations: []AggregationSpec{
				{Function: FuncCOUNT, Column: "*"},
				{Function: FuncMAX, Column: "id"},
				{Function: FuncMIN, Column: "value"},
			},
			expected: AggregationStrategy{
				CanUseFastPath:   true,
				Reason:           "all_aggregations_supported",
				UnsupportedSpecs: []AggregationSpec{},
			},
		},
		{
			name: "Unsupported aggregation",
			aggregations: []AggregationSpec{
				{Function: FuncCOUNT, Column: "*"},
				{Function: FuncAVG, Column: "value"}, // Not supported
			},
			expected: AggregationStrategy{
				CanUseFastPath: false,
				Reason:         "unsupported_aggregation_functions",
			},
		},
		{
			// No aggregations at all is trivially fast-path eligible.
			name:         "Empty aggregations",
			aggregations: []AggregationSpec{},
			expected: AggregationStrategy{
				CanUseFastPath:   true,
				Reason:           "all_aggregations_supported",
				UnsupportedSpecs: []AggregationSpec{},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			strategy := optimizer.DetermineStrategy(tt.aggregations)
			assert.Equal(t, tt.expected.CanUseFastPath, strategy.CanUseFastPath)
			assert.Equal(t, tt.expected.Reason, strategy.Reason)
			// Rejections must report which specs blocked the fast path.
			if !tt.expected.CanUseFastPath {
				assert.NotEmpty(t, strategy.UnsupportedSpecs)
			}
		})
	}
}
// Test AggregationComputer

// TestAggregationComputer_ComputeFastPathAggregations checks COUNT, MAX, and
// MIN computed from parquet column statistics plus the live-log row count,
// for a single partition with one parquet file (30 rows) and 25 live rows.
func TestAggregationComputer_ComputeFastPathAggregations(t *testing.T) {
	engine := NewMockSQLEngine()
	computer := NewAggregationComputer(engine.SQLEngine)
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"/topics/test/topic1/partition-1": {
				{
					RowCount: 30,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": createMockColumnStats("id", int64(10), int64(40)),
					},
				},
			},
		},
		ParquetRowCount: 30,
		LiveLogRowCount: 25,
		PartitionsCount: 1,
	}
	partitions := []string{"/topics/test/topic1/partition-1"}
	tests := []struct {
		name         string
		aggregations []AggregationSpec
		validate     func(t *testing.T, results []AggregationResult)
	}{
		{
			name: "COUNT aggregation",
			aggregations: []AggregationSpec{
				{Function: FuncCOUNT, Column: "*"},
			},
			validate: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 1)
				assert.Equal(t, int64(55), results[0].Count) // 30 + 25
			},
		},
		{
			name: "MAX aggregation",
			aggregations: []AggregationSpec{
				{Function: FuncMAX, Column: "id"},
			},
			validate: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 1)
				// Should be max of parquet stats (40) - mock doesn't combine with live log
				assert.Equal(t, int64(40), results[0].Max)
			},
		},
		{
			name: "MIN aggregation",
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "id"},
			},
			validate: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 1)
				// Should be min of parquet stats (10) - mock doesn't combine with live log
				assert.Equal(t, int64(10), results[0].Min)
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			results, err := computer.ComputeFastPathAggregations(ctx, tt.aggregations, dataSources, partitions)
			assert.NoError(t, err)
			tt.validate(t, results)
		})
	}
}
// Test case-insensitive column lookup and null handling for MIN/MAX aggregations

// TestAggregationComputer_MinMaxEdgeCases covers the MIN/MAX edge cases:
// case-insensitive column-name lookup, nil column stats, string and float
// columns, a column missing from the stats, and multiple parquet files whose
// ranges must be merged into a global min/max.
func TestAggregationComputer_MinMaxEdgeCases(t *testing.T) {
	engine := NewMockSQLEngine()
	computer := NewAggregationComputer(engine.SQLEngine)
	tests := []struct {
		name         string
		dataSources  *TopicDataSources
		aggregations []AggregationSpec
		validate     func(t *testing.T, results []AggregationResult, err error)
	}{
		{
			name: "Case insensitive column lookup",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 50,
							ColumnStats: map[string]*ParquetColumnStats{
								"ID": createMockColumnStats("ID", int64(5), int64(95)), // Uppercase column name
							},
						},
					},
				},
				ParquetRowCount: 50,
				LiveLogRowCount: 0,
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "id"}, // lowercase column name
				{Function: FuncMAX, Column: "id"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				assert.Equal(t, int64(5), results[0].Min, "MIN should work with case-insensitive lookup")
				assert.Equal(t, int64(95), results[1].Max, "MAX should work with case-insensitive lookup")
			},
		},
		{
			name: "Null column stats handling",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 50,
							ColumnStats: map[string]*ParquetColumnStats{
								"id": {
									ColumnName: "id",
									MinValue:   nil, // Null min value
									MaxValue:   nil, // Null max value
									NullCount:  50,
									RowCount:   50,
								},
							},
						},
					},
				},
				ParquetRowCount: 50,
				LiveLogRowCount: 0,
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "id"},
				{Function: FuncMAX, Column: "id"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				// When stats are null, should fall back to system column or return nil
				// This tests that we don't crash on null stats
			},
		},
		{
			name: "Mixed data types - string column",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 30,
							ColumnStats: map[string]*ParquetColumnStats{
								"name": createMockColumnStats("name", "Alice", "Zoe"),
							},
						},
					},
				},
				ParquetRowCount: 30,
				LiveLogRowCount: 0,
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "name"},
				{Function: FuncMAX, Column: "name"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				assert.Equal(t, "Alice", results[0].Min)
				assert.Equal(t, "Zoe", results[1].Max)
			},
		},
		{
			name: "Mixed data types - float column",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 25,
							ColumnStats: map[string]*ParquetColumnStats{
								"price": createMockColumnStats("price", float64(19.99), float64(299.50)),
							},
						},
					},
				},
				ParquetRowCount: 25,
				LiveLogRowCount: 0,
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "price"},
				{Function: FuncMAX, Column: "price"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				assert.Equal(t, float64(19.99), results[0].Min)
				assert.Equal(t, float64(299.50), results[1].Max)
			},
		},
		{
			name: "Column not found in parquet stats",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 20,
							ColumnStats: map[string]*ParquetColumnStats{
								"id": createMockColumnStats("id", int64(1), int64(100)),
								// Note: "nonexistent_column" is not in stats
							},
						},
					},
				},
				ParquetRowCount: 20,
				LiveLogRowCount: 10, // Has live logs to fall back to
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "nonexistent_column"},
				{Function: FuncMAX, Column: "nonexistent_column"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				// Should fall back to live log processing or return nil
				// The key is that it shouldn't crash
			},
		},
		{
			name: "Multiple parquet files with different ranges",
			dataSources: &TopicDataSources{
				ParquetFiles: map[string][]*ParquetFileStats{
					"/topics/test/partition-1": {
						{
							RowCount: 30,
							ColumnStats: map[string]*ParquetColumnStats{
								"score": createMockColumnStats("score", int64(10), int64(50)),
							},
						},
						{
							RowCount: 40,
							ColumnStats: map[string]*ParquetColumnStats{
								"score": createMockColumnStats("score", int64(5), int64(75)), // Lower min, higher max
							},
						},
					},
				},
				ParquetRowCount: 70,
				LiveLogRowCount: 0,
				PartitionsCount: 1,
			},
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "score"},
				{Function: FuncMAX, Column: "score"},
			},
			validate: func(t *testing.T, results []AggregationResult, err error) {
				assert.NoError(t, err)
				assert.Len(t, results, 2)
				assert.Equal(t, int64(5), results[0].Min, "Should find global minimum across all files")
				assert.Equal(t, int64(75), results[1].Max, "Should find global maximum across all files")
			},
		},
	}
	partitions := []string{"/topics/test/partition-1"}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			results, err := computer.ComputeFastPathAggregations(ctx, tt.aggregations, tt.dataSources, partitions)
			tt.validate(t, results, err)
		})
	}
}
// Test the specific bug where MIN/MAX was returning empty values

// TestAggregationComputer_MinMaxEmptyValuesBugFix pins the regression where
// MIN/MAX over pure parquet stats came back empty: a min of int32(0) must be
// extracted as 0, not dropped by a faulty null check.
func TestAggregationComputer_MinMaxEmptyValuesBugFix(t *testing.T) {
	engine := NewMockSQLEngine()
	computer := NewAggregationComputer(engine.SQLEngine)
	// This test specifically addresses the bug where MIN/MAX returned empty
	// due to improper null checking and extraction logic
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"/topics/test/test-topic/partition1": {
				{
					RowCount: 100,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": {
							ColumnName: "id",
							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: 0}},  // Min should be 0
							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: 99}}, // Max should be 99
							NullCount:  0,
							RowCount:   100,
						},
					},
				},
			},
		},
		ParquetRowCount: 100,
		LiveLogRowCount: 0, // No live logs, pure parquet stats
		PartitionsCount: 1,
	}
	partitions := []string{"/topics/test/test-topic/partition1"}
	tests := []struct {
		name       string
		aggregSpec AggregationSpec
		expected   interface{}
	}{
		{
			name:       "MIN should return 0 not empty",
			aggregSpec: AggregationSpec{Function: FuncMIN, Column: "id"},
			expected:   int32(0), // Should extract the actual minimum value
		},
		{
			name:       "MAX should return 99 not empty",
			aggregSpec: AggregationSpec{Function: FuncMAX, Column: "id"},
			expected:   int32(99), // Should extract the actual maximum value
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			results, err := computer.ComputeFastPathAggregations(ctx, []AggregationSpec{tt.aggregSpec}, dataSources, partitions)
			assert.NoError(t, err)
			assert.Len(t, results, 1)
			// Verify the result is not nil/empty
			if tt.aggregSpec.Function == FuncMIN {
				assert.NotNil(t, results[0].Min, "MIN result should not be nil")
				assert.Equal(t, tt.expected, results[0].Min)
			} else if tt.aggregSpec.Function == FuncMAX {
				assert.NotNil(t, results[0].Max, "MAX result should not be nil")
				assert.Equal(t, tt.expected, results[0].Max)
			}
		})
	}
}
// Test the formatAggregationResult function with MIN/MAX edge cases

// TestSQLEngine_FormatAggregationResult_MinMax verifies string rendering of
// MIN/MAX results: zero, negative, float, and string values print their
// exact value; a nil result prints as the empty string (SQL NULL).
func TestSQLEngine_FormatAggregationResult_MinMax(t *testing.T) {
	engine := NewTestSQLEngine()
	tests := []struct {
		name     string
		spec     AggregationSpec
		result   AggregationResult
		expected string
	}{
		{
			name:     "MIN with zero value should not be empty",
			spec:     AggregationSpec{Function: FuncMIN, Column: "id"},
			result:   AggregationResult{Min: int32(0)},
			expected: "0",
		},
		{
			name:     "MAX with large value",
			spec:     AggregationSpec{Function: FuncMAX, Column: "id"},
			result:   AggregationResult{Max: int32(99)},
			expected: "99",
		},
		{
			name:     "MIN with negative value",
			spec:     AggregationSpec{Function: FuncMIN, Column: "score"},
			result:   AggregationResult{Min: int64(-50)},
			expected: "-50",
		},
		{
			name:     "MAX with float value",
			spec:     AggregationSpec{Function: FuncMAX, Column: "price"},
			result:   AggregationResult{Max: float64(299.99)},
			expected: "299.99",
		},
		{
			name:     "MIN with string value",
			spec:     AggregationSpec{Function: FuncMIN, Column: "name"},
			result:   AggregationResult{Min: "Alice"},
			expected: "Alice",
		},
		{
			name:     "MIN with nil should return NULL",
			spec:     AggregationSpec{Function: FuncMIN, Column: "missing"},
			result:   AggregationResult{Min: nil},
			expected: "", // NULL values display as empty
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			sqlValue := engine.formatAggregationResult(tt.spec, tt.result)
			assert.Equal(t, tt.expected, sqlValue.String())
		})
	}
}
// Test the direct formatAggregationResult scenario that was originally broken

// TestSQLEngine_MinMaxBugFixIntegration exercises the formatting path that
// originally produced empty MIN/MAX output, without involving table
// discovery: zero and negative values must render, only nil stays empty.
func TestSQLEngine_MinMaxBugFixIntegration(t *testing.T) {
	// This test focuses on the core bug fix without the complexity of table discovery
	// It directly tests the scenario where MIN/MAX returned empty due to the bug
	engine := NewTestSQLEngine()
	// Test the direct formatting path that was failing
	tests := []struct {
		name          string
		aggregSpec    AggregationSpec
		aggResult     AggregationResult
		expectedEmpty bool
		expectedValue string
	}{
		{
			name:          "MIN with zero should not be empty (the original bug)",
			aggregSpec:    AggregationSpec{Function: FuncMIN, Column: "id", Alias: "MIN(id)"},
			aggResult:     AggregationResult{Min: int32(0)}, // This was returning empty before fix
			expectedEmpty: false,
			expectedValue: "0",
		},
		{
			name:          "MAX with valid value should not be empty",
			aggregSpec:    AggregationSpec{Function: FuncMAX, Column: "id", Alias: "MAX(id)"},
			aggResult:     AggregationResult{Max: int32(99)},
			expectedEmpty: false,
			expectedValue: "99",
		},
		{
			name:          "MIN with negative value should work",
			aggregSpec:    AggregationSpec{Function: FuncMIN, Column: "score", Alias: "MIN(score)"},
			aggResult:     AggregationResult{Min: int64(-10)},
			expectedEmpty: false,
			expectedValue: "-10",
		},
		{
			name:          "MIN with nil should be empty (expected behavior)",
			aggregSpec:    AggregationSpec{Function: FuncMIN, Column: "missing", Alias: "MIN(missing)"},
			aggResult:     AggregationResult{Min: nil},
			expectedEmpty: true,
			expectedValue: "",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Test the formatAggregationResult function directly
			sqlValue := engine.formatAggregationResult(tt.aggregSpec, tt.aggResult)
			result := sqlValue.String()
			if tt.expectedEmpty {
				assert.Empty(t, result, "Result should be empty for nil values")
			} else {
				assert.NotEmpty(t, result, "Result should not be empty")
				assert.Equal(t, tt.expectedValue, result)
			}
		})
	}
}
// Test the tryFastParquetAggregation method specifically for the bug

// TestSQLEngine_FastParquetAggregationBugFix verifies the fast-path
// aggregation returns concrete MIN/MAX values (0 and 99) rather than nil for
// a pure-parquet partition shaped like the originally reported scenario.
func TestSQLEngine_FastParquetAggregationBugFix(t *testing.T) {
	// This test verifies that the fast path aggregation logic works correctly
	// and doesn't return nil/empty values when it should return actual data
	engine := NewMockSQLEngine()
	computer := NewAggregationComputer(engine.SQLEngine)
	// Create realistic data sources that mimic the user's scenario
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"/topics/test/test-topic/v2025-09-01-22-54-02/0000-0630": {
				{
					RowCount: 100,
					ColumnStats: map[string]*ParquetColumnStats{
						"id": {
							ColumnName: "id",
							MinValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: 0}},
							MaxValue:   &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: 99}},
							NullCount:  0,
							RowCount:   100,
						},
					},
				},
			},
		},
		ParquetRowCount: 100,
		LiveLogRowCount: 0, // Pure parquet scenario
		PartitionsCount: 1,
	}
	partitions := []string{"/topics/test/test-topic/v2025-09-01-22-54-02/0000-0630"}
	tests := []struct {
		name            string
		aggregations    []AggregationSpec
		validateResults func(t *testing.T, results []AggregationResult)
	}{
		{
			name: "Single MIN aggregation should return value not nil",
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "id", Alias: "MIN(id)"},
			},
			validateResults: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 1)
				assert.NotNil(t, results[0].Min, "MIN result should not be nil")
				assert.Equal(t, int32(0), results[0].Min, "MIN should return the correct minimum value")
			},
		},
		{
			name: "Single MAX aggregation should return value not nil",
			aggregations: []AggregationSpec{
				{Function: FuncMAX, Column: "id", Alias: "MAX(id)"},
			},
			validateResults: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 1)
				assert.NotNil(t, results[0].Max, "MAX result should not be nil")
				assert.Equal(t, int32(99), results[0].Max, "MAX should return the correct maximum value")
			},
		},
		{
			name: "Combined MIN/MAX should both return values",
			aggregations: []AggregationSpec{
				{Function: FuncMIN, Column: "id", Alias: "MIN(id)"},
				{Function: FuncMAX, Column: "id", Alias: "MAX(id)"},
			},
			validateResults: func(t *testing.T, results []AggregationResult) {
				assert.Len(t, results, 2)
				assert.NotNil(t, results[0].Min, "MIN result should not be nil")
				assert.NotNil(t, results[1].Max, "MAX result should not be nil")
				assert.Equal(t, int32(0), results[0].Min)
				assert.Equal(t, int32(99), results[1].Max)
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx := context.Background()
			results, err := computer.ComputeFastPathAggregations(ctx, tt.aggregations, dataSources, partitions)
			assert.NoError(t, err, "ComputeFastPathAggregations should not error")
			tt.validateResults(t, results)
		})
	}
}
// Test ExecutionPlanBuilder

// TestExecutionPlanBuilder_BuildAggregationPlan builds a plan for a simple
// COUNT(*) fast-path query and checks the reported strategy, data sources,
// scan counts, and that TotalRowsProcessed counts only the live-log rows.
func TestExecutionPlanBuilder_BuildAggregationPlan(t *testing.T) {
	engine := NewMockSQLEngine()
	builder := NewExecutionPlanBuilder(engine.SQLEngine)
	// Parse a simple SELECT statement using the native parser
	stmt, err := ParseSQL("SELECT COUNT(*) FROM test_topic")
	assert.NoError(t, err)
	selectStmt := stmt.(*SelectStatement)
	aggregations := []AggregationSpec{
		{Function: FuncCOUNT, Column: "*"},
	}
	strategy := AggregationStrategy{
		CanUseFastPath: true,
		Reason:         "all_aggregations_supported",
	}
	dataSources := &TopicDataSources{
		ParquetRowCount: 100,
		LiveLogRowCount: 50,
		PartitionsCount: 3,
		ParquetFiles: map[string][]*ParquetFileStats{
			"partition-1": {{RowCount: 50}},
			"partition-2": {{RowCount: 50}},
		},
	}
	plan := builder.BuildAggregationPlan(selectStmt, aggregations, strategy, dataSources)
	assert.Equal(t, "SELECT", plan.QueryType)
	assert.Equal(t, "hybrid_fast_path", plan.ExecutionStrategy)
	assert.Contains(t, plan.DataSources, "parquet_stats")
	assert.Contains(t, plan.DataSources, "live_logs")
	assert.Equal(t, 3, plan.PartitionsScanned)
	assert.Equal(t, 2, plan.ParquetFilesScanned)
	assert.Contains(t, plan.OptimizationsUsed, "parquet_statistics")
	assert.Equal(t, []string{"COUNT(*)"}, plan.Aggregations)
	assert.Equal(t, int64(50), plan.TotalRowsProcessed) // Only live logs scanned
}
// Test Error Types

// TestErrorTypes pins the Error() string format of the three custom error
// types used by the aggregation engine.
func TestErrorTypes(t *testing.T) {
	t.Run("AggregationError", func(t *testing.T) {
		err := AggregationError{
			Operation: "MAX",
			Column:    "id",
			Cause:     errors.New("column not found"),
		}
		expected := "aggregation error in MAX(id): column not found"
		assert.Equal(t, expected, err.Error())
	})
	t.Run("DataSourceError", func(t *testing.T) {
		err := DataSourceError{
			Source: "partition_discovery:test.topic1",
			Cause:  errors.New("network timeout"),
		}
		expected := "data source error in partition_discovery:test.topic1: network timeout"
		assert.Equal(t, expected, err.Error())
	})
	t.Run("OptimizationError", func(t *testing.T) {
		err := OptimizationError{
			Strategy: "fast_path_aggregation",
			Reason:   "unsupported function: AVG",
		}
		expected := "optimization failed for fast_path_aggregation: unsupported function: AVG"
		assert.Equal(t, expected, err.Error())
	})
}
// Integration Tests

// TestIntegration_FastPathOptimization runs the full fast path end to end:
// strategy selection for COUNT/MAX, then aggregation over one partition with
// 75 parquet rows plus 25 live rows.
func TestIntegration_FastPathOptimization(t *testing.T) {
	engine := NewMockSQLEngine()
	// Setup components
	optimizer := NewFastPathOptimizer(engine.SQLEngine)
	computer := NewAggregationComputer(engine.SQLEngine)
	// Mock data setup
	aggregations := []AggregationSpec{
		{Function: FuncCOUNT, Column: "*"},
		{Function: FuncMAX, Column: "id"},
	}
	// Step 1: Determine strategy
	strategy := optimizer.DetermineStrategy(aggregations)
	assert.True(t, strategy.CanUseFastPath)
	// Step 2: Mock data sources
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"/topics/test/topic1/partition-1": {{
				RowCount: 75,
				ColumnStats: map[string]*ParquetColumnStats{
					"id": createMockColumnStats("id", int64(1), int64(100)),
				},
			}},
		},
		ParquetRowCount: 75,
		LiveLogRowCount: 25,
		PartitionsCount: 1,
	}
	partitions := []string{"/topics/test/topic1/partition-1"}
	// Step 3: Compute aggregations
	ctx := context.Background()
	results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
	assert.NoError(t, err)
	assert.Len(t, results, 2)
	assert.Equal(t, int64(100), results[0].Count) // 75 + 25
	assert.Equal(t, int64(100), results[1].Max)   // From parquet stats mock
}
- func TestIntegration_FallbackToFullScan(t *testing.T) {
- engine := NewMockSQLEngine()
- optimizer := NewFastPathOptimizer(engine.SQLEngine)
- // Unsupported aggregations
- aggregations := []AggregationSpec{
- {Function: "AVG", Column: "value"}, // Not supported
- }
- // Step 1: Strategy should reject fast path
- strategy := optimizer.DetermineStrategy(aggregations)
- assert.False(t, strategy.CanUseFastPath)
- assert.Equal(t, "unsupported_aggregation_functions", strategy.Reason)
- assert.NotEmpty(t, strategy.UnsupportedSpecs)
- }
- // Benchmark Tests
- func BenchmarkFastPathOptimizer_DetermineStrategy(b *testing.B) {
- engine := NewMockSQLEngine()
- optimizer := NewFastPathOptimizer(engine.SQLEngine)
- aggregations := []AggregationSpec{
- {Function: FuncCOUNT, Column: "*"},
- {Function: FuncMAX, Column: "id"},
- {Function: "MIN", Column: "value"},
- }
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- strategy := optimizer.DetermineStrategy(aggregations)
- _ = strategy.CanUseFastPath
- }
- }
// BenchmarkAggregationComputer_ComputeFastPathAggregations measures COUNT +
// MAX computed from one partition's parquet stats (1000 rows) plus 100 live
// rows.
func BenchmarkAggregationComputer_ComputeFastPathAggregations(b *testing.B) {
	engine := NewMockSQLEngine()
	computer := NewAggregationComputer(engine.SQLEngine)
	dataSources := &TopicDataSources{
		ParquetFiles: map[string][]*ParquetFileStats{
			"partition-1": {{
				RowCount: 1000,
				ColumnStats: map[string]*ParquetColumnStats{
					"id": createMockColumnStats("id", int64(1), int64(1000)),
				},
			}},
		},
		ParquetRowCount: 1000,
		LiveLogRowCount: 100,
	}
	aggregations := []AggregationSpec{
		{Function: FuncCOUNT, Column: "*"},
		{Function: FuncMAX, Column: "id"},
	}
	partitions := []string{"partition-1"}
	ctx := context.Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
		if err != nil {
			b.Fatal(err)
		}
		_ = results // sink to prevent dead-code elimination
	}
}
// Tests for convertLogEntryToRecordValue - Protocol Buffer parsing bug fix

// TestSQLEngine_ConvertLogEntryToRecordValue_ValidProtobuf round-trips a
// RecordValue through proto.Marshal into a LogEntry and checks that the
// conversion preserves user fields and injects the _ts_ns/_key system
// columns from the entry metadata.
func TestSQLEngine_ConvertLogEntryToRecordValue_ValidProtobuf(t *testing.T) {
	engine := NewTestSQLEngine()
	// Create a valid RecordValue protobuf with user data
	originalRecord := &schema_pb.RecordValue{
		Fields: map[string]*schema_pb.Value{
			"id":    {Kind: &schema_pb.Value_Int32Value{Int32Value: 42}},
			"name":  {Kind: &schema_pb.Value_StringValue{StringValue: "test-user"}},
			"score": {Kind: &schema_pb.Value_DoubleValue{DoubleValue: 95.5}},
		},
	}
	// Serialize the protobuf (this is what MQ actually stores)
	protobufData, err := proto.Marshal(originalRecord)
	assert.NoError(t, err)
	// Create a LogEntry with the serialized data
	logEntry := &filer_pb.LogEntry{
		TsNs:             1609459200000000000, // 2021-01-01 00:00:00 UTC
		PartitionKeyHash: 123,
		Data:             protobufData, // Protocol buffer data (not JSON!)
		Key:              []byte("test-key-001"),
	}
	// Test the conversion
	result, source, err := engine.convertLogEntryToRecordValue(logEntry)
	// Verify no error
	assert.NoError(t, err)
	assert.Equal(t, "live_log", source)
	assert.NotNil(t, result)
	assert.NotNil(t, result.Fields)
	// Verify system columns are added correctly
	assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
	assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
	assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
	assert.Equal(t, []byte("test-key-001"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
	// Verify user data is preserved
	assert.Contains(t, result.Fields, "id")
	assert.Contains(t, result.Fields, "name")
	assert.Contains(t, result.Fields, "score")
	assert.Equal(t, int32(42), result.Fields["id"].GetInt32Value())
	assert.Equal(t, "test-user", result.Fields["name"].GetStringValue())
	assert.Equal(t, 95.5, result.Fields["score"].GetDoubleValue())
}
- func TestSQLEngine_ConvertLogEntryToRecordValue_InvalidProtobuf(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create LogEntry with invalid protobuf data (this would cause the original JSON parsing bug)
- logEntry := &filer_pb.LogEntry{
- TsNs: 1609459200000000000,
- PartitionKeyHash: 123,
- Data: []byte{0x17, 0x00, 0xFF, 0xFE}, // Invalid protobuf data (starts with \x17 like in the original error)
- Key: []byte("test-key"),
- }
- // Test the conversion
- result, source, err := engine.convertLogEntryToRecordValue(logEntry)
- // Should return error for invalid protobuf
- assert.Error(t, err)
- assert.Contains(t, err.Error(), "failed to unmarshal log entry protobuf")
- assert.Nil(t, result)
- assert.Empty(t, source)
- }
- func TestSQLEngine_ConvertLogEntryToRecordValue_EmptyProtobuf(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create a minimal valid RecordValue (empty fields)
- emptyRecord := &schema_pb.RecordValue{
- Fields: map[string]*schema_pb.Value{},
- }
- protobufData, err := proto.Marshal(emptyRecord)
- assert.NoError(t, err)
- logEntry := &filer_pb.LogEntry{
- TsNs: 1609459200000000000,
- PartitionKeyHash: 456,
- Data: protobufData,
- Key: []byte("empty-key"),
- }
- // Test the conversion
- result, source, err := engine.convertLogEntryToRecordValue(logEntry)
- // Should succeed and add system columns
- assert.NoError(t, err)
- assert.Equal(t, "live_log", source)
- assert.NotNil(t, result)
- assert.NotNil(t, result.Fields)
- // Should have system columns
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
- assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
- assert.Equal(t, []byte("empty-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
- // Should have no user fields
- userFieldCount := 0
- for fieldName := range result.Fields {
- if fieldName != SW_COLUMN_NAME_TIMESTAMP && fieldName != SW_COLUMN_NAME_KEY {
- userFieldCount++
- }
- }
- assert.Equal(t, 0, userFieldCount)
- }
- func TestSQLEngine_ConvertLogEntryToRecordValue_NilFieldsMap(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create RecordValue with nil Fields map (edge case)
- recordWithNilFields := &schema_pb.RecordValue{
- Fields: nil, // This should be handled gracefully
- }
- protobufData, err := proto.Marshal(recordWithNilFields)
- assert.NoError(t, err)
- logEntry := &filer_pb.LogEntry{
- TsNs: 1609459200000000000,
- PartitionKeyHash: 789,
- Data: protobufData,
- Key: []byte("nil-fields-key"),
- }
- // Test the conversion
- result, source, err := engine.convertLogEntryToRecordValue(logEntry)
- // Should succeed and create Fields map
- assert.NoError(t, err)
- assert.Equal(t, "live_log", source)
- assert.NotNil(t, result)
- assert.NotNil(t, result.Fields) // Should be created by the function
- // Should have system columns
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
- assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
- assert.Equal(t, []byte("nil-fields-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
- }
- func TestSQLEngine_ConvertLogEntryToRecordValue_SystemColumnOverride(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create RecordValue that already has system column names (should be overridden)
- recordWithSystemCols := &schema_pb.RecordValue{
- Fields: map[string]*schema_pb.Value{
- "user_field": {Kind: &schema_pb.Value_StringValue{StringValue: "user-data"}},
- SW_COLUMN_NAME_TIMESTAMP: {Kind: &schema_pb.Value_Int64Value{Int64Value: 999999999}}, // Should be overridden
- SW_COLUMN_NAME_KEY: {Kind: &schema_pb.Value_StringValue{StringValue: "old-key"}}, // Should be overridden
- },
- }
- protobufData, err := proto.Marshal(recordWithSystemCols)
- assert.NoError(t, err)
- logEntry := &filer_pb.LogEntry{
- TsNs: 1609459200000000000,
- PartitionKeyHash: 100,
- Data: protobufData,
- Key: []byte("actual-key"),
- }
- // Test the conversion
- result, source, err := engine.convertLogEntryToRecordValue(logEntry)
- // Should succeed
- assert.NoError(t, err)
- assert.Equal(t, "live_log", source)
- assert.NotNil(t, result)
- // System columns should use LogEntry values, not protobuf values
- assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
- assert.Equal(t, []byte("actual-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
- // User field should be preserved
- assert.Contains(t, result.Fields, "user_field")
- assert.Equal(t, "user-data", result.Fields["user_field"].GetStringValue())
- }
- func TestSQLEngine_ConvertLogEntryToRecordValue_ComplexDataTypes(t *testing.T) {
- engine := NewTestSQLEngine()
- // Test with various data types
- complexRecord := &schema_pb.RecordValue{
- Fields: map[string]*schema_pb.Value{
- "int32_field": {Kind: &schema_pb.Value_Int32Value{Int32Value: -42}},
- "int64_field": {Kind: &schema_pb.Value_Int64Value{Int64Value: 9223372036854775807}},
- "float_field": {Kind: &schema_pb.Value_FloatValue{FloatValue: 3.14159}},
- "double_field": {Kind: &schema_pb.Value_DoubleValue{DoubleValue: 2.718281828}},
- "bool_field": {Kind: &schema_pb.Value_BoolValue{BoolValue: true}},
- "string_field": {Kind: &schema_pb.Value_StringValue{StringValue: "test string with unicode 🎉"}},
- "bytes_field": {Kind: &schema_pb.Value_BytesValue{BytesValue: []byte{0x01, 0x02, 0x03}}},
- },
- }
- protobufData, err := proto.Marshal(complexRecord)
- assert.NoError(t, err)
- logEntry := &filer_pb.LogEntry{
- TsNs: 1609459200000000000,
- PartitionKeyHash: 200,
- Data: protobufData,
- Key: []byte("complex-key"),
- }
- // Test the conversion
- result, source, err := engine.convertLogEntryToRecordValue(logEntry)
- // Should succeed
- assert.NoError(t, err)
- assert.Equal(t, "live_log", source)
- assert.NotNil(t, result)
- // Verify all data types are preserved
- assert.Equal(t, int32(-42), result.Fields["int32_field"].GetInt32Value())
- assert.Equal(t, int64(9223372036854775807), result.Fields["int64_field"].GetInt64Value())
- assert.Equal(t, float32(3.14159), result.Fields["float_field"].GetFloatValue())
- assert.Equal(t, 2.718281828, result.Fields["double_field"].GetDoubleValue())
- assert.Equal(t, true, result.Fields["bool_field"].GetBoolValue())
- assert.Equal(t, "test string with unicode 🎉", result.Fields["string_field"].GetStringValue())
- assert.Equal(t, []byte{0x01, 0x02, 0x03}, result.Fields["bytes_field"].GetBytesValue())
- // System columns should still be present
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
- assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
- }
- // Tests for log buffer deduplication functionality
- func TestSQLEngine_GetLogBufferStartFromFile_BinaryFormat(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create sample buffer start (binary format)
- bufferStartBytes := make([]byte, 8)
- binary.BigEndian.PutUint64(bufferStartBytes, uint64(1609459100000000001))
- // Create file entry with buffer start + some chunks
- entry := &filer_pb.Entry{
- Name: "test-log-file",
- Extended: map[string][]byte{
- "buffer_start": bufferStartBytes,
- },
- Chunks: []*filer_pb.FileChunk{
- {FileId: "chunk1", Offset: 0, Size: 1000},
- {FileId: "chunk2", Offset: 1000, Size: 1000},
- {FileId: "chunk3", Offset: 2000, Size: 1000},
- },
- }
- // Test extraction
- result, err := engine.getLogBufferStartFromFile(entry)
- assert.NoError(t, err)
- assert.NotNil(t, result)
- assert.Equal(t, int64(1609459100000000001), result.StartIndex)
- // Test extraction works correctly with the binary format
- }
- func TestSQLEngine_GetLogBufferStartFromFile_NoMetadata(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create file entry without buffer start
- entry := &filer_pb.Entry{
- Name: "test-log-file",
- Extended: nil,
- }
- // Test extraction
- result, err := engine.getLogBufferStartFromFile(entry)
- assert.NoError(t, err)
- assert.Nil(t, result)
- }
- func TestSQLEngine_GetLogBufferStartFromFile_InvalidData(t *testing.T) {
- engine := NewTestSQLEngine()
- // Create file entry with invalid buffer start (wrong size)
- entry := &filer_pb.Entry{
- Name: "test-log-file",
- Extended: map[string][]byte{
- "buffer_start": []byte("invalid-binary"),
- },
- }
- // Test extraction
- result, err := engine.getLogBufferStartFromFile(entry)
- assert.Error(t, err)
- assert.Contains(t, err.Error(), "invalid buffer_start format: expected 8 bytes")
- assert.Nil(t, result)
- }
- func TestSQLEngine_BuildLogBufferDeduplicationMap_NoBrokerClient(t *testing.T) {
- engine := NewTestSQLEngine()
- engine.catalog.brokerClient = nil // Simulate no broker client
- ctx := context.Background()
- result, err := engine.buildLogBufferDeduplicationMap(ctx, "/topics/test/test-topic")
- assert.NoError(t, err)
- assert.NotNil(t, result)
- assert.Empty(t, result)
- }
- func TestSQLEngine_LogBufferDeduplication_ServerRestartScenario(t *testing.T) {
- // Simulate scenario: Buffer indexes are now initialized with process start time
- // This tests that buffer start indexes are globally unique across server restarts
- // Before server restart: Process 1 buffer start (3 chunks)
- beforeRestartStart := LogBufferStart{
- StartIndex: 1609459100000000000, // Process 1 start time
- }
- // After server restart: Process 2 buffer start (3 chunks)
- afterRestartStart := LogBufferStart{
- StartIndex: 1609459300000000000, // Process 2 start time (DIFFERENT)
- }
- // Simulate 3 chunks for each file
- chunkCount := int64(3)
- // Calculate end indexes for range comparison
- beforeEnd := beforeRestartStart.StartIndex + chunkCount - 1 // [start, start+2]
- afterStart := afterRestartStart.StartIndex // [start, start+2]
- // Test range overlap detection (should NOT overlap)
- overlaps := beforeRestartStart.StartIndex <= (afterStart+chunkCount-1) && beforeEnd >= afterStart
- assert.False(t, overlaps, "Buffer ranges after restart should not overlap")
- // Verify the start indexes are globally unique
- assert.NotEqual(t, beforeRestartStart.StartIndex, afterRestartStart.StartIndex, "Start indexes should be different")
- assert.Less(t, beforeEnd, afterStart, "Ranges should be completely separate")
- // Expected values:
- // Before restart: [1609459100000000000, 1609459100000000002]
- // After restart: [1609459300000000000, 1609459300000000002]
- expectedBeforeEnd := int64(1609459100000000002)
- expectedAfterStart := int64(1609459300000000000)
- assert.Equal(t, expectedBeforeEnd, beforeEnd)
- assert.Equal(t, expectedAfterStart, afterStart)
- // This demonstrates that buffer start indexes initialized with process start time
- // prevent false positive duplicates across server restarts
- }
- func TestBrokerClient_BinaryBufferStartFormat(t *testing.T) {
- // Test scenario: getBufferStartFromEntry should only support binary format
- // This tests the standardized binary format for buffer_start metadata
- realBrokerClient := &BrokerClient{}
- // Test binary format (used by both log files and Parquet files)
- binaryEntry := &filer_pb.Entry{
- Name: "2025-01-07-14-30-45",
- IsDirectory: false,
- Extended: map[string][]byte{
- "buffer_start": func() []byte {
- // Binary format: 8-byte BigEndian
- buf := make([]byte, 8)
- binary.BigEndian.PutUint64(buf, uint64(2000001))
- return buf
- }(),
- },
- }
- bufferStart := realBrokerClient.getBufferStartFromEntry(binaryEntry)
- assert.NotNil(t, bufferStart)
- assert.Equal(t, int64(2000001), bufferStart.StartIndex, "Should parse binary buffer_start metadata")
- // Test Parquet file (same binary format)
- parquetEntry := &filer_pb.Entry{
- Name: "2025-01-07-14-30.parquet",
- IsDirectory: false,
- Extended: map[string][]byte{
- "buffer_start": func() []byte {
- buf := make([]byte, 8)
- binary.BigEndian.PutUint64(buf, uint64(1500001))
- return buf
- }(),
- },
- }
- bufferStart = realBrokerClient.getBufferStartFromEntry(parquetEntry)
- assert.NotNil(t, bufferStart)
- assert.Equal(t, int64(1500001), bufferStart.StartIndex, "Should parse binary buffer_start from Parquet file")
- // Test missing metadata
- emptyEntry := &filer_pb.Entry{
- Name: "no-metadata",
- IsDirectory: false,
- Extended: nil,
- }
- bufferStart = realBrokerClient.getBufferStartFromEntry(emptyEntry)
- assert.Nil(t, bufferStart, "Should return nil for entry without buffer_start metadata")
- // Test invalid format (wrong size)
- invalidEntry := &filer_pb.Entry{
- Name: "invalid-metadata",
- IsDirectory: false,
- Extended: map[string][]byte{
- "buffer_start": []byte("invalid"),
- },
- }
- bufferStart = realBrokerClient.getBufferStartFromEntry(invalidEntry)
- assert.Nil(t, bufferStart, "Should return nil for invalid buffer_start metadata")
- }
- // TestGetSQLValAlias tests the getSQLValAlias function, particularly for SQL injection prevention
- func TestGetSQLValAlias(t *testing.T) {
- engine := &SQLEngine{}
- tests := []struct {
- name string
- sqlVal *SQLVal
- expected string
- desc string
- }{
- {
- name: "simple string",
- sqlVal: &SQLVal{
- Type: StrVal,
- Val: []byte("hello"),
- },
- expected: "'hello'",
- desc: "Simple string should be wrapped in single quotes",
- },
- {
- name: "string with single quote",
- sqlVal: &SQLVal{
- Type: StrVal,
- Val: []byte("don't"),
- },
- expected: "'don''t'",
- desc: "String with single quote should have the quote escaped by doubling it",
- },
- {
- name: "string with multiple single quotes",
- sqlVal: &SQLVal{
- Type: StrVal,
- Val: []byte("'malicious'; DROP TABLE users; --"),
- },
- expected: "'''malicious''; DROP TABLE users; --'",
- desc: "String with SQL injection attempt should have all single quotes properly escaped",
- },
- {
- name: "empty string",
- sqlVal: &SQLVal{
- Type: StrVal,
- Val: []byte(""),
- },
- expected: "''",
- desc: "Empty string should result in empty quoted string",
- },
- {
- name: "integer value",
- sqlVal: &SQLVal{
- Type: IntVal,
- Val: []byte("123"),
- },
- expected: "123",
- desc: "Integer value should not be quoted",
- },
- {
- name: "float value",
- sqlVal: &SQLVal{
- Type: FloatVal,
- Val: []byte("123.45"),
- },
- expected: "123.45",
- desc: "Float value should not be quoted",
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- result := engine.getSQLValAlias(tt.sqlVal)
- assert.Equal(t, tt.expected, result, tt.desc)
- })
- }
- }
|