package engine

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"math/big"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/mq/schema"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"

	"google.golang.org/protobuf/proto"
)

// SQL Function Name Constants
const (
	// Aggregation Functions
	FuncCOUNT = "COUNT"
	FuncSUM   = "SUM"
	FuncAVG   = "AVG"
	FuncMIN   = "MIN"
	FuncMAX   = "MAX"

	// String Functions
	FuncUPPER     = "UPPER"
	FuncLOWER     = "LOWER"
	FuncLENGTH    = "LENGTH"
	FuncTRIM      = "TRIM"
	FuncBTRIM     = "BTRIM" // CockroachDB's internal name for TRIM
	FuncLTRIM     = "LTRIM"
	FuncRTRIM     = "RTRIM"
	FuncSUBSTRING = "SUBSTRING"
	FuncLEFT      = "LEFT"
	FuncRIGHT     = "RIGHT"
	FuncCONCAT    = "CONCAT"

	// DateTime Functions
	FuncCURRENT_DATE      = "CURRENT_DATE"
	FuncCURRENT_TIME      = "CURRENT_TIME"
	FuncCURRENT_TIMESTAMP = "CURRENT_TIMESTAMP"
	FuncNOW               = "NOW"
	FuncEXTRACT           = "EXTRACT"
	FuncDATE_TRUNC        = "DATE_TRUNC"
	// PostgreSQL uses EXTRACT(part FROM date) instead of convenience functions like YEAR(), MONTH(), etc.
)

// PostgreSQL-compatible SQL AST types
type Statement interface {
	isStatement()
}

type ShowStatement struct {
	Type    string  // "databases", "tables", "columns"
	Table   string  // for SHOW COLUMNS FROM table
	Schema  string  // for database context
	OnTable NameRef // for compatibility with existing code that checks OnTable
}

func (s *ShowStatement) isStatement() {}

type UseStatement struct {
	Database string // database name to switch to
}

func (u *UseStatement) isStatement() {}

type DDLStatement struct {
	Action    string // "create", "alter", "drop"
	NewName   NameRef
	TableSpec *TableSpec
}

type NameRef struct {
	Name      StringGetter
	Qualifier StringGetter
}

type StringGetter interface {
	String() string
}

type stringValue string

func (s stringValue) String() string { return string(s) }

type TableSpec struct {
	Columns []ColumnDef
}

type ColumnDef struct {
	Name StringGetter
	Type TypeRef
}

type TypeRef struct {
	Type string
}

func (d *DDLStatement) isStatement() {}

type SelectStatement struct {
	SelectExprs     []SelectExpr
	From            []TableExpr
	Where           *WhereClause
	Limit           *LimitClause
	WindowFunctions []*WindowFunction
}

type WhereClause struct {
	Expr ExprNode
}

type LimitClause struct {
	Rowcount ExprNode
	Offset   ExprNode
}

func (s *SelectStatement) isStatement() {}

// Window function types for time-series analytics
type WindowSpec struct {
	PartitionBy []ExprNode
	OrderBy     []*OrderByClause
}

type WindowFunction struct {
	Function string     // ROW_NUMBER, RANK, LAG, LEAD
	Args     []ExprNode // function arguments
	Over     *WindowSpec
	Alias    string // column alias for the result
}

type OrderByClause struct {
	Column string
	Order  string // ASC or DESC
}

type SelectExpr interface {
	isSelectExpr()
}

type StarExpr struct{}

func (s *StarExpr) isSelectExpr() {}

type AliasedExpr struct {
	Expr ExprNode
	As   AliasRef
}

type AliasRef interface {
	IsEmpty() bool
	String() string
}

type aliasValue string

func (a aliasValue) IsEmpty() bool  { return string(a) == "" }
func (a aliasValue) String() string { return string(a) }

func (a *AliasedExpr) isSelectExpr() {}

type TableExpr interface {
	isTableExpr()
}

type AliasedTableExpr struct {
	Expr interface{}
}

func (a *AliasedTableExpr) isTableExpr() {}

type TableName struct {
	Name      StringGetter
	Qualifier StringGetter
}

type ExprNode interface {
	isExprNode()
}

type FuncExpr struct {
	Name  StringGetter
	Exprs []SelectExpr
}

func (f *FuncExpr) isExprNode() {}

type ColName struct {
	Name StringGetter
}

func (c *ColName) isExprNode() {}

// ArithmeticExpr represents arithmetic operations like id+user_id and string concatenation like name||suffix
type ArithmeticExpr struct {
	Left     ExprNode
	Right    ExprNode
	Operator string // +, -, *, /, %, ||
}

func (a *ArithmeticExpr) isExprNode() {}

type ComparisonExpr struct {
	Left     ExprNode
	Right    ExprNode
	Operator string
}

func (c *ComparisonExpr) isExprNode() {}

type AndExpr struct {
	Left  ExprNode
	Right ExprNode
}

func (a *AndExpr) isExprNode() {}

type OrExpr struct {
	Left  ExprNode
	Right ExprNode
}

func (o *OrExpr) isExprNode() {}

type ParenExpr struct {
	Expr ExprNode
}

func (p *ParenExpr) isExprNode() {}

type SQLVal struct {
	Type int
	Val  []byte
}

func (s *SQLVal) isExprNode() {}

type ValTuple []ExprNode

func (v ValTuple) isExprNode() {}

type IntervalExpr struct {
	Value string // the interval value (e.g., "1 hour", "30 minutes")
	Unit  string // the unit (parsed from Value)
}

func (i *IntervalExpr) isExprNode() {}

type BetweenExpr struct {
	Left ExprNode // the expression to test
	From ExprNode // lower bound (inclusive)
	To   ExprNode // upper bound (inclusive)
	Not  bool     // true for NOT BETWEEN
}

func (b *BetweenExpr) isExprNode() {}

type IsNullExpr struct {
	Expr ExprNode // the expression to test for null
}

func (i *IsNullExpr) isExprNode() {}

type IsNotNullExpr struct {
	Expr ExprNode // the expression to test for not null
}

func (i *IsNotNullExpr) isExprNode() {}

// SQLVal types
const (
	IntVal = iota
	StrVal
	FloatVal
)

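// Illustrative AST shape (a hypothetical construction, shown only for orientation)
// for the query `SELECT id FROM events LIMIT 10`:
//
//	&SelectStatement{
//		SelectExprs: []SelectExpr{&AliasedExpr{Expr: &ColName{Name: stringValue("id")}}},
//		From:        []TableExpr{&AliasedTableExpr{Expr: TableName{Name: stringValue("events")}}},
//		Limit:       &LimitClause{Rowcount: &SQLVal{Type: IntVal, Val: []byte("10")}},
//	}
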
// Operator constants
const (
	CreateStr       = "create"
	AlterStr        = "alter"
	DropStr         = "drop"
	EqualStr        = "="
	LessThanStr     = "<"
	GreaterThanStr  = ">"
	LessEqualStr    = "<="
	GreaterEqualStr = ">="
	NotEqualStr     = "!="
)

// parseIdentifier parses a potentially quoted identifier (database/table name)
func parseIdentifier(identifier string) string {
	identifier = strings.TrimSpace(identifier)
	identifier = strings.TrimSuffix(identifier, ";") // remove a trailing semicolon

	// Handle double quotes (PostgreSQL standard)
	if len(identifier) >= 2 && identifier[0] == '"' && identifier[len(identifier)-1] == '"' {
		return identifier[1 : len(identifier)-1]
	}
	// Handle backticks (MySQL compatibility)
	if len(identifier) >= 2 && identifier[0] == '`' && identifier[len(identifier)-1] == '`' {
		return identifier[1 : len(identifier)-1]
	}
	return identifier
}

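// Illustrative behavior (inputs are hypothetical):
//
//	parseIdentifier(`"my_db"`)  // -> my_db   (PostgreSQL-style quoting)
//	parseIdentifier("`my_db`")  // -> my_db   (MySQL-style quoting)
//	parseIdentifier(" my_db; ") // -> my_db   (whitespace and trailing semicolon removed)
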
// splitQualifiedName splits a raw "database.table" reference into its parts,
// honoring quoted identifiers such as "db"."table" or `db`.`table`.
// If there is no qualifier, the database result is empty.
func splitQualifiedName(raw string) (database, table string) {
	if !strings.Contains(raw, ".") {
		// No database.table format, just parse the table name
		return "", parseIdentifier(raw)
	}
	// Handle quoted database names like "db".table
	if raw[0] == '"' || raw[0] == '`' {
		quoteChar := raw[0]
		// Find the matching closing quote
		closingIndex := -1
		for i := 1; i < len(raw); i++ {
			if raw[i] == quoteChar {
				closingIndex = i
				break
			}
		}
		if closingIndex != -1 && closingIndex+1 < len(raw) && raw[closingIndex+1] == '.' {
			// Valid quoted database name
			return parseIdentifier(raw[:closingIndex+1]), parseIdentifier(raw[closingIndex+2:])
		}
		// Otherwise fall back to a simple split below
	}
	// Simple case: split on the first dot, then parse each part
	dbTableParts := strings.SplitN(raw, ".", 2)
	return parseIdentifier(dbTableParts[0]), parseIdentifier(dbTableParts[1])
}

// ParseSQL parses PostgreSQL-compatible SQL statements, using the CockroachDB parser for SELECT queries
func ParseSQL(sql string) (Statement, error) {
	sql = strings.TrimSpace(sql)
	sqlUpper := strings.ToUpper(sql)

	// Handle USE statements
	if strings.HasPrefix(sqlUpper, "USE ") {
		parts := strings.Fields(sql)
		if len(parts) < 2 {
			return nil, fmt.Errorf("USE statement requires a database name")
		}
		// Parse the database name, handling quoted identifiers
		dbName := parseIdentifier(strings.Join(parts[1:], " "))
		return &UseStatement{Database: dbName}, nil
	}

	// Handle DESCRIBE/DESC statements as aliases for SHOW COLUMNS FROM
	if strings.HasPrefix(sqlUpper, "DESCRIBE ") || strings.HasPrefix(sqlUpper, "DESC ") {
		parts := strings.Fields(sql)
		if len(parts) < 2 {
			return nil, fmt.Errorf("DESCRIBE/DESC statement requires a table name")
		}
		// Get the raw table name (before unquoting identifiers)
		var rawTableName string
		if len(parts) >= 3 && strings.ToUpper(parts[1]) == "TABLE" {
			rawTableName = parts[2]
		} else {
			rawTableName = parts[1]
		}
		database, tableName := splitQualifiedName(rawTableName)
		stmt := &ShowStatement{Type: "columns"}
		stmt.OnTable.Name = stringValue(tableName)
		if database != "" {
			stmt.OnTable.Qualifier = stringValue(database)
		}
		return stmt, nil
	}

	// Handle SHOW statements (keep custom parsing for these simple cases)
	if strings.HasPrefix(sqlUpper, "SHOW DATABASES") || strings.HasPrefix(sqlUpper, "SHOW SCHEMAS") {
		return &ShowStatement{Type: "databases"}, nil
	}
	if strings.HasPrefix(sqlUpper, "SHOW TABLES") {
		stmt := &ShowStatement{Type: "tables"}
		// Handle "SHOW TABLES FROM database" syntax
		if strings.Contains(sqlUpper, "FROM") {
			partsUpper := strings.Fields(sqlUpper)
			partsOriginal := strings.Fields(sql) // use original casing
			for i, part := range partsUpper {
				if part == "FROM" && i+1 < len(partsOriginal) {
					// Parse the database name properly
					dbName := parseIdentifier(partsOriginal[i+1])
					stmt.Schema = dbName                    // set the Schema field for the test
					stmt.OnTable.Name = stringValue(dbName) // keep for compatibility
					break
				}
			}
		}
		return stmt, nil
	}
	if strings.HasPrefix(sqlUpper, "SHOW COLUMNS FROM") {
		// Parse "SHOW COLUMNS FROM table" or "SHOW COLUMNS FROM database.table"
		parts := strings.Fields(sql)
		if len(parts) < 4 {
			return nil, fmt.Errorf("SHOW COLUMNS FROM statement requires a table name")
		}
		database, tableName := splitQualifiedName(parts[3])
		stmt := &ShowStatement{Type: "columns"}
		stmt.OnTable.Name = stringValue(tableName)
		if database != "" {
			stmt.OnTable.Qualifier = stringValue(database)
		}
		return stmt, nil
	}

	// Use the CockroachDB parser for SELECT statements
	if strings.HasPrefix(sqlUpper, "SELECT") {
		parser := NewCockroachSQLParser()
		return parser.ParseSQL(sql)
	}

	return nil, UnsupportedFeatureError{
		Feature: fmt.Sprintf("statement type: %s", strings.Fields(sqlUpper)[0]),
		Reason:  "statement parsing not implemented",
	}
}

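// Dispatch sketch (illustrative inputs):
//
//	ParseSQL("USE analytics")            // -> *UseStatement{Database: "analytics"}
//	ParseSQL("DESCRIBE events")          // -> *ShowStatement{Type: "columns"}, OnTable.Name = "events"
//	ParseSQL("SHOW TABLES FROM logs")    // -> *ShowStatement{Type: "tables"}, Schema = "logs"
//	ParseSQL("SELECT COUNT(*) FROM t")   // -> delegated to the CockroachDB-based SELECT parser
//	ParseSQL("INSERT INTO t VALUES (1)") // -> UnsupportedFeatureError
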
// debugModeKey is used to store the debug mode flag in a context
type debugModeKey struct{}

// isDebugMode checks whether we are in debug/explain mode
func isDebugMode(ctx context.Context) bool {
	debug, ok := ctx.Value(debugModeKey{}).(bool)
	return ok && debug
}

// withDebugMode returns a context with debug mode enabled
func withDebugMode(ctx context.Context) context.Context {
	return context.WithValue(ctx, debugModeKey{}, true)
}

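// Intended usage pattern (a minimal sketch):
//
//	ctx := withDebugMode(context.Background())
//	if isDebugMode(ctx) {
//		// record plan details instead of only executing the query
//	}
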
// LogBufferStart tracks the starting buffer index for a file.
// Buffer indexes are monotonically increasing; count = len(chunks).
type LogBufferStart struct {
	StartIndex int64 `json:"start_index"` // starting buffer index (count = len(chunks))
}

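// Illustrative serialized form (hypothetical value): {"start_index":42}
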
// SQLEngine provides SQL query execution capabilities for SeaweedFS.
// Assumptions:
// 1. MQ namespaces map directly to SQL databases
// 2. MQ topics map directly to SQL tables
// 3. Schema evolution is handled transparently with backward compatibility
// 4. Queries run against Parquet-stored MQ messages
type SQLEngine struct {
	catalog *SchemaCatalog
}

// NewSQLEngine creates a new SQL execution engine.
// Uses the master address for service discovery and initialization.
func NewSQLEngine(masterAddress string) *SQLEngine {
	// Initialize the global HTTP client if not already done.
	// This is needed for reading partition data from the filer.
	if util_http.GetGlobalHttpClient() == nil {
		util_http.InitGlobalHttpClient()
	}
	return &SQLEngine{
		catalog: NewSchemaCatalog(masterAddress),
	}
}

// NewSQLEngineWithCatalog creates a new SQL execution engine with a custom catalog.
// Used for testing or when you want to provide a pre-configured catalog.
func NewSQLEngineWithCatalog(catalog *SchemaCatalog) *SQLEngine {
	// Initialize the global HTTP client if not already done.
	// This is needed for reading partition data from the filer.
	if util_http.GetGlobalHttpClient() == nil {
		util_http.InitGlobalHttpClient()
	}
	return &SQLEngine{
		catalog: catalog,
	}
}

// GetCatalog returns the schema catalog for external access
func (e *SQLEngine) GetCatalog() *SchemaCatalog {
	return e.catalog
}

// ExecuteSQL parses and executes a SQL statement.
// Assumptions:
// 1. All SQL statements are PostgreSQL-compatible; parsing is handled by ParseSQL (CockroachDB parser for SELECT)
// 2. DDL operations (CREATE/ALTER/DROP) modify underlying MQ topics
// 3. DML operations (SELECT) query Parquet files directly
// 4. Error handling follows PostgreSQL conventions
func (e *SQLEngine) ExecuteSQL(ctx context.Context, sql string) (*QueryResult, error) {
	startTime := time.Now()

	// Handle EXPLAIN as a special case
	sqlTrimmed := strings.TrimSpace(sql)
	sqlUpper := strings.ToUpper(sqlTrimmed)
	if strings.HasPrefix(sqlUpper, "EXPLAIN") {
		// Extract the actual query after the EXPLAIN keyword
		actualSQL := strings.TrimSpace(sqlTrimmed[len("EXPLAIN"):])
		return e.executeExplain(ctx, actualSQL, startTime)
	}

	// Parse the SQL statement
	stmt, err := ParseSQL(sql)
	if err != nil {
		return &QueryResult{
			Error: fmt.Errorf("SQL parse error: %v", err),
		}, err
	}

	// Route to the appropriate handler based on statement type
	switch stmt := stmt.(type) {
	case *ShowStatement:
		return e.executeShowStatementWithDescribe(ctx, stmt)
	case *UseStatement:
		return e.executeUseStatement(ctx, stmt)
	case *DDLStatement:
		return e.executeDDLStatement(ctx, stmt)
	case *SelectStatement:
		return e.executeSelectStatement(ctx, stmt)
	default:
		err := fmt.Errorf("unsupported SQL statement type: %T", stmt)
		return &QueryResult{Error: err}, err
	}
}

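// Minimal usage sketch (the master address is an assumed example value):
//
//	engine := NewSQLEngine("localhost:9333")
//	result, err := engine.ExecuteSQL(context.Background(), "SHOW DATABASES")
//	if err != nil {
//		// result.Error carries the same failure for callers that inspect the result
//	}
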
// executeExplain handles EXPLAIN statements by executing the query with plan tracking
func (e *SQLEngine) executeExplain(ctx context.Context, actualSQL string, startTime time.Time) (*QueryResult, error) {
	// Enable debug mode for EXPLAIN queries
	ctx = withDebugMode(ctx)

	// Parse the wrapped SQL statement
	stmt, err := ParseSQL(actualSQL)
	if err != nil {
		return &QueryResult{
			Error: fmt.Errorf("SQL parse error in EXPLAIN query: %v", err),
		}, err
	}

	// Create the execution plan
	plan := &QueryExecutionPlan{
		QueryType:         strings.ToUpper(strings.Fields(actualSQL)[0]),
		DataSources:       []string{},
		OptimizationsUsed: []string{},
		Details:           make(map[string]interface{}),
	}

	var result *QueryResult

	// Route to the appropriate handler based on statement type (with plan tracking)
	switch stmt := stmt.(type) {
	case *SelectStatement:
		result, err = e.executeSelectStatementWithPlan(ctx, stmt, plan)
		if err != nil {
			plan.Details["error"] = err.Error()
		}
	case *ShowStatement:
		plan.QueryType = "SHOW"
		plan.ExecutionStrategy = "metadata_only"
		result, err = e.executeShowStatementWithDescribe(ctx, stmt)
	default:
		err := fmt.Errorf("EXPLAIN not supported for statement type: %T", stmt)
		return &QueryResult{Error: err}, err
	}

	// Calculate the execution time
	plan.ExecutionTimeMs = float64(time.Since(startTime).Nanoseconds()) / 1e6

	// Format the execution plan as a result
	return e.formatExecutionPlan(plan, result, err)
}

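// Illustrative call: ExecuteSQL(ctx, "EXPLAIN SELECT COUNT(*) FROM events") yields a
// single-column result ("Query Execution Plan") whose rows are the rendered plan tree lines.
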
// formatExecutionPlan converts an execution plan to a hierarchical tree format for display
func (e *SQLEngine) formatExecutionPlan(plan *QueryExecutionPlan, originalResult *QueryResult, originalErr error) (*QueryResult, error) {
	columns := []string{"Query Execution Plan"}
	rows := [][]sqltypes.Value{}

	var planLines []string
	// Use the new tree structure if available, otherwise fall back to the legacy format
	if plan.RootNode != nil {
		planLines = e.buildTreePlan(plan, originalErr)
	} else {
		// Build the legacy hierarchical plan display
		planLines = e.buildHierarchicalPlan(plan, originalErr)
	}

	for _, line := range planLines {
		rows = append(rows, []sqltypes.Value{
			sqltypes.NewVarChar(line),
		})
	}

	if originalErr != nil {
		return &QueryResult{
			Columns:       columns,
			Rows:          rows,
			ExecutionPlan: plan,
			Error:         originalErr,
		}, originalErr
	}
	return &QueryResult{
		Columns:       columns,
		Rows:          rows,
		ExecutionPlan: plan,
	}, nil
}

// buildTreePlan creates the tree-based execution plan display
func (e *SQLEngine) buildTreePlan(plan *QueryExecutionPlan, err error) []string {
	var lines []string

	// Root header
	lines = append(lines, fmt.Sprintf("%s Query (%s)", plan.QueryType, plan.ExecutionStrategy))

	// Build the execution tree
	if plan.RootNode != nil {
		// The root execution node is always the last (and only) child of the query header
		treeLines := e.formatExecutionNode(plan.RootNode, "└── ", "    ", true)
		lines = append(lines, treeLines...)
	}

	// Add error information if present
	if err != nil {
		lines = append(lines, "")
		lines = append(lines, fmt.Sprintf("Error: %v", err))
	}
	return lines
}

// formatExecutionNode recursively formats execution tree nodes
func (e *SQLEngine) formatExecutionNode(node ExecutionNode, prefix, childPrefix string, isRoot bool) []string {
	var lines []string
	description := node.GetDescription()

	// Format the current node
	lines = append(lines, fmt.Sprintf("%s%s", prefix, description))

	// Add node-specific details
	switch n := node.(type) {
	case *FileSourceNode:
		lines = e.formatFileSourceDetails(lines, n, childPrefix, isRoot)
	case *ScanOperationNode:
		lines = e.formatScanOperationDetails(lines, n, childPrefix, isRoot)
	case *MergeOperationNode:
		lines = e.formatMergeOperationDetails(lines, n, childPrefix, isRoot)
	}

	// Format children
	children := node.GetChildren()
	for i, child := range children {
		isLastChild := i == len(children)-1
		var nextPrefix, nextChildPrefix string
		if isLastChild {
			nextPrefix = childPrefix + "└── "
			nextChildPrefix = childPrefix + "    "
		} else {
			nextPrefix = childPrefix + "├── "
			nextChildPrefix = childPrefix + "│   "
		}
		childLines := e.formatExecutionNode(child, nextPrefix, nextChildPrefix, false)
		lines = append(lines, childLines...)
	}
	return lines
}

// formatFileSourceDetails adds details for file source nodes
func (e *SQLEngine) formatFileSourceDetails(lines []string, node *FileSourceNode, childPrefix string, isRoot bool) []string {
	prefix := childPrefix
	if isRoot {
		prefix = "│   "
	}
	// Add predicates
	if len(node.Predicates) > 0 {
		lines = append(lines, fmt.Sprintf("%s├── Predicates: %s", prefix, strings.Join(node.Predicates, " AND ")))
	}
	// Add operations
	if len(node.Operations) > 0 {
		lines = append(lines, fmt.Sprintf("%s└── Operations: %s", prefix, strings.Join(node.Operations, " + ")))
	} else if len(node.Predicates) == 0 {
		lines = append(lines, fmt.Sprintf("%s└── Operation: full_scan", prefix))
	}
	return lines
}

// formatScanOperationDetails adds details for scan operation nodes
func (e *SQLEngine) formatScanOperationDetails(lines []string, node *ScanOperationNode, childPrefix string, isRoot bool) []string {
	prefix := childPrefix
	if isRoot {
		prefix = "│   "
	}
	hasChildren := len(node.Children) > 0
	// Add predicates if present; use a branch connector when children follow
	if len(node.Predicates) > 0 {
		if hasChildren {
			lines = append(lines, fmt.Sprintf("%s├── Predicates: %s", prefix, strings.Join(node.Predicates, " AND ")))
		} else {
			lines = append(lines, fmt.Sprintf("%s└── Predicates: %s", prefix, strings.Join(node.Predicates, " AND ")))
		}
	}
	return lines
}

// formatMergeOperationDetails adds details for merge operation nodes
func (e *SQLEngine) formatMergeOperationDetails(lines []string, node *MergeOperationNode, childPrefix string, isRoot bool) []string {
	hasChildren := len(node.Children) > 0
	// Add merge strategy info only if the node has children, with proper indentation
	if strategy, exists := node.Details["merge_strategy"]; exists && hasChildren {
		// The strategy is indented as a detail of this node, before its children
		lines = append(lines, fmt.Sprintf("%s├── Strategy: %v", childPrefix, strategy))
	}
	return lines
}

// buildHierarchicalPlan creates a tree-like structure for the execution plan
func (e *SQLEngine) buildHierarchicalPlan(plan *QueryExecutionPlan, err error) []string {
	var lines []string

	// Root node: query type and strategy
	lines = append(lines, fmt.Sprintf("%s Query (%s)", plan.QueryType, plan.ExecutionStrategy))

	// Aggregations section (if present)
	if len(plan.Aggregations) > 0 {
		lines = append(lines, "├── Aggregations")
		for i, agg := range plan.Aggregations {
			if i == len(plan.Aggregations)-1 {
				lines = append(lines, fmt.Sprintf("│   └── %s", agg))
			} else {
				lines = append(lines, fmt.Sprintf("│   ├── %s", agg))
			}
		}
	}

	// Data Sources section
	if len(plan.DataSources) > 0 {
		hasMore := len(plan.OptimizationsUsed) > 0 || plan.TotalRowsProcessed > 0 || len(plan.Details) > 0 || err != nil
		if hasMore {
			lines = append(lines, "├── Data Sources")
		} else {
			lines = append(lines, "└── Data Sources")
		}
		for i, source := range plan.DataSources {
			prefix := "│   "
			if !hasMore && i == len(plan.DataSources)-1 {
				prefix = "    "
			}
			if i == len(plan.DataSources)-1 {
				lines = append(lines, fmt.Sprintf("%s└── %s", prefix, e.formatDataSource(source)))
			} else {
				lines = append(lines, fmt.Sprintf("%s├── %s", prefix, e.formatDataSource(source)))
			}
		}
	}

	// Optimizations section
	if len(plan.OptimizationsUsed) > 0 {
		hasMore := plan.TotalRowsProcessed > 0 || len(plan.Details) > 0 || err != nil
		if hasMore {
			lines = append(lines, "├── Optimizations")
		} else {
			lines = append(lines, "└── Optimizations")
		}
		for i, opt := range plan.OptimizationsUsed {
			prefix := "│   "
			if !hasMore && i == len(plan.OptimizationsUsed)-1 {
				prefix = "    "
			}
			if i == len(plan.OptimizationsUsed)-1 {
				lines = append(lines, fmt.Sprintf("%s└── %s", prefix, e.formatOptimization(opt)))
			} else {
				lines = append(lines, fmt.Sprintf("%s├── %s", prefix, e.formatOptimization(opt)))
			}
		}
	}

	// Check whether a data sources tree is available
	partitionPaths, hasPartitions := plan.Details["partition_paths"].([]string)
	parquetFiles, _ := plan.Details["parquet_files"].([]string)
	liveLogFiles, _ := plan.Details["live_log_files"].([]string)

	// Statistics section
	statisticsPresent := plan.PartitionsScanned > 0 || plan.ParquetFilesScanned > 0 ||
		plan.LiveLogFilesScanned > 0 || plan.TotalRowsProcessed > 0
	if statisticsPresent {
		// The Performance section always follows, so Statistics is never the last section
		lines = append(lines, "├── Statistics")
		stats := []string{}
		if plan.PartitionsScanned > 0 {
			stats = append(stats, fmt.Sprintf("Partitions Scanned: %d", plan.PartitionsScanned))
		}
		if plan.ParquetFilesScanned > 0 {
			stats = append(stats, fmt.Sprintf("Parquet Files: %d", plan.ParquetFilesScanned))
		}
		if plan.LiveLogFilesScanned > 0 {
			stats = append(stats, fmt.Sprintf("Live Log Files: %d", plan.LiveLogFilesScanned))
		}
		// Always show row statistics for aggregations, even if 0 (to show fast path efficiency)
		if resultsReturned, hasResults := plan.Details["results_returned"]; hasResults {
			stats = append(stats, fmt.Sprintf("Rows Scanned: %d", plan.TotalRowsProcessed))
			stats = append(stats, fmt.Sprintf("Results Returned: %v", resultsReturned))
			// Add a fast-path explanation when no rows were scanned
			if plan.TotalRowsProcessed == 0 {
				// Use the actual scan method from Details instead of hardcoding it
				if scanMethod, exists := plan.Details["scan_method"].(string); exists {
					stats = append(stats, fmt.Sprintf("Scan Method: %s", scanMethod))
				} else {
					stats = append(stats, "Scan Method: Metadata Only")
				}
			}
		} else if plan.TotalRowsProcessed > 0 {
			stats = append(stats, fmt.Sprintf("Rows Processed: %d", plan.TotalRowsProcessed))
		}
		// Broker buffer information
		if plan.BrokerBufferQueried {
			stats = append(stats, fmt.Sprintf("Broker Buffer Queried: Yes (%d messages)", plan.BrokerBufferMessages))
			if plan.BufferStartIndex > 0 {
				stats = append(stats, fmt.Sprintf("Buffer Start Index: %d (deduplication enabled)", plan.BufferStartIndex))
			}
		}
		for i, stat := range stats {
			// More sections follow Statistics, so use the │ continuation prefix
			if i == len(stats)-1 {
				lines = append(lines, fmt.Sprintf("│   └── %s", stat))
			} else {
				lines = append(lines, fmt.Sprintf("│   ├── %s", stat))
			}
		}
	}

	// Data Sources Tree section (if file paths are available)
	if hasPartitions && len(partitionPaths) > 0 {
		// The Performance section always follows, so this section always uses a branch connector
		lines = append(lines, "├── Data Sources Tree")
		// Build a tree structure for each partition
		for i, partition := range partitionPaths {
			isLastPartition := i == len(partitionPaths)-1
			// Show the partition directory
			partitionPrefix := "├── "
			if isLastPartition {
				partitionPrefix = "└── "
			}
			lines = append(lines, fmt.Sprintf("│   %s%s/", partitionPrefix, partition))
			// Collect parquet files in this partition
			partitionParquetFiles := make([]string, 0)
			for _, file := range parquetFiles {
				if strings.HasPrefix(file, partition+"/") {
					fileName := file[len(partition)+1:]
					partitionParquetFiles = append(partitionParquetFiles, fileName)
				}
			}
			// Collect live log files in this partition
			partitionLiveLogFiles := make([]string, 0)
			for _, file := range liveLogFiles {
				if strings.HasPrefix(file, partition+"/") {
					fileName := file[len(partition)+1:]
					partitionLiveLogFiles = append(partitionLiveLogFiles, fileName)
				}
			}
			// Display files with proper tree formatting
			totalFiles := len(partitionParquetFiles) + len(partitionLiveLogFiles)
			fileIndex := 0
			// Display parquet files
			for _, fileName := range partitionParquetFiles {
				fileIndex++
				isLastFile := fileIndex == totalFiles && isLastPartition
				var filePrefix string
				if isLastPartition {
					if isLastFile {
						filePrefix = "    └── "
					} else {
						filePrefix = "    ├── "
					}
				} else {
					if isLastFile {
						filePrefix = "│   └── "
					} else {
						filePrefix = "│   ├── "
					}
				}
				lines = append(lines, fmt.Sprintf("│   %s%s (parquet)", filePrefix, fileName))
			}
			// Display live log files
			for _, fileName := range partitionLiveLogFiles {
				fileIndex++
				isLastFile := fileIndex == totalFiles && isLastPartition
				var filePrefix string
				if isLastPartition {
					if isLastFile {
						filePrefix = "    └── "
					} else {
						filePrefix = "    ├── "
					}
				} else {
					if isLastFile {
						filePrefix = "│   └── "
					} else {
						filePrefix = "│   ├── "
					}
				}
				lines = append(lines, fmt.Sprintf("│   %s%s (live log)", filePrefix, fileName))
			}
		}
	}

	// Details section
	// Filter out details that are shown elsewhere
	filteredDetails := make([]string, 0)
	for key, value := range plan.Details {
		// Skip keys that are already formatted and displayed in the Statistics section
		if key != "results_returned" && key != "partition_paths" && key != "parquet_files" && key != "live_log_files" {
			filteredDetails = append(filteredDetails, fmt.Sprintf("%s: %v", key, value))
		}
	}
	if len(filteredDetails) > 0 {
		// The Performance section always follows, so Details is never the last section
		lines = append(lines, "├── Details")
		for i, detail := range filteredDetails {
			if i == len(filteredDetails)-1 {
				lines = append(lines, fmt.Sprintf("│   └── %s", detail))
			} else {
				lines = append(lines, fmt.Sprintf("│   ├── %s", detail))
			}
		}
	}

	// Performance section (always present)
	if err != nil {
		lines = append(lines, "├── Performance")
		lines = append(lines, fmt.Sprintf("│   └── Execution Time: %.3fms", plan.ExecutionTimeMs))
		lines = append(lines, "└── Error")
		lines = append(lines, fmt.Sprintf("    └── %s", err.Error()))
	} else {
		lines = append(lines, "└── Performance")
		lines = append(lines, fmt.Sprintf("    └── Execution Time: %.3fms", plan.ExecutionTimeMs))
	}
	return lines
}

// formatDataSource provides user-friendly names for data sources
func (e *SQLEngine) formatDataSource(source string) string {
	switch source {
	case "parquet_stats":
		return "Parquet Statistics (fast path)"
	case "parquet_files":
		return "Parquet Files (full scan)"
	case "live_logs":
		return "Live Log Files"
	case "broker_buffer":
		return "Broker Buffer (real-time)"
	default:
		return source
	}
}
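
// Illustrative sketch only (not part of the engine API): how the mapping above
// is typically consumed when rendering a plan, assuming a *SQLEngine value e
// and a populated QueryExecutionPlan:
//
//	for _, src := range plan.DataSources {
//		fmt.Println(e.formatDataSource(src)) // e.g. "parquet_stats" → "Parquet Statistics (fast path)"
//	}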
// buildExecutionTree creates a tree representation of the query execution plan
func (e *SQLEngine) buildExecutionTree(plan *QueryExecutionPlan, stmt *SelectStatement) ExecutionNode {
	// Extract WHERE clause predicates for pushdown analysis
	var predicates []string
	if stmt.Where != nil {
		predicates = e.extractPredicateStrings(stmt.Where.Expr)
	}

	// Check if we have detailed file information
	partitionPaths, hasPartitions := plan.Details["partition_paths"].([]string)
	parquetFiles, hasParquetFiles := plan.Details["parquet_files"].([]string)
	liveLogFiles, hasLiveLogFiles := plan.Details["live_log_files"].([]string)

	if !hasPartitions || len(partitionPaths) == 0 {
		// Fallback: create a simple structure without file details
		return &ScanOperationNode{
			ScanType:    "hybrid_scan",
			Description: fmt.Sprintf("Hybrid Scan (%s)", plan.ExecutionStrategy),
			Predicates:  predicates,
			Details: map[string]interface{}{
				"note": "File details not available",
			},
		}
	}

	// Build file source nodes
	var parquetNodes []ExecutionNode
	var liveLogNodes []ExecutionNode
	var brokerBufferNodes []ExecutionNode

	// Create parquet file nodes
	if hasParquetFiles {
		for _, filePath := range parquetFiles {
			operations := e.determineParquetOperations(plan, filePath)
			parquetNodes = append(parquetNodes, &FileSourceNode{
				FilePath:         filePath,
				SourceType:       "parquet",
				Predicates:       predicates,
				Operations:       operations,
				OptimizationHint: e.determineOptimizationHint(plan, "parquet"),
				Details: map[string]interface{}{
					"format": "parquet",
				},
			})
		}
	}

	// Create live log file nodes
	if hasLiveLogFiles {
		for _, filePath := range liveLogFiles {
			operations := e.determineLiveLogOperations(plan, filePath)
			liveLogNodes = append(liveLogNodes, &FileSourceNode{
				FilePath:         filePath,
				SourceType:       "live_log",
				Predicates:       predicates,
				Operations:       operations,
				OptimizationHint: e.determineOptimizationHint(plan, "live_log"),
				Details: map[string]interface{}{
					"format": "log_entry",
				},
			})
		}
	}

	// Create a broker buffer node only if it was queried AND holds unflushed messages
	if plan.BrokerBufferQueried && plan.BrokerBufferMessages > 0 {
		brokerBufferNodes = append(brokerBufferNodes, &FileSourceNode{
			FilePath:         "broker_memory_buffer",
			SourceType:       "broker_buffer",
			Predicates:       predicates,
			Operations:       []string{"memory_scan"},
			OptimizationHint: "real_time",
			Details: map[string]interface{}{
				"messages":         plan.BrokerBufferMessages,
				"buffer_start_idx": plan.BufferStartIndex,
			},
		})
	}

	// Build the tree structure based on data sources
	var scanNodes []ExecutionNode

	// Add a parquet scan node ONLY if there are actual parquet files
	if len(parquetNodes) > 0 {
		scanNodes = append(scanNodes, &ScanOperationNode{
			ScanType:    "parquet_scan",
			Description: fmt.Sprintf("Parquet File Scan (%d files)", len(parquetNodes)),
			Predicates:  predicates,
			Children:    parquetNodes,
			Details: map[string]interface{}{
				"files_count": len(parquetNodes),
				"pushdown":    "column_projection + predicate_filtering",
			},
		})
	}

	// Add a live log scan node ONLY if there are actual live log files
	if len(liveLogNodes) > 0 {
		scanNodes = append(scanNodes, &ScanOperationNode{
			ScanType:    "live_log_scan",
			Description: fmt.Sprintf("Live Log Scan (%d files)", len(liveLogNodes)),
			Predicates:  predicates,
			Children:    liveLogNodes,
			Details: map[string]interface{}{
				"files_count": len(liveLogNodes),
				"pushdown":    "predicate_filtering",
			},
		})
	}

	// Add a broker buffer scan node ONLY if the buffer was actually queried
	if len(brokerBufferNodes) > 0 {
		scanNodes = append(scanNodes, &ScanOperationNode{
			ScanType:    "broker_buffer_scan",
			Description: "Real-time Buffer Scan",
			Predicates:  predicates,
			Children:    brokerBufferNodes,
			Details: map[string]interface{}{
				"real_time": true,
			},
		})
	}

	// Fallback: no concrete file sources were discovered
	totalFileNodes := len(parquetNodes) + len(liveLogNodes) + len(brokerBufferNodes)
	if totalFileNodes == 0 {
		return &ScanOperationNode{
			ScanType:    "hybrid_scan",
			Description: fmt.Sprintf("Hybrid Scan (%s)", plan.ExecutionStrategy),
			Predicates:  predicates,
			Details: map[string]interface{}{
				"note": "No source files discovered",
			},
		}
	}

	// Defensive fallback: file nodes exist but no scan nodes were built
	if len(scanNodes) == 0 {
		return &ScanOperationNode{
			ScanType:    "hybrid_scan",
			Description: fmt.Sprintf("Hybrid Scan (%s)", plan.ExecutionStrategy),
			Predicates:  predicates,
			Details: map[string]interface{}{
				"note": "No file details available",
			},
		}
	}

	// If there is only one scan type, return it directly
	if len(scanNodes) == 1 {
		return scanNodes[0]
	}

	// Multiple scan types - need a merge operation
	return &MergeOperationNode{
		OperationType: "chronological_merge",
		Description:   "Chronological Merge (time-ordered)",
		Children:      scanNodes,
		Details: map[string]interface{}{
			"merge_strategy": "timestamp_based",
			"sources_count":  len(scanNodes),
		},
	}
}
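
// Illustrative shape of the tree built above for a topic with both parquet and
// live log files (a sketch with hypothetical file names, not actual engine output):
//
//	MergeOperationNode (chronological_merge)
//	├── ScanOperationNode (parquet_scan)
//	│   └── FileSourceNode (.../0000-0631/data1.parquet)
//	└── ScanOperationNode (live_log_scan)
//	    └── FileSourceNode (.../0000-0631/2024-01-01-12-00-00)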
// extractPredicateStrings extracts predicate descriptions from the WHERE clause
func (e *SQLEngine) extractPredicateStrings(expr ExprNode) []string {
	var predicates []string
	e.extractPredicateStringsRecursive(expr, &predicates)
	return predicates
}

func (e *SQLEngine) extractPredicateStringsRecursive(expr ExprNode, predicates *[]string) {
	switch exprType := expr.(type) {
	case *ComparisonExpr:
		*predicates = append(*predicates, fmt.Sprintf("%s %s %s",
			e.exprToString(exprType.Left), exprType.Operator, e.exprToString(exprType.Right)))
	case *IsNullExpr:
		*predicates = append(*predicates, fmt.Sprintf("%s IS NULL", e.exprToString(exprType.Expr)))
	case *IsNotNullExpr:
		*predicates = append(*predicates, fmt.Sprintf("%s IS NOT NULL", e.exprToString(exprType.Expr)))
	case *AndExpr:
		e.extractPredicateStringsRecursive(exprType.Left, predicates)
		e.extractPredicateStringsRecursive(exprType.Right, predicates)
	case *OrExpr:
		e.extractPredicateStringsRecursive(exprType.Left, predicates)
		e.extractPredicateStringsRecursive(exprType.Right, predicates)
	case *ParenExpr:
		e.extractPredicateStringsRecursive(exprType.Expr, predicates)
	}
}
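
// Usage sketch (illustrative; column names are hypothetical): for
// WHERE status = 'active' AND id > 5 the helpers above yield two entries.
// Note that non-column operands are rendered by exprToString as their Go
// type, not their literal value, so the output looks roughly like:
//
//	preds := e.extractPredicateStrings(stmt.Where.Expr)
//	// preds ≈ ["status = *...SQLVal", "id > *...SQLVal"]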
func (e *SQLEngine) exprToString(expr ExprNode) string {
	switch exprType := expr.(type) {
	case *ColName:
		return exprType.Name.String()
	default:
		// For now, return a simplified representation
		return fmt.Sprintf("%T", expr)
	}
}
// determineParquetOperations determines which operations will be performed on parquet files
func (e *SQLEngine) determineParquetOperations(plan *QueryExecutionPlan, filePath string) []string {
	var operations []string

	// Check for column projection
	if contains(plan.OptimizationsUsed, "column_projection") {
		operations = append(operations, "column_projection")
	}

	// Check for predicate pushdown
	if contains(plan.OptimizationsUsed, "predicate_pushdown") {
		operations = append(operations, "predicate_pushdown")
	}

	// Check for statistics usage
	if contains(plan.OptimizationsUsed, "parquet_statistics") || plan.ExecutionStrategy == "hybrid_fast_path" {
		operations = append(operations, "statistics_skip")
	} else {
		operations = append(operations, "row_group_scan")
	}

	// Defensive default (currently unreachable: the branch above always appends one entry)
	if len(operations) == 0 {
		operations = append(operations, "full_scan")
	}

	return operations
}
// determineLiveLogOperations determines which operations will be performed on live log files
func (e *SQLEngine) determineLiveLogOperations(plan *QueryExecutionPlan, filePath string) []string {
	var operations []string

	// Live logs typically require a sequential scan
	operations = append(operations, "sequential_scan")

	// Check for predicate filtering
	if contains(plan.OptimizationsUsed, "predicate_pushdown") {
		operations = append(operations, "predicate_filtering")
	}

	return operations
}
// determineOptimizationHint determines the optimization hint for a data source
func (e *SQLEngine) determineOptimizationHint(plan *QueryExecutionPlan, sourceType string) string {
	switch plan.ExecutionStrategy {
	case "hybrid_fast_path":
		if sourceType == "parquet" {
			return "statistics_only"
		}
		return "minimal_scan"
	case "full_scan":
		return "full_scan"
	case "column_projection":
		return "column_filter"
	default:
		return ""
	}
}
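
// Hint matrix implied by the switch above (illustrative summary):
//
//	strategy            parquet            other source types
//	hybrid_fast_path    "statistics_only"  "minimal_scan"
//	full_scan           "full_scan"        "full_scan"
//	column_projection   "column_filter"    "column_filter"
//	(anything else)     ""                 ""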
// contains is a helper that reports whether slice contains item
func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}
// collectLiveLogFileNames collects live log file names from a partition directory
func (e *SQLEngine) collectLiveLogFileNames(filerClient filer_pb.FilerClient, partitionPath string) ([]string, error) {
	var liveLogFiles []string

	err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		// List all files in the partition directory
		request := &filer_pb.ListEntriesRequest{
			Directory:          partitionPath,
			Prefix:             "",
			StartFromFileName:  "",
			InclusiveStartFrom: false,
			Limit:              10000, // reasonable limit
		}

		stream, err := client.ListEntries(context.Background(), request)
		if err != nil {
			return err
		}

		for {
			resp, err := stream.Recv()
			if err != nil {
				if err == io.EOF {
					break
				}
				return err
			}

			entry := resp.Entry
			if entry != nil && !entry.IsDirectory {
				// Check if this is a log file (not a parquet file)
				fileName := entry.Name
				if !strings.HasSuffix(fileName, ".parquet") && !strings.HasSuffix(fileName, ".metadata") {
					liveLogFiles = append(liveLogFiles, fileName)
				}
			}
		}
		return nil
	})

	if err != nil {
		return nil, err
	}
	return liveLogFiles, nil
}
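
// Behavior sketch (illustrative; the path and file names are hypothetical):
// for a partition directory containing 2024-01-01-12-00-00, data.parquet, and
// data.parquet.metadata, only the timestamped live log name is returned:
//
//	names, err := e.collectLiveLogFileNames(filerClient, "/topics/ns/topic/v1/0000-0631")
//	// names == []string{"2024-01-01-12-00-00"}, err == nil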
// formatOptimization provides user-friendly names for optimizations
func (e *SQLEngine) formatOptimization(opt string) string {
	switch opt {
	case "parquet_statistics":
		return "Parquet Statistics Usage"
	case "live_log_counting":
		return "Live Log Row Counting"
	case "deduplication":
		return "Duplicate Data Avoidance"
	case "predicate_pushdown":
		return "WHERE Clause Pushdown"
	case "column_statistics_pruning":
		return "Column Statistics File Pruning"
	case "column_projection":
		return "Column Selection"
	case "limit_pushdown":
		return "LIMIT Optimization"
	default:
		return opt
	}
}
// executeUseStatement handles USE database statements to switch the current database context
func (e *SQLEngine) executeUseStatement(ctx context.Context, stmt *UseStatement) (*QueryResult, error) {
	// Validate the database name
	if stmt.Database == "" {
		err := fmt.Errorf("database name cannot be empty")
		return &QueryResult{Error: err}, err
	}

	// Set the current database in the catalog
	e.catalog.SetCurrentDatabase(stmt.Database)

	// Return a success message
	result := &QueryResult{
		Columns: []string{"message"},
		Rows: [][]sqltypes.Value{
			{sqltypes.MakeString([]byte(fmt.Sprintf("Database changed to: %s", stmt.Database)))},
		},
		Error: nil,
	}
	return result, nil
}
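
// Illustrative session (the database name is hypothetical):
//
//	USE analytics;
//	-- returns a single-row result: message = "Database changed to: analytics"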
// executeDDLStatement handles CREATE operations only
// Note: ALTER TABLE and DROP TABLE are not supported to protect topic data
func (e *SQLEngine) executeDDLStatement(ctx context.Context, stmt *DDLStatement) (*QueryResult, error) {
	switch stmt.Action {
	case CreateStr:
		return e.createTable(ctx, stmt)
	case AlterStr:
		err := fmt.Errorf("ALTER TABLE is not supported")
		return &QueryResult{Error: err}, err
	case DropStr:
		err := fmt.Errorf("DROP TABLE is not supported")
		return &QueryResult{Error: err}, err
	default:
		err := fmt.Errorf("unsupported DDL action: %s", stmt.Action)
		return &QueryResult{Error: err}, err
	}
}
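
// Behavior sketch (illustrative; table and column names are hypothetical):
//
//	CREATE TABLE events (id INT, name STRING);  -- routed to createTable
//	ALTER TABLE events ADD COLUMN x INT;        -- rejected: ALTER TABLE is not supported
//	DROP TABLE events;                          -- rejected: DROP TABLE is not supported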
// executeSelectStatementWithPlan handles SELECT queries with execution plan tracking
func (e *SQLEngine) executeSelectStatementWithPlan(ctx context.Context, stmt *SelectStatement, plan *QueryExecutionPlan) (*QueryResult, error) {
	// Initialize plan details once
	if plan != nil && plan.Details == nil {
		plan.Details = make(map[string]interface{})
	}

	// Parse aggregations to populate the plan
	var aggregations []AggregationSpec
	hasAggregations := false
	selectAll := false

	for _, selectExpr := range stmt.SelectExprs {
		switch expr := selectExpr.(type) {
		case *StarExpr:
			selectAll = true
		case *AliasedExpr:
			switch col := expr.Expr.(type) {
			case *FuncExpr:
				// This is an aggregation function
				aggSpec, err := e.parseAggregationFunction(col, expr)
				if err != nil {
					return &QueryResult{Error: err}, err
				}
				if aggSpec != nil {
					aggregations = append(aggregations, *aggSpec)
					hasAggregations = true
					plan.Aggregations = append(plan.Aggregations, aggSpec.Function+"("+aggSpec.Column+")")
				}
			}
		}
	}

	// Execute the query (handle aggregations specially for plan tracking)
	var result *QueryResult
	var err error
	if hasAggregations {
		// Extract table information for aggregation execution
		var database, tableName string
		if len(stmt.From) == 1 {
			if table, ok := stmt.From[0].(*AliasedTableExpr); ok {
				if tableExpr, ok := table.Expr.(TableName); ok {
					tableName = tableExpr.Name.String()
					if tableExpr.Qualifier != nil && tableExpr.Qualifier.String() != "" {
						database = tableExpr.Qualifier.String()
					}
				}
			}
		}

		// Use the current database if not specified
		if database == "" {
			database = e.catalog.currentDatabase
			if database == "" {
				database = "default"
			}
		}

		// Create a hybrid scanner for aggregation execution
		var filerClient filer_pb.FilerClient
		if e.catalog.brokerClient != nil {
			filerClient, err = e.catalog.brokerClient.GetFilerClient()
			if err != nil {
				return &QueryResult{Error: err}, err
			}
		}
		hybridScanner, scanErr := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, database, tableName, e)
		if scanErr != nil {
			return &QueryResult{Error: scanErr}, scanErr
		}

		// Execute the aggregation query with plan tracking
		result, err = e.executeAggregationQueryWithPlan(ctx, hybridScanner, aggregations, stmt, plan)
	} else {
		// Regular SELECT query with plan tracking
		result, err = e.executeSelectStatementWithBrokerStats(ctx, stmt, plan)
	}

	if err == nil && result != nil {
		// Extract table coordinates for use in execution strategy determination
		var database, tableName string
		if len(stmt.From) == 1 {
			if table, ok := stmt.From[0].(*AliasedTableExpr); ok {
				if tableExpr, ok := table.Expr.(TableName); ok {
					tableName = tableExpr.Name.String()
					if tableExpr.Qualifier != nil && tableExpr.Qualifier.String() != "" {
						database = tableExpr.Qualifier.String()
					}
				}
			}
		}
		if database == "" {
			database = e.catalog.currentDatabase
			if database == "" {
				database = "default"
			}
		}

		// Try to get topic information for partition count and row processing stats
		if tableName != "" {
			// Try to discover partitions for statistics
			if partitions, discoverErr := e.discoverTopicPartitions(database, tableName); discoverErr == nil {
				plan.PartitionsScanned = len(partitions)
			}

			// For aggregations, determine the actual processing based on the execution strategy
			if hasAggregations {
				plan.Details["results_returned"] = len(result.Rows)

				// Determine the actual work done based on the execution strategy
				if stmt.Where == nil {
					// Use the same logic as actual execution to determine if the fast path was used
					var filerClient filer_pb.FilerClient
					if e.catalog.brokerClient != nil {
						filerClient, _ = e.catalog.brokerClient.GetFilerClient()
					}
					hybridScanner, scannerErr := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, database, tableName, e)

					var canUseFastPath bool
					if scannerErr == nil {
						// Test whether the fast path can be used (same as actual execution)
						_, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
						canUseFastPath = canOptimize
					} else {
						// Fall back to a simple check
						canUseFastPath = true
						for _, spec := range aggregations {
							if !e.canUseParquetStatsForAggregation(spec) {
								canUseFastPath = false
								break
							}
						}
					}

					if canUseFastPath {
						// Fast path: minimal scanning (only live logs that weren't converted)
						if actualScanCount, countErr := e.getActualRowsScannedForFastPath(ctx, database, tableName); countErr == nil {
							plan.TotalRowsProcessed = actualScanCount
						} else {
							plan.TotalRowsProcessed = 0 // Parquet stats only, no scanning
						}
					} else {
						// Full scan: count all rows
						if actualRowCount, countErr := e.getTopicTotalRowCount(ctx, database, tableName); countErr == nil {
							plan.TotalRowsProcessed = actualRowCount
						} else {
							plan.TotalRowsProcessed = int64(len(result.Rows))
							plan.Details["note"] = "scan_count_unavailable"
						}
					}
				} else {
					// With a WHERE clause: full scan required
					if actualRowCount, countErr := e.getTopicTotalRowCount(ctx, database, tableName); countErr == nil {
						plan.TotalRowsProcessed = actualRowCount
					} else {
						plan.TotalRowsProcessed = int64(len(result.Rows))
						plan.Details["note"] = "scan_count_unavailable"
					}
				}
			} else {
				// For non-aggregations, the result count is meaningful
				plan.TotalRowsProcessed = int64(len(result.Rows))
			}
		}

		// Determine the execution strategy based on query type (reuse fast path detection from above)
		if hasAggregations {
			// Skip execution strategy determination if the plan was already populated by aggregation execution.
			// This prevents overwriting the correctly built plan from BuildAggregationPlan.
			if plan.ExecutionStrategy == "" {
				// For aggregations, determine if fast path conditions are met
				if stmt.Where == nil {
					// Reuse the same logic used above for row counting
					var canUseFastPath bool
					if tableName != "" {
						var filerClient filer_pb.FilerClient
						if e.catalog.brokerClient != nil {
							filerClient, _ = e.catalog.brokerClient.GetFilerClient()
						}
						if filerClient != nil {
							hybridScanner, scannerErr := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, database, tableName, e)
							if scannerErr == nil {
								// Test whether the fast path can be used (same as actual execution)
								_, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
								canUseFastPath = canOptimize
							} else {
								canUseFastPath = false
							}
						} else {
							// Fallback check
							canUseFastPath = true
							for _, spec := range aggregations {
								if !e.canUseParquetStatsForAggregation(spec) {
									canUseFastPath = false
									break
								}
							}
						}
					} else {
						canUseFastPath = false
					}

					if canUseFastPath {
						plan.ExecutionStrategy = "hybrid_fast_path"
						plan.OptimizationsUsed = append(plan.OptimizationsUsed, "parquet_statistics", "live_log_counting", "deduplication")
						plan.DataSources = []string{"parquet_stats", "live_logs"}
					} else {
						plan.ExecutionStrategy = "full_scan"
						plan.DataSources = []string{"live_logs", "parquet_files"}
					}
				} else {
					plan.ExecutionStrategy = "full_scan"
					plan.DataSources = []string{"live_logs", "parquet_files"}
					plan.OptimizationsUsed = append(plan.OptimizationsUsed, "predicate_pushdown")
				}
			}
		} else {
			// For regular SELECT queries
			if selectAll {
				plan.ExecutionStrategy = "hybrid_scan"
				plan.DataSources = []string{"live_logs", "parquet_files"}
			} else {
				plan.ExecutionStrategy = "column_projection"
				plan.DataSources = []string{"live_logs", "parquet_files"}
				plan.OptimizationsUsed = append(plan.OptimizationsUsed, "column_projection")
			}
		}

		// Add WHERE clause information
		if stmt.Where != nil {
			// Only add predicate_pushdown if not already present
			alreadyHasPredicate := false
			for _, opt := range plan.OptimizationsUsed {
				if opt == "predicate_pushdown" {
					alreadyHasPredicate = true
					break
				}
			}
			if !alreadyHasPredicate {
				plan.OptimizationsUsed = append(plan.OptimizationsUsed, "predicate_pushdown")
			}
			plan.Details["where_clause"] = "present"
		}

		// Add LIMIT information
		if stmt.Limit != nil {
			plan.OptimizationsUsed = append(plan.OptimizationsUsed, "limit_pushdown")
			if stmt.Limit.Rowcount != nil {
				if limitExpr, ok := stmt.Limit.Rowcount.(*SQLVal); ok && limitExpr.Type == IntVal {
					plan.Details["limit"] = string(limitExpr.Val)
				}
			}
		}
	}

	// Build the execution tree after all plan details are populated
	if err == nil && result != nil && plan != nil {
		plan.RootNode = e.buildExecutionTree(plan, stmt)
	}

	return result, err
}
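
// Flow sketch (illustrative): EXPLAIN-style callers pass a non-nil plan, which
// this method fills in before rendering:
//
//	plan := &QueryExecutionPlan{Details: map[string]interface{}{}}
//	result, err := e.executeSelectStatementWithPlan(ctx, stmt, plan)
//	// plan.ExecutionStrategy, plan.DataSources, and plan.RootNode are now populated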
// executeSelectStatement handles SELECT queries
// Assumptions:
// 1. Queries run against Parquet files in MQ topics
// 2. Predicate pushdown is used for efficiency
// 3. Only single-table queries are supported (no joins)
func (e *SQLEngine) executeSelectStatement(ctx context.Context, stmt *SelectStatement) (*QueryResult, error) {
	// Parse the FROM clause to get table (topic) information
	if len(stmt.From) != 1 {
		err := fmt.Errorf("SELECT supports single table queries only")
		return &QueryResult{Error: err}, err
	}

	// Extract the table reference
	var database, tableName string
	switch table := stmt.From[0].(type) {
	case *AliasedTableExpr:
		switch tableExpr := table.Expr.(type) {
		case TableName:
			tableName = tableExpr.Name.String()
			if tableExpr.Qualifier != nil && tableExpr.Qualifier.String() != "" {
				database = tableExpr.Qualifier.String()
			}
		default:
			err := fmt.Errorf("unsupported table expression: %T", tableExpr)
			return &QueryResult{Error: err}, err
		}
	default:
		err := fmt.Errorf("unsupported FROM clause: %T", table)
		return &QueryResult{Error: err}, err
	}

	// Use the current database context if not specified
	if database == "" {
		database = e.catalog.GetCurrentDatabase()
		if database == "" {
			database = "default"
		}
	}

	// Auto-discover and register the topic if not already in the catalog
	if _, err := e.catalog.GetTableInfo(database, tableName); err != nil {
		// Topic not in catalog, try to discover and register it
		if regErr := e.discoverAndRegisterTopic(ctx, database, tableName); regErr != nil {
			// Return an error immediately for non-existent topics instead of falling back to sample data
			return &QueryResult{Error: regErr}, regErr
		}
	}

	// Create a HybridMessageScanner for the topic (reads both live logs + Parquet files).
	// Get the filerClient from the broker connection (works with both real and mock brokers).
	var filerClient filer_pb.FilerClient
	var filerClientErr error
	filerClient, filerClientErr = e.catalog.brokerClient.GetFilerClient()
	if filerClientErr != nil {
		// Return an error if the filer client is not available for topic access
		return &QueryResult{Error: filerClientErr}, filerClientErr
	}

	hybridScanner, err := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, database, tableName, e)
	if err != nil {
		// Handle quiet topics gracefully: topics exist but have no active schema/brokers
		if IsNoSchemaError(err) {
			// Return an empty result for quiet topics (normal in production environments)
			return &QueryResult{
				Columns:  []string{},
				Rows:     [][]sqltypes.Value{},
				Database: database,
				Table:    tableName,
			}, nil
		}
		// Return an error for other access issues (truly non-existent topics, etc.)
		topicErr := fmt.Errorf("failed to access topic %s.%s: %v", database, tableName, err)
		return &QueryResult{Error: topicErr}, topicErr
	}

	// Parse SELECT columns and detect aggregation functions
	var columns []string
	var aggregations []AggregationSpec
	selectAll := false
	hasAggregations := false
	_ = hasAggregations // Used later in aggregation routing

	// Track required base columns for arithmetic expressions
	baseColumnsSet := make(map[string]bool)

	for _, selectExpr := range stmt.SelectExprs {
		switch expr := selectExpr.(type) {
		case *StarExpr:
			selectAll = true
		case *AliasedExpr:
			switch col := expr.Expr.(type) {
			case *ColName:
				colName := col.Name.String()
				// Check if this "column" is actually an arithmetic expression with functions
				if arithmeticExpr := e.parseColumnLevelCalculation(colName); arithmeticExpr != nil {
					columns = append(columns, e.getArithmeticExpressionAlias(arithmeticExpr))
					e.extractBaseColumns(arithmeticExpr, baseColumnsSet)
				} else {
					columns = append(columns, colName)
					baseColumnsSet[colName] = true
				}
			case *ArithmeticExpr:
				// Handle arithmetic expressions like id+user_id and string concatenation like name||suffix
				columns = append(columns, e.getArithmeticExpressionAlias(col))
				// Extract the base columns needed for this arithmetic expression
				e.extractBaseColumns(col, baseColumnsSet)
			case *SQLVal:
				// Handle string/numeric literals like 'good', 123, etc.
				columns = append(columns, e.getSQLValAlias(col))
			case *FuncExpr:
				// Distinguish between aggregation functions and string functions
				funcName := strings.ToUpper(col.Name.String())
				if e.isAggregationFunction(funcName) {
					// Handle aggregation functions
					aggSpec, err := e.parseAggregationFunction(col, expr)
					if err != nil {
						return &QueryResult{Error: err}, err
					}
					aggregations = append(aggregations, *aggSpec)
					hasAggregations = true
				} else if e.isStringFunction(funcName) {
					// Handle string functions like UPPER, LENGTH, etc.
					columns = append(columns, e.getStringFunctionAlias(col))
					// Extract the base columns needed for this string function
					e.extractBaseColumnsFromFunction(col, baseColumnsSet)
				} else if e.isDateTimeFunction(funcName) {
					// Handle datetime functions like CURRENT_DATE, NOW, EXTRACT, DATE_TRUNC
					columns = append(columns, e.getDateTimeFunctionAlias(col))
					// Extract the base columns needed for this datetime function
					e.extractBaseColumnsFromFunction(col, baseColumnsSet)
				} else {
					return &QueryResult{Error: fmt.Errorf("unsupported function: %s", funcName)}, fmt.Errorf("unsupported function: %s", funcName)
				}
			default:
				err := fmt.Errorf("unsupported SELECT expression: %T", col)
				return &QueryResult{Error: err}, err
			}
		default:
			err := fmt.Errorf("unsupported SELECT expression: %T", expr)
			return &QueryResult{Error: err}, err
		}
	}

	// If we have aggregations, use the aggregation query path
	if hasAggregations {
		return e.executeAggregationQuery(ctx, hybridScanner, aggregations, stmt)
	}

	// Parse the WHERE clause for predicate pushdown
	var predicate func(*schema_pb.RecordValue) bool
	if stmt.Where != nil {
		predicate, err = e.buildPredicateWithContext(stmt.Where.Expr, stmt.SelectExprs)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Parse LIMIT and OFFSET clauses.
	// Use -1 to distinguish "no LIMIT" from "LIMIT 0".
	limit := -1
	offset := 0
	if stmt.Limit != nil && stmt.Limit.Rowcount != nil {
		switch limitExpr := stmt.Limit.Rowcount.(type) {
		case *SQLVal:
			if limitExpr.Type == IntVal {
				limit64, parseErr := strconv.ParseInt(string(limitExpr.Val), 10, 64)
				if parseErr != nil {
					return &QueryResult{Error: parseErr}, parseErr
				}
				if limit64 > math.MaxInt32 || limit64 < 0 {
					return &QueryResult{Error: fmt.Errorf("LIMIT value %d is out of valid range", limit64)}, fmt.Errorf("LIMIT value %d is out of valid range", limit64)
				}
				limit = int(limit64)
			}
		}
	}

	// Parse the OFFSET clause if present
	if stmt.Limit != nil && stmt.Limit.Offset != nil {
		switch offsetExpr := stmt.Limit.Offset.(type) {
		case *SQLVal:
			if offsetExpr.Type == IntVal {
				offset64, parseErr := strconv.ParseInt(string(offsetExpr.Val), 10, 64)
				if parseErr != nil {
					return &QueryResult{Error: parseErr}, parseErr
				}
				if offset64 > math.MaxInt32 || offset64 < 0 {
					return &QueryResult{Error: fmt.Errorf("OFFSET value %d is out of valid range", offset64)}, fmt.Errorf("OFFSET value %d is out of valid range", offset64)
				}
				offset = int(offset64)
			}
		}
	}

	// Build hybrid scan options.
	// Extract time filters from the WHERE clause to optimize scanning.
	startTimeNs, stopTimeNs := int64(0), int64(0)
	if stmt.Where != nil {
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs, // Extracted from WHERE clause time comparisons
		StopTimeNs:  stopTimeNs,  // Extracted from WHERE clause time comparisons
		Limit:       limit,
		Offset:      offset,
		Predicate:   predicate,
	}

	if !selectAll {
		// Convert baseColumnsSet to a slice for the hybrid scan options
		baseColumns := make([]string, 0, len(baseColumnsSet))
		for columnName := range baseColumnsSet {
			baseColumns = append(baseColumns, columnName)
		}
		// Use base columns (not expression aliases) for data retrieval
		if len(baseColumns) > 0 {
			hybridScanOptions.Columns = baseColumns
		} else {
			// If no base columns were found (shouldn't happen), use the original columns
			hybridScanOptions.Columns = columns
		}
	}

	// Execute the hybrid scan (live logs + Parquet files)
	results, err := hybridScanner.Scan(ctx, hybridScanOptions)
	if err != nil {
		return &QueryResult{Error: err}, err
	}

	// Convert to SQL result format
	if selectAll {
		if len(columns) > 0 {
			// SELECT *, specific_columns - include both auto-discovered and explicit columns
			return hybridScanner.ConvertToSQLResultWithMixedColumns(results, columns), nil
		} else {
			// SELECT * only - let the converter determine all columns (excludes system columns)
			columns = nil
			return hybridScanner.ConvertToSQLResult(results, columns), nil
		}
	}

	// Handle custom column expressions (including arithmetic)
	return e.ConvertToSQLResultWithExpressions(hybridScanner, results, stmt.SelectExprs), nil
}
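
// Query shapes handled above (illustrative; table and column names are hypothetical):
//
//	SELECT * FROM events;                            -- full hybrid scan
//	SELECT id, name FROM events LIMIT 10 OFFSET 5;   -- column projection + limit/offset
//	SELECT UPPER(name), id+user_id FROM events;      -- string function + arithmetic expression
//	SELECT COUNT(*) FROM events;                     -- routed to executeAggregationQuery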
// executeSelectStatementWithBrokerStats handles SELECT queries with broker buffer statistics capture.
// This is used by EXPLAIN queries to capture complete data source information including broker memory.
func (e *SQLEngine) executeSelectStatementWithBrokerStats(ctx context.Context, stmt *SelectStatement, plan *QueryExecutionPlan) (*QueryResult, error) {
	// Parse the FROM clause to get table (topic) information
	if len(stmt.From) != 1 {
		err := fmt.Errorf("SELECT supports single table queries only")
		return &QueryResult{Error: err}, err
	}

	// Extract the table reference
	var database, tableName string
	switch table := stmt.From[0].(type) {
	case *AliasedTableExpr:
		switch tableExpr := table.Expr.(type) {
		case TableName:
			tableName = tableExpr.Name.String()
			if tableExpr.Qualifier != nil && tableExpr.Qualifier.String() != "" {
				database = tableExpr.Qualifier.String()
			}
		default:
			err := fmt.Errorf("unsupported table expression: %T", tableExpr)
			return &QueryResult{Error: err}, err
		}
	default:
		err := fmt.Errorf("unsupported FROM clause: %T", table)
		return &QueryResult{Error: err}, err
	}

	// Use the current database context if not specified
	if database == "" {
		database = e.catalog.GetCurrentDatabase()
		if database == "" {
			database = "default"
		}
	}

	// Auto-discover and register the topic if not already in the catalog
	if _, err := e.catalog.GetTableInfo(database, tableName); err != nil {
		// Topic not in catalog, try to discover and register it
		if regErr := e.discoverAndRegisterTopic(ctx, database, tableName); regErr != nil {
			// Return an error immediately for non-existent topics instead of falling back to sample data
			return &QueryResult{Error: regErr}, regErr
		}
	}

	// Create a HybridMessageScanner for the topic (reads both live logs + Parquet files).
	// Get the filerClient from the broker connection (works with both real and mock brokers).
	var filerClient filer_pb.FilerClient
	var filerClientErr error
	filerClient, filerClientErr = e.catalog.brokerClient.GetFilerClient()
	if filerClientErr != nil {
		// Return an error if the filer client is not available for topic access
		return &QueryResult{Error: filerClientErr}, filerClientErr
	}

	hybridScanner, err := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, database, tableName, e)
	if err != nil {
		// Handle quiet topics gracefully: topics exist but have no active schema/brokers
		if IsNoSchemaError(err) {
			// Return an empty result for quiet topics (normal in production environments)
			return &QueryResult{
				Columns:  []string{},
				Rows:     [][]sqltypes.Value{},
				Database: database,
				Table:    tableName,
			}, nil
		}
		// Return an error for other access issues (truly non-existent topics, etc.)
		topicErr := fmt.Errorf("failed to access topic %s.%s: %v", database, tableName, err)
		return &QueryResult{Error: topicErr}, topicErr
	}

	// Parse SELECT columns and detect aggregation functions
	var columns []string
	var aggregations []AggregationSpec
	selectAll := false
	hasAggregations := false
	_ = hasAggregations // Used later in aggregation routing

	// Track required base columns for arithmetic expressions
	baseColumnsSet := make(map[string]bool)

	for _, selectExpr := range stmt.SelectExprs {
		switch expr := selectExpr.(type) {
		case *StarExpr:
			selectAll = true
		case *AliasedExpr:
			switch col := expr.Expr.(type) {
			case *ColName:
				colName := col.Name.String()
				columns = append(columns, colName)
				baseColumnsSet[colName] = true
			case *ArithmeticExpr:
				// Handle arithmetic expressions like id+user_id and string concatenation like name||suffix
				columns = append(columns, e.getArithmeticExpressionAlias(col))
				// Extract the base columns needed for this arithmetic expression
				e.extractBaseColumns(col, baseColumnsSet)
			case *SQLVal:
				// Handle string/numeric literals like 'good', 123, etc.
				columns = append(columns, e.getSQLValAlias(col))
			case *FuncExpr:
				// Distinguish between aggregation functions and string functions
				funcName := strings.ToUpper(col.Name.String())
				if e.isAggregationFunction(funcName) {
					// Handle aggregation functions
					aggSpec, err := e.parseAggregationFunction(col, expr)
					if err != nil {
						return &QueryResult{Error: err}, err
					}
					aggregations = append(aggregations, *aggSpec)
					hasAggregations = true
				} else if e.isStringFunction(funcName) {
					// Handle string functions like UPPER, LENGTH, etc.
					columns = append(columns, e.getStringFunctionAlias(col))
					// Extract the base columns needed for this string function
					e.extractBaseColumnsFromFunction(col, baseColumnsSet)
				} else if e.isDateTimeFunction(funcName) {
					// Handle datetime functions like CURRENT_DATE, NOW, EXTRACT, DATE_TRUNC
					columns = append(columns, e.getDateTimeFunctionAlias(col))
					// Extract the base columns needed for this datetime function
					e.extractBaseColumnsFromFunction(col, baseColumnsSet)
				} else {
					return &QueryResult{Error: fmt.Errorf("unsupported function: %s", funcName)}, fmt.Errorf("unsupported function: %s", funcName)
				}
			default:
				err := fmt.Errorf("unsupported SELECT expression: %T", col)
				return &QueryResult{Error: err}, err
			}
		default:
			err := fmt.Errorf("unsupported SELECT expression: %T", expr)
			return &QueryResult{Error: err}, err
		}
	}

	// If we have aggregations, use the aggregation query path
	if hasAggregations {
		return e.executeAggregationQuery(ctx, hybridScanner, aggregations, stmt)
	}

	// Parse the WHERE clause for predicate pushdown
	var predicate func(*schema_pb.RecordValue) bool
	if stmt.Where != nil {
		predicate, err = e.buildPredicateWithContext(stmt.Where.Expr, stmt.SelectExprs)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Parse LIMIT and OFFSET clauses.
	// Use -1 to distinguish "no LIMIT" from "LIMIT 0".
	limit := -1
	offset := 0
	if stmt.Limit != nil && stmt.Limit.Rowcount != nil {
		switch limitExpr := stmt.Limit.Rowcount.(type) {
		case *SQLVal:
			if limitExpr.Type == IntVal {
				limit64, parseErr := strconv.ParseInt(string(limitExpr.Val), 10, 64)
				if parseErr != nil {
					return &QueryResult{Error: parseErr}, parseErr
				}
				if limit64 > math.MaxInt32 || limit64 < 0 {
					return &QueryResult{Error: fmt.Errorf("LIMIT value %d is out of valid range", limit64)}, fmt.Errorf("LIMIT value %d is out of valid range", limit64)
				}
				limit = int(limit64)
			}
		}
	}

	// Parse the OFFSET clause if present
	if stmt.Limit != nil && stmt.Limit.Offset != nil {
		switch offsetExpr := stmt.Limit.Offset.(type) {
		case *SQLVal:
			if offsetExpr.Type == IntVal {
				offset64, parseErr := strconv.ParseInt(string(offsetExpr.Val), 10, 64)
				if parseErr != nil {
					return &QueryResult{Error: parseErr}, parseErr
				}
				if offset64 > math.MaxInt32 || offset64 < 0 {
					return &QueryResult{Error: fmt.Errorf("OFFSET value %d is out of valid range", offset64)}, fmt.Errorf("OFFSET value %d is out of valid range", offset64)
				}
				offset = int(offset64)
			}
		}
	}

	// Build hybrid scan options.
	// Extract time filters from the WHERE clause to optimize scanning.
	startTimeNs, stopTimeNs := int64(0), int64(0)
	if stmt.Where != nil {
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs, // Extracted from WHERE clause time comparisons
		StopTimeNs:  stopTimeNs,  // Extracted from WHERE clause time comparisons
		Limit:       limit,
		Offset:      offset,
		Predicate:   predicate,
	}

	if !selectAll {
		// Convert baseColumnsSet to a slice for the hybrid scan options
		baseColumns := make([]string, 0, len(baseColumnsSet))
		for columnName := range baseColumnsSet {
			baseColumns = append(baseColumns, columnName)
		}
		// Use base columns (not expression aliases) for data retrieval
		if len(baseColumns) > 0 {
			hybridScanOptions.Columns = baseColumns
		} else {
			// If no base columns were found (shouldn't happen), use the original columns
			hybridScanOptions.Columns = columns
		}
	}

	// Execute the hybrid scan with stats capture for EXPLAIN
	var results []HybridScanResult
	if plan != nil {
		// EXPLAIN mode - capture broker buffer stats
		var stats *HybridScanStats
		results, stats, err = hybridScanner.ScanWithStats(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}

		// Populate the plan with broker buffer information
		if stats != nil {
			plan.BrokerBufferQueried = stats.BrokerBufferQueried
			plan.BrokerBufferMessages = stats.BrokerBufferMessages
			plan.BufferStartIndex = stats.BufferStartIndex

			// Add broker_buffer to data sources if the buffer was queried
			if stats.BrokerBufferQueried {
				// Check if broker_buffer is already in the data sources
				hasBrokerBuffer := false
				for _, source := range plan.DataSources {
					if source == "broker_buffer" {
						hasBrokerBuffer = true
						break
					}
				}
				if !hasBrokerBuffer {
					plan.DataSources = append(plan.DataSources, "broker_buffer")
				}
			}
		}

		// Populate execution plan details with source file information for the Data Sources Tree
		if partitions, discoverErr := e.discoverTopicPartitions(database, tableName); discoverErr == nil {
			// Add partition paths to the execution plan details
			plan.Details["partition_paths"] = partitions

			// Persist time filter details for downstream pruning/diagnostics
			plan.Details[PlanDetailStartTimeNs] = startTimeNs
			plan.Details[PlanDetailStopTimeNs] = stopTimeNs
			if isDebugMode(ctx) {
				fmt.Printf("Debug: Time filters extracted - startTimeNs=%d stopTimeNs=%d\n", startTimeNs, stopTimeNs)
			}

			// Collect actual file information for each partition
			var parquetFiles []string
			var liveLogFiles []string
			parquetSources := make(map[string]bool)
			var parquetReadErrors []string
			var liveLogListErrors []string

			for _, partitionPath := range partitions {
				// Get parquet files for this partition
				if parquetStats, err := hybridScanner.ReadParquetStatistics(partitionPath); err == nil {
					// Prune files by time range, with debug logging
					filteredStats := pruneParquetFilesByTime(ctx, parquetStats, hybridScanner, startTimeNs, stopTimeNs)

					// Further prune by column statistics from the WHERE clause
					if stmt.Where != nil {
						beforeColumnPrune := len(filteredStats)
						filteredStats = e.pruneParquetFilesByColumnStats(ctx, filteredStats, stmt.Where.Expr)
						columnPrunedCount := beforeColumnPrune - len(filteredStats)
						if columnPrunedCount > 0 {
							if isDebugMode(ctx) {
								fmt.Printf("Debug: Column statistics pruning skipped %d parquet files in %s\n", columnPrunedCount, partitionPath)
							}
							// Track the column statistics optimization
							if !contains(plan.OptimizationsUsed, "column_statistics_pruning") {
								plan.OptimizationsUsed = append(plan.OptimizationsUsed, "column_statistics_pruning")
							}
						}
					}

					for _, stats := range filteredStats {
						parquetFiles = append(parquetFiles, fmt.Sprintf("%s/%s", partitionPath, stats.FileName))
					}
				} else {
					parquetReadErrors = append(parquetReadErrors, fmt.Sprintf("%s: %v", partitionPath, err))
					if isDebugMode(ctx) {
						fmt.Printf("Debug: Failed to read parquet statistics in %s: %v\n", partitionPath, err)
					}
				}

				// Merge accurate parquet sources from metadata
				if sources, err := e.getParquetSourceFilesFromMetadata(partitionPath); err == nil {
					for src := range sources {
						parquetSources[src] = true
					}
				}

				// Get live log files for this partition
				if liveFiles, err := e.collectLiveLogFileNames(hybridScanner.filerClient, partitionPath); err == nil {
					for _, fileName := range liveFiles {
						// Exclude live log files that have been converted to parquet (deduplicated)
						if parquetSources[fileName] {
							continue
						}
						liveLogFiles = append(liveLogFiles, fmt.Sprintf("%s/%s", partitionPath, fileName))
					}
				} else {
					liveLogListErrors = append(liveLogListErrors, fmt.Sprintf("%s: %v", partitionPath, err))
					if isDebugMode(ctx) {
						fmt.Printf("Debug: Failed to list live log files in %s: %v\n", partitionPath, err)
					}
				}
			}

			if len(parquetFiles) > 0 {
				plan.Details["parquet_files"] = parquetFiles
			}
			if len(liveLogFiles) > 0 {
				plan.Details["live_log_files"] = liveLogFiles
			}
			if len(parquetReadErrors) > 0 {
				plan.Details["error_parquet_statistics"] = parquetReadErrors
			}
			if len(liveLogListErrors) > 0 {
				plan.Details["error_live_log_listing"] = liveLogListErrors
			}

			// Update scan statistics for the execution plan display
			plan.PartitionsScanned = len(partitions)
			plan.ParquetFilesScanned = len(parquetFiles)
			plan.LiveLogFilesScanned = len(liveLogFiles)
		} else {
			// Handle a partition discovery error
			plan.Details["error_partition_discovery"] = discoverErr.Error()
		}
	} else {
		// Normal mode - just get the results
		results, err = hybridScanner.Scan(ctx, hybridScanOptions)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Convert to SQL result format
	if selectAll {
		if len(columns) > 0 {
			// SELECT *, specific_columns - include both auto-discovered and explicit columns
			return hybridScanner.ConvertToSQLResultWithMixedColumns(results, columns), nil
		} else {
			// SELECT * only - let the converter determine all columns (excludes system columns)
			columns = nil
			return hybridScanner.ConvertToSQLResult(results, columns), nil
		}
	}

	// Handle custom column expressions (including arithmetic)
	return e.ConvertToSQLResultWithExpressions(hybridScanner, results, stmt.SelectExprs), nil
}
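
// Plan detail keys populated above in EXPLAIN mode (a summary, not an exhaustive contract):
//
//	"partition_paths"            []string  discovered partition directories
//	"parquet_files"              []string  parquet files surviving time/column pruning
//	"live_log_files"             []string  live log files not yet converted to parquet
//	"error_parquet_statistics"   []string  per-partition stat-read failures
//	"error_live_log_listing"     []string  per-partition listing failures
//	"error_partition_discovery"  string    partition discovery failure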
// extractTimeFilters extracts time range filters from the WHERE clause for optimization.
// This allows push-down of time-based queries to improve scan performance.
// Returns (startTimeNs, stopTimeNs) where 0 means unbounded.
func (e *SQLEngine) extractTimeFilters(expr ExprNode) (int64, int64) {
	startTimeNs, stopTimeNs := int64(0), int64(0)

	// Recursively extract time filters from the expression tree
	e.extractTimeFiltersRecursive(expr, &startTimeNs, &stopTimeNs)

	// Special case: if startTimeNs == stopTimeNs, treat it like an equality query
	// to avoid premature scan termination. The predicate will handle exact matching.
	if startTimeNs != 0 && startTimeNs == stopTimeNs {
		stopTimeNs = 0
	}

	return startTimeNs, stopTimeNs
}
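
// Worked example (illustrative; "ts" is a hypothetical column assumed to be a
// timestamp type in the schema):
//
//	WHERE ts >= 1700000000000000000 AND ts < 1700000060000000000
//	// → startTimeNs = 1700000000000000000, stopTimeNs = 1700000060000000000
//
//	WHERE ts > 10 OR id = 5
//	// → (0, 0): OR clauses are skipped, so the scan stays unbounded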
// extractTimeFiltersWithValidation extracts time filters and validates that the WHERE clause contains only time-based predicates.
// Returns (startTimeNs, stopTimeNs, onlyTimePredicates) where onlyTimePredicates indicates if the fast path is safe.
func (e *SQLEngine) extractTimeFiltersWithValidation(expr ExprNode) (int64, int64, bool) {
	startTimeNs, stopTimeNs := int64(0), int64(0)
	onlyTimePredicates := true

	// Recursively extract time filters and validate predicates
	e.extractTimeFiltersWithValidationRecursive(expr, &startTimeNs, &stopTimeNs, &onlyTimePredicates)

	// Special case: if startTimeNs == stopTimeNs, treat it like an equality query
	if startTimeNs != 0 && startTimeNs == stopTimeNs {
		stopTimeNs = 0
	}

	return startTimeNs, stopTimeNs, onlyTimePredicates
}
// extractTimeFiltersRecursive recursively processes WHERE expressions to find time comparisons
func (e *SQLEngine) extractTimeFiltersRecursive(expr ExprNode, startTimeNs, stopTimeNs *int64) {
	switch exprType := expr.(type) {
	case *ComparisonExpr:
		e.extractTimeFromComparison(exprType, startTimeNs, stopTimeNs)
	case *AndExpr:
		// For AND expressions, combine time filters (intersection)
		e.extractTimeFiltersRecursive(exprType.Left, startTimeNs, stopTimeNs)
		e.extractTimeFiltersRecursive(exprType.Right, startTimeNs, stopTimeNs)
	case *OrExpr:
		// For OR expressions, we can't easily optimize time ranges.
		// Skip time filter extraction for OR clauses to avoid incorrect results.
		return
	case *ParenExpr:
		// Unwrap parentheses and continue
		e.extractTimeFiltersRecursive(exprType.Expr, startTimeNs, stopTimeNs)
	}
}
// extractTimeFiltersWithValidationRecursive recursively processes WHERE expressions to find time comparisons and validate predicates
func (e *SQLEngine) extractTimeFiltersWithValidationRecursive(expr ExprNode, startTimeNs, stopTimeNs *int64, onlyTimePredicates *bool) {
	switch exprType := expr.(type) {
	case *ComparisonExpr:
		// Check if this is a time-based comparison
		leftCol := e.getColumnName(exprType.Left)
		rightCol := e.getColumnName(exprType.Right)
		isTimeComparison := e.isTimestampColumn(leftCol) || e.isTimestampColumn(rightCol)
		if isTimeComparison {
			// Extract the time filter from this comparison
			e.extractTimeFromComparison(exprType, startTimeNs, stopTimeNs)
		} else {
			// Non-time predicate found - the fast path is not safe
			*onlyTimePredicates = false
		}
	case *AndExpr:
		// For AND expressions, both sides must be time-only for the fast path to be safe
		e.extractTimeFiltersWithValidationRecursive(exprType.Left, startTimeNs, stopTimeNs, onlyTimePredicates)
		e.extractTimeFiltersWithValidationRecursive(exprType.Right, startTimeNs, stopTimeNs, onlyTimePredicates)
	case *OrExpr:
		// OR expressions are complex and not supported in the fast path
		*onlyTimePredicates = false
		return
	case *ParenExpr:
		// Unwrap parentheses and continue
		e.extractTimeFiltersWithValidationRecursive(exprType.Expr, startTimeNs, stopTimeNs, onlyTimePredicates)
	default:
		// Unknown expression type - not safe for the fast path
		*onlyTimePredicates = false
	}
}
// extractTimeFromComparison extracts time bounds from comparison expressions.
// Handles comparisons against timestamp columns (system columns and schema-defined timestamp types).
func (e *SQLEngine) extractTimeFromComparison(comp *ComparisonExpr, startTimeNs, stopTimeNs *int64) {
	// Check if this is a time-related column comparison
	leftCol := e.getColumnName(comp.Left)
	rightCol := e.getColumnName(comp.Right)

	var valueExpr ExprNode
	var reversed bool

	// Determine which side is the time column (using schema types)
	if e.isTimestampColumn(leftCol) {
		valueExpr = comp.Right
		reversed = false
	} else if e.isTimestampColumn(rightCol) {
		valueExpr = comp.Left
		reversed = true
	} else {
		// Not a time comparison
		return
	}

	// Extract the time value
	timeValue := e.extractTimeValue(valueExpr)
	if timeValue == 0 {
		// Couldn't parse the time value
		return
	}

	// Apply the comparison operator to determine time bounds
	operator := comp.Operator
	if reversed {
		// Reverse the operator if column and value are swapped
		operator = e.reverseOperator(operator)
	}

	switch operator {
	case GreaterThanStr: // timestamp > value
		if *startTimeNs == 0 || timeValue > *startTimeNs {
			*startTimeNs = timeValue
		}
	case GreaterEqualStr: // timestamp >= value
		if *startTimeNs == 0 || timeValue >= *startTimeNs {
			*startTimeNs = timeValue
		}
	case LessThanStr: // timestamp < value
		if *stopTimeNs == 0 || timeValue < *stopTimeNs {
			*stopTimeNs = timeValue
		}
	case LessEqualStr: // timestamp <= value
		if *stopTimeNs == 0 || timeValue <= *stopTimeNs {
			*stopTimeNs = timeValue
		}
	case EqualStr: // timestamp = value (point query)
		// For exact matches, set startTimeNs slightly before the target.
		// This works around a scan boundary bug where >= X starts after X instead of at X.
		// The predicate function will handle exact matching.
		*startTimeNs = timeValue - 1
		// Do NOT set stopTimeNs - let the predicate handle exact matching
	}
}
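
// Bound updates performed above, per operator (illustrative summary):
//
//	ts >  v  → startTimeNs = v (kept only if later than the current start)
//	ts >= v  → startTimeNs = v
//	ts <  v  → stopTimeNs  = v (kept only if earlier than the current stop)
//	ts <= v  → stopTimeNs  = v
//	ts =  v  → startTimeNs = v-1, stopTimeNs left unset (predicate does the exact match)
//	v  <  ts → the operator is reversed first, then handled as ts > v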
// isTimestampColumn checks if a column is a timestamp using schema type information
func (e *SQLEngine) isTimestampColumn(columnName string) bool {
	if columnName == "" {
		return false
	}

	// System timestamp columns are always time columns
	if columnName == SW_COLUMN_NAME_TIMESTAMP || columnName == SW_DISPLAY_NAME_TIMESTAMP {
		return true
	}

	// For user-defined columns, check actual schema type information
	if e.catalog != nil {
		currentDB := e.catalog.GetCurrentDatabase()
		if currentDB == "" {
			currentDB = "default"
		}

		// Get the current table context from query execution.
		// Note: this is a limitation - we need table context here.
		// In a full implementation, this would be passed from the query context.
		tableInfo, err := e.getCurrentTableInfo(currentDB)
		if err == nil && tableInfo != nil {
			for _, col := range tableInfo.Columns {
				if strings.EqualFold(col.Name, columnName) {
					// Use the actual SQL type to determine if this is a timestamp
					return e.isSQLTypeTimestamp(col.Type)
				}
			}
		}
	}

	// Only return true if we have explicit type information.
	// No guessing based on column names.
	return false
}
  2231. // getTimeFiltersFromPlan extracts time filter values from execution plan details
  2232. func getTimeFiltersFromPlan(plan *QueryExecutionPlan) (startTimeNs, stopTimeNs int64) {
  2233. if plan == nil || plan.Details == nil {
  2234. return 0, 0
  2235. }
  2236. if startNsVal, ok := plan.Details[PlanDetailStartTimeNs]; ok {
  2237. if startNs, ok2 := startNsVal.(int64); ok2 {
  2238. startTimeNs = startNs
  2239. }
  2240. }
  2241. if stopNsVal, ok := plan.Details[PlanDetailStopTimeNs]; ok {
  2242. if stopNs, ok2 := stopNsVal.(int64); ok2 {
  2243. stopTimeNs = stopNs
  2244. }
  2245. }
  2246. return
  2247. }
  2248. // pruneParquetFilesByTime filters parquet files based on timestamp ranges, with optional debug logging
  2249. func pruneParquetFilesByTime(ctx context.Context, parquetStats []*ParquetFileStats, hybridScanner *HybridMessageScanner, startTimeNs, stopTimeNs int64) []*ParquetFileStats {
  2250. if startTimeNs == 0 && stopTimeNs == 0 {
  2251. return parquetStats
  2252. }
  2253. debugEnabled := ctx != nil && isDebugMode(ctx)
  2254. qStart := startTimeNs
  2255. qStop := stopTimeNs
  2256. if qStop == 0 {
  2257. qStop = math.MaxInt64
  2258. }
  2259. n := 0
  2260. for _, fs := range parquetStats {
  2261. if debugEnabled {
  2262. fmt.Printf("Debug: Checking parquet file %s for pruning\n", fs.FileName)
  2263. }
  2264. if minNs, maxNs, ok := hybridScanner.getTimestampRangeFromStats(fs); ok {
  2265. if debugEnabled {
  2266. fmt.Printf("Debug: Prune check parquet %s min=%d max=%d qStart=%d qStop=%d\n", fs.FileName, minNs, maxNs, qStart, qStop)
  2267. }
  2268. if qStop < minNs || (qStart != 0 && qStart > maxNs) {
  2269. if debugEnabled {
  2270. fmt.Printf("Debug: Skipping parquet file %s due to no time overlap\n", fs.FileName)
  2271. }
  2272. continue
  2273. }
  2274. } else if debugEnabled {
  2275. fmt.Printf("Debug: No stats range available for parquet %s, cannot prune\n", fs.FileName)
  2276. }
  2277. parquetStats[n] = fs
  2278. n++
  2279. }
  2280. return parquetStats[:n]
  2281. }
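
// Example (illustrative): with a query range [qStart=150, qStop=200] in
// nanoseconds, a file whose stats report min=10, max=100 fails the overlap
// test (qStart > max) and is dropped, while a file with min=180, max=500 is
// kept. The in-place compaction via parquetStats[n] avoids allocating a
// second slice; the numbers here are invented for the sketch.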
// pruneParquetFilesByColumnStats filters parquet files based on column statistics and WHERE predicates
func (e *SQLEngine) pruneParquetFilesByColumnStats(ctx context.Context, parquetStats []*ParquetFileStats, whereExpr ExprNode) []*ParquetFileStats {
	if whereExpr == nil {
		return parquetStats
	}
	debugEnabled := ctx != nil && isDebugMode(ctx)
	n := 0
	for _, fs := range parquetStats {
		if e.canSkipParquetFile(ctx, fs, whereExpr) {
			if debugEnabled {
				fmt.Printf("Debug: Skipping parquet file %s due to column statistics pruning\n", fs.FileName)
			}
			continue
		}
		parquetStats[n] = fs
		n++
	}
	return parquetStats[:n]
}

// canSkipParquetFile determines if a parquet file can be skipped based on column statistics
func (e *SQLEngine) canSkipParquetFile(ctx context.Context, fileStats *ParquetFileStats, whereExpr ExprNode) bool {
	switch expr := whereExpr.(type) {
	case *ComparisonExpr:
		return e.canSkipFileByComparison(ctx, fileStats, expr)
	case *AndExpr:
		// For AND: skip if ANY condition allows skipping (more aggressive pruning)
		return e.canSkipParquetFile(ctx, fileStats, expr.Left) || e.canSkipParquetFile(ctx, fileStats, expr.Right)
	case *OrExpr:
		// For OR: skip only if ALL conditions allow skipping (conservative)
		return e.canSkipParquetFile(ctx, fileStats, expr.Left) && e.canSkipParquetFile(ctx, fileStats, expr.Right)
	default:
		// Unknown expression type - don't skip
		return false
	}
}
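
// Example (illustrative): for WHERE a > 10 AND b = 5, a file can be skipped
// when EITHER conjunct is provably unsatisfiable from its stats, because a
// matching row would have to satisfy both. For WHERE a > 10 OR b = 5, BOTH
// disjuncts must be unsatisfiable before the file can be skipped. The column
// names and constants are placeholders for this sketch.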
// canSkipFileByComparison checks if a file can be skipped based on a comparison predicate
func (e *SQLEngine) canSkipFileByComparison(ctx context.Context, fileStats *ParquetFileStats, expr *ComparisonExpr) bool {
	// Extract the column name and comparison value
	var columnName string
	var compareSchemaValue *schema_pb.Value
	operator := expr.Operator
	// Determine which side is the column and which is the value
	if colRef, ok := expr.Left.(*ColName); ok {
		columnName = colRef.Name.String()
		if sqlVal, ok := expr.Right.(*SQLVal); ok {
			compareSchemaValue = e.convertSQLValToSchemaValue(sqlVal)
		} else {
			return false // Can't optimize complex expressions
		}
	} else if colRef, ok := expr.Right.(*ColName); ok {
		columnName = colRef.Name.String()
		if sqlVal, ok := expr.Left.(*SQLVal); ok {
			compareSchemaValue = e.convertSQLValToSchemaValue(sqlVal)
			// Flip the operator for the reversed comparison
			operator = e.flipOperator(operator)
		} else {
			return false
		}
	} else {
		return false // No column reference found
	}
	// Validate the comparison value
	if compareSchemaValue == nil {
		return false
	}
	// Get the column statistics
	colStats, exists := fileStats.ColumnStats[columnName]
	if !exists || colStats == nil {
		// Try a case-insensitive lookup
		for colName, stats := range fileStats.ColumnStats {
			if strings.EqualFold(colName, columnName) {
				colStats = stats
				exists = true
				break
			}
		}
	}
	if !exists || colStats == nil || colStats.MinValue == nil || colStats.MaxValue == nil {
		return false // No statistics available
	}
	// Apply pruning logic based on the operator
	switch operator {
	case ">":
		// Skip if max(column) <= compareValue
		return e.compareValues(colStats.MaxValue, compareSchemaValue) <= 0
	case ">=":
		// Skip if max(column) < compareValue
		return e.compareValues(colStats.MaxValue, compareSchemaValue) < 0
	case "<":
		// Skip if min(column) >= compareValue
		return e.compareValues(colStats.MinValue, compareSchemaValue) >= 0
	case "<=":
		// Skip if min(column) > compareValue
		return e.compareValues(colStats.MinValue, compareSchemaValue) > 0
	case "=":
		// Skip if compareValue is outside the [min, max] range
		return e.compareValues(compareSchemaValue, colStats.MinValue) < 0 ||
			e.compareValues(compareSchemaValue, colStats.MaxValue) > 0
	case "!=", "<>":
		// Skip if min == max == compareValue (every value in the file equals compareValue)
		return e.compareValues(colStats.MinValue, colStats.MaxValue) == 0 &&
			e.compareValues(colStats.MinValue, compareSchemaValue) == 0
	default:
		return false // Unknown operator
	}
}
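
// Example (illustrative): a file whose stats for column "price" report
// min=10 and max=20 can be skipped for WHERE price > 25 (max <= 25), must be
// kept for WHERE price > 15 (some rows may qualify), and can be skipped for
// WHERE price = 30 (30 lies outside [10, 20]). The values are invented for
// the sketch; real stats come from ParquetFileStats.ColumnStats.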
// flipOperator flips comparison operators when operands are swapped
func (e *SQLEngine) flipOperator(op string) string {
	switch op {
	case ">":
		return "<"
	case ">=":
		return "<="
	case "<":
		return ">"
	case "<=":
		return ">="
	case "=", "!=", "<>":
		return op // These are symmetric
	default:
		return op
	}
}

// populatePlanFileDetails populates the execution plan with detailed file information for partitions.
// Includes the column statistics pruning optimization when a WHERE clause is provided.
func (e *SQLEngine) populatePlanFileDetails(ctx context.Context, plan *QueryExecutionPlan, hybridScanner *HybridMessageScanner, partitions []string, stmt *SelectStatement) {
	debugEnabled := ctx != nil && isDebugMode(ctx)
	// Collect actual file information for each partition
	var parquetFiles []string
	var liveLogFiles []string
	parquetSources := make(map[string]bool)
	var parquetReadErrors []string
	var liveLogListErrors []string
	// Extract time filters from plan details
	startTimeNs, stopTimeNs := getTimeFiltersFromPlan(plan)
	for _, partitionPath := range partitions {
		// Get the parquet files for this partition
		if parquetStats, err := hybridScanner.ReadParquetStatistics(partitionPath); err == nil {
			// Prune files by time range
			filteredStats := pruneParquetFilesByTime(ctx, parquetStats, hybridScanner, startTimeNs, stopTimeNs)
			// Further prune by column statistics from the WHERE clause
			if stmt != nil && stmt.Where != nil {
				beforeColumnPrune := len(filteredStats)
				filteredStats = e.pruneParquetFilesByColumnStats(ctx, filteredStats, stmt.Where.Expr)
				columnPrunedCount := beforeColumnPrune - len(filteredStats)
				if columnPrunedCount > 0 {
					if debugEnabled {
						fmt.Printf("Debug: Column statistics pruning skipped %d parquet files in %s\n", columnPrunedCount, partitionPath)
					}
					// Track the column statistics optimization
					if !contains(plan.OptimizationsUsed, "column_statistics_pruning") {
						plan.OptimizationsUsed = append(plan.OptimizationsUsed, "column_statistics_pruning")
					}
				}
			}
			for _, stats := range filteredStats {
				parquetFiles = append(parquetFiles, fmt.Sprintf("%s/%s", partitionPath, stats.FileName))
			}
		} else {
			parquetReadErrors = append(parquetReadErrors, fmt.Sprintf("%s: %v", partitionPath, err))
			if debugEnabled {
				fmt.Printf("Debug: Failed to read parquet statistics in %s: %v\n", partitionPath, err)
			}
		}
		// Merge accurate parquet sources from metadata
		if sources, err := e.getParquetSourceFilesFromMetadata(partitionPath); err == nil {
			for src := range sources {
				parquetSources[src] = true
			}
		}
		// Get the live log files for this partition
		if liveFiles, err := e.collectLiveLogFileNames(hybridScanner.filerClient, partitionPath); err == nil {
			for _, fileName := range liveFiles {
				// Exclude live log files that have been converted to parquet (deduplicated)
				if parquetSources[fileName] {
					continue
				}
				liveLogFiles = append(liveLogFiles, fmt.Sprintf("%s/%s", partitionPath, fileName))
			}
		} else {
			liveLogListErrors = append(liveLogListErrors, fmt.Sprintf("%s: %v", partitionPath, err))
			if debugEnabled {
				fmt.Printf("Debug: Failed to list live log files in %s: %v\n", partitionPath, err)
			}
		}
	}
	// Add the file lists to the plan details
	if len(parquetFiles) > 0 {
		plan.Details["parquet_files"] = parquetFiles
	}
	if len(liveLogFiles) > 0 {
		plan.Details["live_log_files"] = liveLogFiles
	}
	if len(parquetReadErrors) > 0 {
		plan.Details["error_parquet_statistics"] = parquetReadErrors
	}
	if len(liveLogListErrors) > 0 {
		plan.Details["error_live_log_listing"] = liveLogListErrors
	}
}

// isSQLTypeTimestamp checks if a SQL type string represents a timestamp type
func (e *SQLEngine) isSQLTypeTimestamp(sqlType string) bool {
	upperType := strings.ToUpper(strings.TrimSpace(sqlType))
	// Handle types with precision/length specifications, e.g. TIMESTAMP(6)
	if idx := strings.Index(upperType, "("); idx != -1 {
		upperType = upperType[:idx]
	}
	switch upperType {
	case "TIMESTAMP", "DATETIME":
		return true
	case "BIGINT":
		// A BIGINT column could hold a timestamp, but without semantic type
		// information we cannot tell. Be conservative and require an explicit
		// TIMESTAMP/DATETIME type.
		return false
	default:
		return false
	}
}
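
// Example (illustrative): isSQLTypeTimestamp("timestamp(6)") normalizes to
// "TIMESTAMP" and returns true; "DATETIME" also returns true; "BIGINT" and
// "VARCHAR(255)" return false. The parenthesized precision suffix is stripped
// before the switch, so any type arguments are ignored.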
// getCurrentTableInfo attempts to get table info for the current query context.
// This is a simplified implementation - ideally the table context would be passed explicitly.
func (e *SQLEngine) getCurrentTableInfo(database string) (*TableInfo, error) {
	// This is a limitation of the current architecture: in practice we'd need
	// the table context from the current query. For now, return an error so
	// callers fall back conservatively (e.g. isTimestampColumn returns false).
	// TODO: Enhance the architecture to pass table context through query execution
	return nil, fmt.Errorf("table context not available in current architecture")
}

// getColumnName extracts the column name from an expression (handles ColName types)
func (e *SQLEngine) getColumnName(expr ExprNode) string {
	if colName, ok := expr.(*ColName); ok {
		return colName.Name.String()
	}
	return ""
}

// resolveColumnAlias tries to resolve a column name that might be an alias
func (e *SQLEngine) resolveColumnAlias(columnName string, selectExprs []SelectExpr) string {
	if selectExprs == nil {
		return columnName
	}
	// Check if this column name is actually an alias in the SELECT list
	for _, selectExpr := range selectExprs {
		if aliasedExpr, ok := selectExpr.(*AliasedExpr); ok && aliasedExpr != nil {
			// Check if the alias matches our column name
			if aliasedExpr.As != nil && !aliasedExpr.As.IsEmpty() && aliasedExpr.As.String() == columnName {
				// If the aliased expression is a column, return the actual column name
				if colExpr, ok := aliasedExpr.Expr.(*ColName); ok && colExpr != nil {
					return colExpr.Name.String()
				}
			}
		}
	}
	// No alias found - return the original column name
	return columnName
}
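
// Example (illustrative): for SELECT amount AS total FROM t WHERE total > 5,
// resolveColumnAlias("total", selectExprs) returns "amount", so the predicate
// is evaluated against the underlying column. A name that is not an alias,
// such as "amount" itself, is returned unchanged.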
// extractTimeValue parses time values from SQL expressions.
// Supports nanosecond integer timestamps, ISO dates, and datetime strings.
func (e *SQLEngine) extractTimeValue(expr ExprNode) int64 {
	switch exprType := expr.(type) {
	case *SQLVal:
		switch exprType.Type {
		case IntVal:
			// Parse as a nanosecond timestamp
			if val, err := strconv.ParseInt(string(exprType.Val), 10, 64); err == nil {
				return val
			}
		case StrVal:
			// Parse as an ISO date or other string formats
			timeStr := string(exprType.Val)
			// Try parsing as RFC3339 (ISO 8601)
			if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
				return t.UnixNano()
			}
			// Try parsing as RFC3339 with nanoseconds
			if t, err := time.Parse(time.RFC3339Nano, timeStr); err == nil {
				return t.UnixNano()
			}
			// Try parsing as a date only (YYYY-MM-DD)
			if t, err := time.Parse("2006-01-02", timeStr); err == nil {
				return t.UnixNano()
			}
			// Try parsing as a datetime (YYYY-MM-DD HH:MM:SS)
			if t, err := time.Parse("2006-01-02 15:04:05", timeStr); err == nil {
				return t.UnixNano()
			}
		}
	}
	return 0 // Couldn't parse
}
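
// Example (illustrative): all of the following parse to the same instant,
// 2024-01-02T00:00:00Z, since bare dates are parsed as UTC midnight:
//
//	extractTimeValue of IntVal "1704153600000000000" (nanoseconds)
//	extractTimeValue of StrVal "2024-01-02T00:00:00Z" (RFC3339)
//	extractTimeValue of StrVal "2024-01-02" (date only)
//
// Unparseable values return 0, which callers treat as "no time bound".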
// reverseOperator reverses comparison operators when column and value are swapped
func (e *SQLEngine) reverseOperator(op string) string {
	switch op {
	case GreaterThanStr:
		return LessThanStr
	case GreaterEqualStr:
		return LessEqualStr
	case LessThanStr:
		return GreaterThanStr
	case LessEqualStr:
		return GreaterEqualStr
	case EqualStr:
		return EqualStr
	case NotEqualStr:
		return NotEqualStr
	default:
		return op
	}
}

// buildPredicate creates a predicate function from a WHERE clause expression.
// This is a simplified implementation - a full implementation would be much more complex.
func (e *SQLEngine) buildPredicate(expr ExprNode) (func(*schema_pb.RecordValue) bool, error) {
	return e.buildPredicateWithContext(expr, nil)
}

// buildPredicateWithContext creates a predicate function with SELECT context for alias resolution
func (e *SQLEngine) buildPredicateWithContext(expr ExprNode, selectExprs []SelectExpr) (func(*schema_pb.RecordValue) bool, error) {
	switch exprType := expr.(type) {
	case *ComparisonExpr:
		return e.buildComparisonPredicateWithContext(exprType, selectExprs)
	case *BetweenExpr:
		return e.buildBetweenPredicateWithContext(exprType, selectExprs)
	case *IsNullExpr:
		return e.buildIsNullPredicateWithContext(exprType, selectExprs)
	case *IsNotNullExpr:
		return e.buildIsNotNullPredicateWithContext(exprType, selectExprs)
	case *AndExpr:
		leftPred, err := e.buildPredicateWithContext(exprType.Left, selectExprs)
		if err != nil {
			return nil, err
		}
		rightPred, err := e.buildPredicateWithContext(exprType.Right, selectExprs)
		if err != nil {
			return nil, err
		}
		return func(record *schema_pb.RecordValue) bool {
			return leftPred(record) && rightPred(record)
		}, nil
	case *OrExpr:
		leftPred, err := e.buildPredicateWithContext(exprType.Left, selectExprs)
		if err != nil {
			return nil, err
		}
		rightPred, err := e.buildPredicateWithContext(exprType.Right, selectExprs)
		if err != nil {
			return nil, err
		}
		return func(record *schema_pb.RecordValue) bool {
			return leftPred(record) || rightPred(record)
		}, nil
	default:
		return nil, fmt.Errorf("unsupported WHERE expression: %T", expr)
	}
}
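
// Example (illustrative): predicates compose recursively, so WHERE
// a > 1 AND (b = 2 OR c IS NULL) becomes a closure tree evaluated per record:
//
//	pred, err := e.buildPredicateWithContext(whereExpr, selectExprs)
//	if err == nil && pred(record) {
//		// record satisfies the WHERE clause
//	}
//
// Go's short-circuiting && and || give the usual lazy evaluation of the
// right-hand side.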
// buildComparisonPredicateWithContext creates a predicate for comparison operations with alias support
func (e *SQLEngine) buildComparisonPredicateWithContext(expr *ComparisonExpr, selectExprs []SelectExpr) (func(*schema_pb.RecordValue) bool, error) {
	var columnName string
	var compareValue interface{}
	var operator string
	// Check if the column is on the left side (normal case: column > value)
	if colName, ok := expr.Left.(*ColName); ok {
		rawColumnName := colName.Name.String()
		// Resolve a potential alias to the actual column name
		columnName = e.resolveColumnAlias(rawColumnName, selectExprs)
		// Map display names to internal names for system columns
		columnName = e.getSystemColumnInternalName(columnName)
		operator = expr.Operator
		// Extract the comparison value from the right side
		val, err := e.extractComparisonValue(expr.Right)
		if err != nil {
			return nil, fmt.Errorf("failed to extract right-side value: %v", err)
		}
		compareValue = e.convertValueForTimestampColumn(columnName, val, expr.Right)
	} else if colName, ok := expr.Right.(*ColName); ok {
		// Column is on the right side (reversed case: value < column)
		rawColumnName := colName.Name.String()
		// Resolve a potential alias to the actual column name
		columnName = e.resolveColumnAlias(rawColumnName, selectExprs)
		// Map display names to internal names for system columns
		columnName = e.getSystemColumnInternalName(columnName)
		// Reverse the operator when the column is on the right side
		operator = e.reverseOperator(expr.Operator)
		// Extract the comparison value from the left side
		val, err := e.extractComparisonValue(expr.Left)
		if err != nil {
			return nil, fmt.Errorf("failed to extract left-side value: %v", err)
		}
		compareValue = e.convertValueForTimestampColumn(columnName, val, expr.Left)
	} else {
		// Handle literal-only comparisons like 1 = 0, 'a' = 'b', etc.
		leftVal, leftErr := e.extractComparisonValue(expr.Left)
		rightVal, rightErr := e.extractComparisonValue(expr.Right)
		if leftErr != nil || rightErr != nil {
			return nil, fmt.Errorf("no column name found in comparison expression, left: %T, right: %T", expr.Left, expr.Right)
		}
		// Evaluate the literal comparison once
		result := e.compareLiteralValues(leftVal, rightVal, expr.Operator)
		// Return a constant predicate
		return func(record *schema_pb.RecordValue) bool {
			return result
		}, nil
	}
	// Return the predicate function
	return func(record *schema_pb.RecordValue) bool {
		fieldValue, exists := record.Fields[columnName]
		if !exists {
			return false // Column doesn't exist in the record
		}
		// Use the comparison evaluation function
		return e.evaluateComparison(fieldValue, operator, compareValue)
	}, nil
}

// buildBetweenPredicateWithContext creates a predicate for BETWEEN operations
func (e *SQLEngine) buildBetweenPredicateWithContext(expr *BetweenExpr, selectExprs []SelectExpr) (func(*schema_pb.RecordValue) bool, error) {
	var columnName string
	var fromValue, toValue interface{}
	// Check if the left side is a column name
	if colName, ok := expr.Left.(*ColName); ok {
		rawColumnName := colName.Name.String()
		// Resolve a potential alias to the actual column name
		columnName = e.resolveColumnAlias(rawColumnName, selectExprs)
		// Map display names to internal names for system columns
		columnName = e.getSystemColumnInternalName(columnName)
		// Extract the FROM value
		fromVal, err := e.extractComparisonValue(expr.From)
		if err != nil {
			return nil, fmt.Errorf("failed to extract BETWEEN from value: %v", err)
		}
		fromValue = e.convertValueForTimestampColumn(columnName, fromVal, expr.From)
		// Extract the TO value
		toVal, err := e.extractComparisonValue(expr.To)
		if err != nil {
			return nil, fmt.Errorf("failed to extract BETWEEN to value: %v", err)
		}
		toValue = e.convertValueForTimestampColumn(columnName, toVal, expr.To)
	} else {
		return nil, fmt.Errorf("BETWEEN left operand must be a column name, got: %T", expr.Left)
	}
	// Return the predicate function
	return func(record *schema_pb.RecordValue) bool {
		fieldValue, exists := record.Fields[columnName]
		if !exists {
			return false
		}
		// Evaluate: fieldValue >= fromValue AND fieldValue <= toValue
		greaterThanOrEqualFrom := e.evaluateComparison(fieldValue, ">=", fromValue)
		lessThanOrEqualTo := e.evaluateComparison(fieldValue, "<=", toValue)
		result := greaterThanOrEqualFrom && lessThanOrEqualTo
		// Handle NOT BETWEEN
		if expr.Not {
			result = !result
		}
		return result
	}, nil
}

// buildIsNullPredicateWithContext creates a predicate for IS NULL operations
func (e *SQLEngine) buildIsNullPredicateWithContext(expr *IsNullExpr, selectExprs []SelectExpr) (func(*schema_pb.RecordValue) bool, error) {
	// Check that the expression is a column name
	if colName, ok := expr.Expr.(*ColName); ok {
		rawColumnName := colName.Name.String()
		// Resolve a potential alias to the actual column name
		columnName := e.resolveColumnAlias(rawColumnName, selectExprs)
		// Map display names to internal names for system columns
		columnName = e.getSystemColumnInternalName(columnName)
		// Return the predicate function
		return func(record *schema_pb.RecordValue) bool {
			// Check whether the field exists and whether it is null or missing
			fieldValue, exists := record.Fields[columnName]
			if !exists {
				return true // Field doesn't exist = NULL
			}
			// Check if the field value itself is null/empty
			return e.isValueNull(fieldValue)
		}, nil
	} else {
		return nil, fmt.Errorf("IS NULL left operand must be a column name, got: %T", expr.Expr)
	}
}

// buildIsNotNullPredicateWithContext creates a predicate for IS NOT NULL operations
func (e *SQLEngine) buildIsNotNullPredicateWithContext(expr *IsNotNullExpr, selectExprs []SelectExpr) (func(*schema_pb.RecordValue) bool, error) {
	// Check that the expression is a column name
	if colName, ok := expr.Expr.(*ColName); ok {
		rawColumnName := colName.Name.String()
		// Resolve a potential alias to the actual column name
		columnName := e.resolveColumnAlias(rawColumnName, selectExprs)
		// Map display names to internal names for system columns
		columnName = e.getSystemColumnInternalName(columnName)
		// Return the predicate function
		return func(record *schema_pb.RecordValue) bool {
			// Check whether the field exists and whether it is not null
			fieldValue, exists := record.Fields[columnName]
			if !exists {
				return false // Field doesn't exist = NULL, so NOT NULL is false
			}
			// Check if the field value itself is not null/empty
			return !e.isValueNull(fieldValue)
		}, nil
	} else {
		return nil, fmt.Errorf("IS NOT NULL left operand must be a column name, got: %T", expr.Expr)
	}
}

// isValueNull checks if a schema_pb.Value is null or represents a null value
func (e *SQLEngine) isValueNull(value *schema_pb.Value) bool {
	if value == nil {
		return true
	}
	// Check the Kind field to see if it represents a null value
	if value.Kind == nil {
		return true
	}
	// For the different value types, check if they represent null/empty values
	switch kind := value.Kind.(type) {
	case *schema_pb.Value_StringValue:
		// An empty string could be considered null depending on semantics.
		// For now, treat empty strings as not null (SQL standard behavior).
		return false
	case *schema_pb.Value_BoolValue:
		return false // Boolean values are never null
	case *schema_pb.Value_Int32Value, *schema_pb.Value_Int64Value:
		return false // Integer values are never null
	case *schema_pb.Value_FloatValue, *schema_pb.Value_DoubleValue:
		return false // Numeric values are never null
	case *schema_pb.Value_BytesValue:
		// Bytes could be null if empty, but for now treat them as not null
		return false
	case *schema_pb.Value_TimestampValue:
		// Check if the timestamp is zero/uninitialized
		return kind.TimestampValue == nil
	case *schema_pb.Value_DateValue:
		return kind.DateValue == nil
	case *schema_pb.Value_TimeValue:
		return kind.TimeValue == nil
	default:
		// Unknown type - consider it null to be safe
		return true
	}
}
// extractComparisonValue extracts the comparison value from a SQL expression
func (e *SQLEngine) extractComparisonValue(expr ExprNode) (interface{}, error) {
	switch val := expr.(type) {
	case *SQLVal:
		switch val.Type {
		case IntVal:
			intVal, err := strconv.ParseInt(string(val.Val), 10, 64)
			if err != nil {
				return nil, err
			}
			return intVal, nil
		case StrVal:
			return string(val.Val), nil
		case FloatVal:
			floatVal, err := strconv.ParseFloat(string(val.Val), 64)
			if err != nil {
				return nil, err
			}
			return floatVal, nil
		default:
			return nil, fmt.Errorf("unsupported SQL value type: %v", val.Type)
		}
	case *ArithmeticExpr:
		// Handle arithmetic expressions like CURRENT_TIMESTAMP - INTERVAL '1 hour'
		return e.evaluateArithmeticExpressionForComparison(val)
	case *FuncExpr:
		// Handle function calls like NOW(), CURRENT_TIMESTAMP
		return e.evaluateFunctionExpressionForComparison(val)
	case *IntervalExpr:
		// Handle standalone INTERVAL expressions
		nanos, err := e.evaluateInterval(val.Value)
		if err != nil {
			return nil, err
		}
		return nanos, nil
	case ValTuple:
		// Handle IN expressions with multiple values: column IN (value1, value2, value3).
		// Tuple elements that are not simple literals are silently skipped.
		var inValues []interface{}
		for _, tupleVal := range val {
			switch v := tupleVal.(type) {
			case *SQLVal:
				switch v.Type {
				case IntVal:
					intVal, err := strconv.ParseInt(string(v.Val), 10, 64)
					if err != nil {
						return nil, err
					}
					inValues = append(inValues, intVal)
				case StrVal:
					inValues = append(inValues, string(v.Val))
				case FloatVal:
					floatVal, err := strconv.ParseFloat(string(v.Val), 64)
					if err != nil {
						return nil, err
					}
					inValues = append(inValues, floatVal)
				}
			}
		}
		return inValues, nil
	default:
		return nil, fmt.Errorf("unsupported comparison value type: %T", expr)
	}
}

// evaluateArithmeticExpressionForComparison evaluates an arithmetic expression for WHERE clause comparisons
func (e *SQLEngine) evaluateArithmeticExpressionForComparison(expr *ArithmeticExpr) (interface{}, error) {
	// Check if this is timestamp arithmetic with intervals
	if e.isTimestampArithmetic(expr.Left, expr.Right) && (expr.Operator == "+" || expr.Operator == "-") {
		// Evaluate the timestamp arithmetic and return the result as nanoseconds
		result, err := e.evaluateTimestampArithmetic(expr.Left, expr.Right, expr.Operator)
		if err != nil {
			return nil, err
		}
		// Extract the timestamp value as nanoseconds for comparison
		if result.Kind != nil {
			switch resultKind := result.Kind.(type) {
			case *schema_pb.Value_Int64Value:
				return resultKind.Int64Value, nil
			case *schema_pb.Value_StringValue:
				// If it's a formatted timestamp string, parse it back to nanoseconds
				if timestamp, err := time.Parse("2006-01-02T15:04:05.000000000Z", resultKind.StringValue); err == nil {
					return timestamp.UnixNano(), nil
				}
				return nil, fmt.Errorf("could not parse timestamp string: %s", resultKind.StringValue)
			}
		}
		return nil, fmt.Errorf("invalid timestamp arithmetic result")
	}
	// Other arithmetic operations would need their own evaluation path;
	// for now, return an error for unsupported arithmetic
	return nil, fmt.Errorf("unsupported arithmetic expression in WHERE clause: %s", expr.Operator)
}

// evaluateFunctionExpressionForComparison evaluates a function expression for WHERE clause comparisons
func (e *SQLEngine) evaluateFunctionExpressionForComparison(expr *FuncExpr) (interface{}, error) {
	funcName := strings.ToUpper(expr.Name.String())
	switch funcName {
	case "NOW", "CURRENT_TIMESTAMP":
		result, err := e.Now()
		if err != nil {
			return nil, err
		}
		// Return as nanoseconds for comparison
		if result.Kind != nil {
			if resultKind, ok := result.Kind.(*schema_pb.Value_TimestampValue); ok {
				// Convert microseconds to nanoseconds
				return resultKind.TimestampValue.TimestampMicros * 1000, nil
			}
		}
		return nil, fmt.Errorf("invalid NOW() result: expected TimestampValue, got %T", result.Kind)
	case "CURRENT_DATE":
		result, err := e.CurrentDate()
		if err != nil {
			return nil, err
		}
		// Convert the date to nanoseconds (start of day)
		if result.Kind != nil {
			if resultKind, ok := result.Kind.(*schema_pb.Value_StringValue); ok {
				if date, err := time.Parse("2006-01-02", resultKind.StringValue); err == nil {
					return date.UnixNano(), nil
				}
			}
		}
		return nil, fmt.Errorf("invalid CURRENT_DATE result")
	case "CURRENT_TIME":
		result, err := e.CurrentTime()
		if err != nil {
			return nil, err
		}
		// Time-of-day comparison might need special handling;
		// for now, just return the string value
		if result.Kind != nil {
			if resultKind, ok := result.Kind.(*schema_pb.Value_StringValue); ok {
				return resultKind.StringValue, nil
			}
		}
		return nil, fmt.Errorf("invalid CURRENT_TIME result")
	default:
		return nil, fmt.Errorf("unsupported function in WHERE clause: %s", funcName)
	}
}

// evaluateComparison performs the actual comparison.
// This is a simplified implementation - a full implementation would handle
// type coercion and all comparison operators.
func (e *SQLEngine) evaluateComparison(fieldValue *schema_pb.Value, operator string, compareValue interface{}) bool {
	switch operator {
	case "=":
		return e.valuesEqual(fieldValue, compareValue)
	case "<":
		return e.valueLessThan(fieldValue, compareValue)
	case ">":
		return e.valueGreaterThan(fieldValue, compareValue)
	case "<=":
		return e.valuesEqual(fieldValue, compareValue) || e.valueLessThan(fieldValue, compareValue)
	case ">=":
		return e.valuesEqual(fieldValue, compareValue) || e.valueGreaterThan(fieldValue, compareValue)
	case "!=", "<>":
		return !e.valuesEqual(fieldValue, compareValue)
	case "LIKE", "like":
		return e.valueLike(fieldValue, compareValue)
	case "IN", "in":
		return e.valueIn(fieldValue, compareValue)
	default:
		return false
	}
}
// Helper functions for value comparison with proper type coercion

func (e *SQLEngine) valuesEqual(fieldValue *schema_pb.Value, compareValue interface{}) bool {
	// Handle string comparisons first
	if strField, ok := fieldValue.Kind.(*schema_pb.Value_StringValue); ok {
		if strVal, ok := compareValue.(string); ok {
			return strField.StringValue == strVal
		}
		return false
	}
	// Handle boolean comparisons
	if boolField, ok := fieldValue.Kind.(*schema_pb.Value_BoolValue); ok {
		if boolVal, ok := compareValue.(bool); ok {
			return boolField.BoolValue == boolVal
		}
		return false
	}
	// Handle logical type comparisons
	if timestampField, ok := fieldValue.Kind.(*schema_pb.Value_TimestampValue); ok {
		if timestampVal, ok := compareValue.(int64); ok {
			return timestampField.TimestampValue.TimestampMicros == timestampVal
		}
		return false
	}
	if dateField, ok := fieldValue.Kind.(*schema_pb.Value_DateValue); ok {
		if dateVal, ok := compareValue.(int32); ok {
			return dateField.DateValue.DaysSinceEpoch == dateVal
		}
		return false
	}
	// Handle DecimalValue comparison (convert to string for comparison)
	if decimalField, ok := fieldValue.Kind.(*schema_pb.Value_DecimalValue); ok {
		if decimalStr, ok := compareValue.(string); ok {
			// Convert the decimal bytes back to a string for comparison
			decimalValue := e.decimalToString(decimalField.DecimalValue)
			return decimalValue == decimalStr
		}
		return false
	}
	if timeField, ok := fieldValue.Kind.(*schema_pb.Value_TimeValue); ok {
		if timeVal, ok := compareValue.(int64); ok {
			return timeField.TimeValue.TimeMicros == timeVal
		}
		return false
	}
	// Handle direct int64 comparisons for timestamp precision (before float64 conversion)
	if int64Field, ok := fieldValue.Kind.(*schema_pb.Value_Int64Value); ok {
		if int64Val, ok := compareValue.(int64); ok {
			return int64Field.Int64Value == int64Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int64Field.Int64Value == int64(intVal)
		}
	}
	// Handle direct int32 comparisons
	if int32Field, ok := fieldValue.Kind.(*schema_pb.Value_Int32Value); ok {
		if int32Val, ok := compareValue.(int32); ok {
			return int32Field.Int32Value == int32Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int32Field.Int32Value == int32(intVal)
		}
		if int64Val, ok := compareValue.(int64); ok && int64Val >= math.MinInt32 && int64Val <= math.MaxInt32 {
			return int32Field.Int32Value == int32(int64Val)
		}
	}
	// Handle numeric comparisons with type coercion (fallback for other numeric types)
	fieldNum := e.convertToNumber(fieldValue)
	compareNum := e.convertCompareValueToNumber(compareValue)
	if fieldNum != nil && compareNum != nil {
		return *fieldNum == *compareNum
	}
	return false
}

// convertCompareValueToNumber converts compare values from SQL queries to float64
func (e *SQLEngine) convertCompareValueToNumber(compareValue interface{}) *float64 {
	switch v := compareValue.(type) {
	case int:
		result := float64(v)
		return &result
	case int32:
		result := float64(v)
		return &result
	case int64:
		result := float64(v)
		return &result
	case float32:
		result := float64(v)
		return &result
	case float64:
		return &v
	case string:
		// Try to parse the string as a number for flexible comparisons
		if parsed, err := strconv.ParseFloat(v, 64); err == nil {
			return &parsed
		}
	}
	return nil
}

// decimalToString converts a DecimalValue back to its string representation
func (e *SQLEngine) decimalToString(decimalValue *schema_pb.DecimalValue) string {
	if decimalValue == nil || decimalValue.Value == nil {
		return "0"
	}
	// Convert the bytes back to a big.Int
	intValue := new(big.Int).SetBytes(decimalValue.Value)
	// Convert to a string with proper decimal placement
	str := intValue.String()
	// Handle decimal placement based on the scale
	scale := int(decimalValue.Scale)
	if scale > 0 && len(str) > scale {
		// Insert the decimal point
		decimalPos := len(str) - scale
		return str[:decimalPos] + "." + str[decimalPos:]
	}
	return str
}
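
// Example (illustrative): a DecimalValue whose big-endian bytes encode the
// integer 12345 with Scale=2 renders as "123.45"; with Scale=0 it renders as
// "12345". Note that big.Int.SetBytes reads the bytes as an unsigned
// magnitude, so negative decimals would need sign handling upstream, and a
// digit string no longer than the scale falls through the len(str) > scale
// guard and is returned without a decimal point.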
func (e *SQLEngine) valueLessThan(fieldValue *schema_pb.Value, compareValue interface{}) bool {
	// Handle string comparisons lexicographically
	if strField, ok := fieldValue.Kind.(*schema_pb.Value_StringValue); ok {
		if strVal, ok := compareValue.(string); ok {
			return strField.StringValue < strVal
		}
		return false
	}
	// Handle logical type comparisons
	if timestampField, ok := fieldValue.Kind.(*schema_pb.Value_TimestampValue); ok {
		if timestampVal, ok := compareValue.(int64); ok {
			return timestampField.TimestampValue.TimestampMicros < timestampVal
		}
		return false
	}
	if dateField, ok := fieldValue.Kind.(*schema_pb.Value_DateValue); ok {
		if dateVal, ok := compareValue.(int32); ok {
			return dateField.DateValue.DaysSinceEpoch < dateVal
		}
		return false
	}
	if timeField, ok := fieldValue.Kind.(*schema_pb.Value_TimeValue); ok {
		if timeVal, ok := compareValue.(int64); ok {
			return timeField.TimeValue.TimeMicros < timeVal
		}
		return false
	}
	// Handle direct int64 comparisons for timestamp precision (before float64 conversion)
	if int64Field, ok := fieldValue.Kind.(*schema_pb.Value_Int64Value); ok {
		if int64Val, ok := compareValue.(int64); ok {
			return int64Field.Int64Value < int64Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int64Field.Int64Value < int64(intVal)
		}
	}
	// Handle direct int32 comparisons
	if int32Field, ok := fieldValue.Kind.(*schema_pb.Value_Int32Value); ok {
		if int32Val, ok := compareValue.(int32); ok {
			return int32Field.Int32Value < int32Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int32Field.Int32Value < int32(intVal)
		}
		if int64Val, ok := compareValue.(int64); ok && int64Val >= math.MinInt32 && int64Val <= math.MaxInt32 {
			return int32Field.Int32Value < int32(int64Val)
		}
	}
	// Handle numeric comparisons with type coercion (fallback for other numeric types)
	fieldNum := e.convertToNumber(fieldValue)
	compareNum := e.convertCompareValueToNumber(compareValue)
	if fieldNum != nil && compareNum != nil {
		return *fieldNum < *compareNum
	}
	return false
}

func (e *SQLEngine) valueGreaterThan(fieldValue *schema_pb.Value, compareValue interface{}) bool {
	// Handle string comparisons lexicographically
	if strField, ok := fieldValue.Kind.(*schema_pb.Value_StringValue); ok {
		if strVal, ok := compareValue.(string); ok {
			return strField.StringValue > strVal
		}
		return false
	}
	// Handle logical type comparisons
	if timestampField, ok := fieldValue.Kind.(*schema_pb.Value_TimestampValue); ok {
		if timestampVal, ok := compareValue.(int64); ok {
			return timestampField.TimestampValue.TimestampMicros > timestampVal
		}
		return false
	}
	if dateField, ok := fieldValue.Kind.(*schema_pb.Value_DateValue); ok {
		if dateVal, ok := compareValue.(int32); ok {
			return dateField.DateValue.DaysSinceEpoch > dateVal
		}
		return false
	}
	if timeField, ok := fieldValue.Kind.(*schema_pb.Value_TimeValue); ok {
		if timeVal, ok := compareValue.(int64); ok {
			return timeField.TimeValue.TimeMicros > timeVal
		}
		return false
	}
	// Handle direct int64 comparisons for timestamp precision (before float64 conversion)
	if int64Field, ok := fieldValue.Kind.(*schema_pb.Value_Int64Value); ok {
		if int64Val, ok := compareValue.(int64); ok {
			return int64Field.Int64Value > int64Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int64Field.Int64Value > int64(intVal)
		}
	}
	// Handle direct int32 comparisons
	if int32Field, ok := fieldValue.Kind.(*schema_pb.Value_Int32Value); ok {
		if int32Val, ok := compareValue.(int32); ok {
			return int32Field.Int32Value > int32Val
		}
		if intVal, ok := compareValue.(int); ok {
			return int32Field.Int32Value > int32(intVal)
		}
		if int64Val, ok := compareValue.(int64); ok && int64Val >= math.MinInt32 && int64Val <= math.MaxInt32 {
			return int32Field.Int32Value > int32(int64Val)
		}
	}
	// Handle numeric comparisons with type coercion (fallback for other numeric types)
	fieldNum := e.convertToNumber(fieldValue)
	compareNum := e.convertCompareValueToNumber(compareValue)
	if fieldNum != nil && compareNum != nil {
		return *fieldNum > *compareNum
	}
	return false
}
// valueLike implements SQL LIKE pattern matching with % and _ wildcards
func (e *SQLEngine) valueLike(fieldValue *schema_pb.Value, compareValue interface{}) bool {
	// Only support LIKE for string values
	stringVal, ok := fieldValue.Kind.(*schema_pb.Value_StringValue)
	if !ok {
		return false
	}
	pattern, ok := compareValue.(string)
	if !ok {
		return false
	}
	// Convert the SQL LIKE pattern to a Go regex pattern. Escape regex
	// metacharacters first so literal ".", "(", etc. in the pattern are not
	// misinterpreted, then map the SQL wildcards:
	// % matches any sequence of characters (.*), _ matches a single character (.)
	regexPattern := regexp.QuoteMeta(pattern)
	regexPattern = strings.ReplaceAll(regexPattern, "%", ".*")
	regexPattern = strings.ReplaceAll(regexPattern, "_", ".")
	regexPattern = "^" + regexPattern + "$" // Anchor to match the entire string
	// Compile and match the regex
	regex, err := regexp.Compile(regexPattern)
	if err != nil {
		return false // Invalid pattern
	}
	return regex.MatchString(stringVal.StringValue)
}
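
// Example (illustrative): against a field holding "seaweed",
//
//	LIKE 'sea%'    -> true  ("sea" plus any suffix)
//	LIKE 's_aweed' -> true  ("_" matches exactly one character)
//	LIKE 'weed'    -> false (the pattern is anchored to the whole string)
//
// Thanks to the QuoteMeta escaping above, a pattern such as 'v1.2%' treats
// the "." literally rather than as a regex wildcard.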
// valueIn implements the SQL IN operator for checking if a value exists in a list
func (e *SQLEngine) valueIn(fieldValue *schema_pb.Value, compareValue interface{}) bool {
	// For now, handle the simple case where compareValue is a slice of values.
	// A full implementation would handle SQL IN expressions more generally.
	values, ok := compareValue.([]interface{})
	if !ok {
		return false
	}
	// Check if fieldValue matches any value in the list
	for _, value := range values {
		if e.valuesEqual(fieldValue, value) {
			return true
		}
	}
	return false
}

// Helper methods for specific operations

func (e *SQLEngine) showDatabases(ctx context.Context) (*QueryResult, error) {
	databases := e.catalog.ListDatabases()
	result := &QueryResult{
		Columns: []string{"Database"},
		Rows:    make([][]sqltypes.Value, len(databases)),
	}
	for i, db := range databases {
		result.Rows[i] = []sqltypes.Value{
			sqltypes.NewVarChar(db),
		}
	}
	return result, nil
}

func (e *SQLEngine) showTables(ctx context.Context, dbName string) (*QueryResult, error) {
	// Use the current database context if no database is specified
	if dbName == "" {
		dbName = e.catalog.GetCurrentDatabase()
		if dbName == "" {
			dbName = "default"
		}
	}
	tables, err := e.catalog.ListTables(dbName)
	if err != nil {
		return &QueryResult{Error: err}, err
	}
	result := &QueryResult{
		Columns: []string{"Tables_in_" + dbName},
		Rows:    make([][]sqltypes.Value, len(tables)),
	}
	for i, table := range tables {
		result.Rows[i] = []sqltypes.Value{
			sqltypes.NewVarChar(table),
		}
	}
	return result, nil
}

// compareLiteralValues compares two literal values with the given operator
func (e *SQLEngine) compareLiteralValues(left, right interface{}, operator string) bool {
	switch operator {
	case "=", "==":
		return e.literalValuesEqual(left, right)
	case "!=", "<>":
		return !e.literalValuesEqual(left, right)
	case "<":
		return e.compareLiteralNumber(left, right) < 0
	case "<=":
		return e.compareLiteralNumber(left, right) <= 0
	case ">":
		return e.compareLiteralNumber(left, right) > 0
	case ">=":
		return e.compareLiteralNumber(left, right) >= 0
	default:
		// For unsupported operators, default to false
		return false
	}
}
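
// Example (illustrative): a contradiction or tautology in the WHERE clause,
// such as WHERE 1 = 0, reaches compareLiteralValues exactly once when the
// predicate is built, and the comparison predicate becomes a constant-false
// closure, so the literals are never re-evaluated per row.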
// literalValuesEqual checks if two literal values are equal
func (e *SQLEngine) literalValuesEqual(left, right interface{}) bool {
	// Convert both to strings for comparison
	leftStr := fmt.Sprintf("%v", left)
	rightStr := fmt.Sprintf("%v", right)
	return leftStr == rightStr
}

// compareLiteralNumber compares two values as numbers, falling back to string order
func (e *SQLEngine) compareLiteralNumber(left, right interface{}) int {
	leftNum, leftOk := e.convertToFloat64(left)
	rightNum, rightOk := e.convertToFloat64(right)
	if !leftOk || !rightOk {
		// Fall back to string comparison if not numeric
		leftStr := fmt.Sprintf("%v", left)
		rightStr := fmt.Sprintf("%v", right)
		if leftStr < rightStr {
			return -1
		} else if leftStr > rightStr {
			return 1
		}
		return 0
	}
	if leftNum < rightNum {
		return -1
	} else if leftNum > rightNum {
		return 1
	}
	return 0
}

// convertToFloat64 attempts to convert a value to float64
func (e *SQLEngine) convertToFloat64(value interface{}) (float64, bool) {
	switch v := value.(type) {
	case int64:
		return float64(v), true
	case int32:
		return float64(v), true
	case int:
		return float64(v), true
	case float64:
		return v, true
	case float32:
		return float64(v), true
	case string:
		if num, err := strconv.ParseFloat(v, 64); err == nil {
			return num, true
		}
		return 0, false
	default:
		return 0, false
	}
}
func (e *SQLEngine) createTable(ctx context.Context, stmt *DDLStatement) (*QueryResult, error) {
	// Parse the CREATE TABLE statement.
	// Assumption: the table name format is [database.]table_name
	tableName := stmt.NewName.Name.String()
	database := ""
	// Check if a database is specified in the table name
	if stmt.NewName.Qualifier.String() != "" {
		database = stmt.NewName.Qualifier.String()
	} else {
		// Use the current database context or the default
		database = e.catalog.GetCurrentDatabase()
		if database == "" {
			database = "default"
		}
	}
	// Parse column definitions from CREATE TABLE.
	// Assumption: stmt.TableSpec contains the column definitions
	if stmt.TableSpec == nil || len(stmt.TableSpec.Columns) == 0 {
		err := fmt.Errorf("CREATE TABLE requires column definitions")
		return &QueryResult{Error: err}, err
	}
	// Convert SQL columns to MQ schema fields
	fields := make([]*schema_pb.Field, len(stmt.TableSpec.Columns))
	for i, col := range stmt.TableSpec.Columns {
		fieldType, err := e.convertSQLTypeToMQ(col.Type)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
		fields[i] = &schema_pb.Field{
			Name: col.Name.String(),
			Type: fieldType,
		}
	}
	// Create the record type for the topic
	recordType := &schema_pb.RecordType{
		Fields: fields,
	}
	// Create the topic via the broker using the configurable partition count
	partitionCount := e.catalog.GetDefaultPartitionCount()
	err := e.catalog.brokerClient.ConfigureTopic(ctx, database, tableName, partitionCount, recordType)
	if err != nil {
		return &QueryResult{Error: err}, err
	}
	// Register the new topic in the catalog
	mqSchema := &schema.Schema{
		Namespace:  database,
		Name:       tableName,
		RecordType: recordType,
		RevisionId: 1, // Initial revision
	}
	err = e.catalog.RegisterTopic(database, tableName, mqSchema)
	if err != nil {
		return &QueryResult{Error: err}, err
	}
	// Return a success result
	result := &QueryResult{
		Columns: []string{"Result"},
		Rows: [][]sqltypes.Value{
			{sqltypes.NewVarChar(fmt.Sprintf("Table '%s.%s' created successfully", database, tableName))},
		},
	}
	return result, nil
}
// ExecutionPlanBuilder handles building execution plans for queries
type ExecutionPlanBuilder struct {
	engine *SQLEngine
}

// NewExecutionPlanBuilder creates a new execution plan builder
func NewExecutionPlanBuilder(engine *SQLEngine) *ExecutionPlanBuilder {
	return &ExecutionPlanBuilder{engine: engine}
}

// BuildAggregationPlan builds an execution plan for aggregation queries
func (builder *ExecutionPlanBuilder) BuildAggregationPlan(
	stmt *SelectStatement,
	aggregations []AggregationSpec,
	strategy AggregationStrategy,
	dataSources *TopicDataSources,
) *QueryExecutionPlan {
	plan := &QueryExecutionPlan{
		QueryType:           "SELECT",
		ExecutionStrategy:   builder.determineExecutionStrategy(stmt, strategy),
		DataSources:         builder.buildDataSourcesList(strategy, dataSources),
		PartitionsScanned:   dataSources.PartitionsCount,
		ParquetFilesScanned: builder.countParquetFiles(dataSources),
		LiveLogFilesScanned: builder.countLiveLogFiles(dataSources),
		OptimizationsUsed:   builder.buildOptimizationsList(stmt, strategy, dataSources),
		Aggregations:        builder.buildAggregationsList(aggregations),
		Details:             make(map[string]interface{}),
	}
	// Set row counts based on the strategy
	if strategy.CanUseFastPath {
		// Only live log and broker buffer rows are actually scanned; parquet uses metadata
		plan.TotalRowsProcessed = dataSources.LiveLogRowCount
		if dataSources.BrokerUnflushedCount > 0 {
			plan.TotalRowsProcessed += dataSources.BrokerUnflushedCount
		}
		// Set the scan method based on which data sources actually exist
		if dataSources.ParquetRowCount > 0 && (dataSources.LiveLogRowCount > 0 || dataSources.BrokerUnflushedCount > 0) {
			plan.Details["scan_method"] = "Parquet Metadata + Live Log/Broker Counting"
		} else if dataSources.ParquetRowCount > 0 {
			plan.Details["scan_method"] = "Parquet Metadata Only"
		} else {
			plan.Details["scan_method"] = "Live Log/Broker Counting Only"
		}
	} else {
		plan.TotalRowsProcessed = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
		plan.Details["scan_method"] = "Full Data Scan"
	}
	return plan
}

// determineExecutionStrategy determines the execution strategy based on query characteristics
func (builder *ExecutionPlanBuilder) determineExecutionStrategy(stmt *SelectStatement, strategy AggregationStrategy) string {
	if stmt.Where != nil {
		return "full_scan"
	}
	if strategy.CanUseFastPath {
		return "hybrid_fast_path"
	}
	return "full_scan"
}

// buildDataSourcesList builds the list of data sources used
func (builder *ExecutionPlanBuilder) buildDataSourcesList(strategy AggregationStrategy, dataSources *TopicDataSources) []string {
	sources := []string{}
	if strategy.CanUseFastPath {
		// Only show parquet stats if there are actual parquet files
		if dataSources.ParquetRowCount > 0 {
			sources = append(sources, "parquet_stats")
		}
		if dataSources.LiveLogRowCount > 0 {
			sources = append(sources, "live_logs")
		}
		if dataSources.BrokerUnflushedCount > 0 {
			sources = append(sources, "broker_buffer")
		}
	} else {
		sources = append(sources, "live_logs", "parquet_files")
	}
	// Note: broker_buffer is also added dynamically during execution when the broker is queried.
	// See aggregations.go lines 397-409 for the broker buffer data source addition logic.
	return sources
}

// countParquetFiles counts the total number of parquet files across all partitions
func (builder *ExecutionPlanBuilder) countParquetFiles(dataSources *TopicDataSources) int {
	count := 0
	for _, fileStats := range dataSources.ParquetFiles {
		count += len(fileStats)
	}
	return count
}

// countLiveLogFiles returns the total number of live log files across all partitions
func (builder *ExecutionPlanBuilder) countLiveLogFiles(dataSources *TopicDataSources) int {
	return dataSources.LiveLogFilesCount
}

// buildOptimizationsList builds the list of optimizations used
func (builder *ExecutionPlanBuilder) buildOptimizationsList(stmt *SelectStatement, strategy AggregationStrategy, dataSources *TopicDataSources) []string {
	optimizations := []string{}
	if strategy.CanUseFastPath {
		// Only include parquet statistics if there are actual parquet files
		if dataSources.ParquetRowCount > 0 {
			optimizations = append(optimizations, "parquet_statistics")
		}
		if dataSources.LiveLogRowCount > 0 {
			optimizations = append(optimizations, "live_log_counting")
		}
		// Always include deduplication when using the fast path
		optimizations = append(optimizations, "deduplication")
	}
	if stmt.Where != nil {
		// Add "predicate_pushdown" unless it is already in the list
		found := false
		for _, opt := range optimizations {
			if opt == "predicate_pushdown" {
				found = true
				break
			}
		}
		if !found {
			optimizations = append(optimizations, "predicate_pushdown")
		}
	}
	return optimizations
}

// buildAggregationsList builds the list of aggregations for display
func (builder *ExecutionPlanBuilder) buildAggregationsList(aggregations []AggregationSpec) []string {
	aggList := make([]string, len(aggregations))
	for i, spec := range aggregations {
		aggList[i] = fmt.Sprintf("%s(%s)", spec.Function, spec.Column)
	}
	return aggList
}

// parseAggregationFunction parses an aggregation function expression
func (e *SQLEngine) parseAggregationFunction(funcExpr *FuncExpr, aliasExpr *AliasedExpr) (*AggregationSpec, error) {
	funcName := strings.ToUpper(funcExpr.Name.String())
	spec := &AggregationSpec{
		Function: funcName,
	}

	// Parse function arguments
	switch funcName {
	case FuncCOUNT:
		if len(funcExpr.Exprs) != 1 {
			return nil, fmt.Errorf("COUNT function expects exactly 1 argument")
		}
		switch arg := funcExpr.Exprs[0].(type) {
		case *StarExpr:
			spec.Column = "*"
			spec.Alias = "COUNT(*)"
		case *AliasedExpr:
			if colName, ok := arg.Expr.(*ColName); ok {
				spec.Column = colName.Name.String()
				spec.Alias = fmt.Sprintf("COUNT(%s)", spec.Column)
			} else {
				return nil, fmt.Errorf("COUNT argument must be a column name or *")
			}
		default:
			return nil, fmt.Errorf("unsupported COUNT argument: %T", arg)
		}
	case FuncSUM, FuncAVG, FuncMIN, FuncMAX:
		if len(funcExpr.Exprs) != 1 {
			return nil, fmt.Errorf("%s function expects exactly 1 argument", funcName)
		}
		switch arg := funcExpr.Exprs[0].(type) {
		case *AliasedExpr:
			if colName, ok := arg.Expr.(*ColName); ok {
				spec.Column = colName.Name.String()
				spec.Alias = fmt.Sprintf("%s(%s)", funcName, spec.Column)
			} else {
				return nil, fmt.Errorf("%s argument must be a column name", funcName)
			}
		default:
			return nil, fmt.Errorf("unsupported %s argument: %T", funcName, arg)
		}
	default:
		return nil, fmt.Errorf("unsupported aggregation function: %s", funcName)
	}

	// Override with the user-specified alias if provided
	if aliasExpr != nil && aliasExpr.As != nil && !aliasExpr.As.IsEmpty() {
		spec.Alias = aliasExpr.As.String()
	}
	return spec, nil
}
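
// Example (illustrative): for the SQL fragment "SUM(amount) AS total",
// parseAggregationFunction returns an AggregationSpec with Function="SUM",
// Column="amount", and Alias="total" (the user alias overrides the
// generated "SUM(amount)"); for "COUNT(*)" it returns Function="COUNT",
// Column="*", Alias="COUNT(*)".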

// computeLiveLogMinMax scans live log files to find MIN/MAX values for a specific column
func (e *SQLEngine) computeLiveLogMinMax(partitionPath string, columnName string, parquetSourceFiles map[string]bool) (interface{}, interface{}, error) {
	if e.catalog.brokerClient == nil {
		return nil, nil, fmt.Errorf("no broker client available")
	}
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to get filer client: %v", err)
	}

	var minValue, maxValue interface{}
	var minSchemaValue, maxSchemaValue *schema_pb.Value

	// Process each live log file
	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		// Skip parquet files and directories
		if entry.IsDirectory || strings.HasSuffix(entry.Name, ".parquet") {
			return nil
		}
		// Skip files that have been converted to parquet (deduplication)
		if parquetSourceFiles[entry.Name] {
			return nil
		}

		filePath := partitionPath + "/" + entry.Name

		// Scan this log file for MIN/MAX values
		fileMin, fileMax, err := e.computeFileMinMax(filerClient, filePath, columnName)
		if err != nil {
			fmt.Printf("Warning: failed to compute min/max for file %s: %v\n", filePath, err)
			return nil // Continue with other files
		}

		// Update the global min/max
		if fileMin != nil {
			if minSchemaValue == nil || e.compareValues(fileMin, minSchemaValue) < 0 {
				minSchemaValue = fileMin
				minValue = e.extractRawValue(fileMin)
			}
		}
		if fileMax != nil {
			if maxSchemaValue == nil || e.compareValues(fileMax, maxSchemaValue) > 0 {
				maxSchemaValue = fileMax
				maxValue = e.extractRawValue(fileMax)
			}
		}
		return nil
	})
	if err != nil {
		return nil, nil, fmt.Errorf("failed to process partition directory %s: %v", partitionPath, err)
	}
	return minValue, maxValue, nil
}

// computeFileMinMax scans a single log file to find MIN/MAX values for a specific column
func (e *SQLEngine) computeFileMinMax(filerClient filer_pb.FilerClient, filePath string, columnName string) (*schema_pb.Value, *schema_pb.Value, error) {
	var minValue, maxValue *schema_pb.Value

	err := e.eachLogEntryInFile(filerClient, filePath, func(logEntry *filer_pb.LogEntry) error {
		// Convert the log entry to a record value
		recordValue, _, err := e.convertLogEntryToRecordValue(logEntry)
		if err != nil {
			return err // This will stop processing this file but not fail the overall query
		}

		// Extract the requested column value
		var columnValue *schema_pb.Value
		if e.isSystemColumn(columnName) {
			// Handle system columns
			switch strings.ToLower(columnName) {
			case SW_COLUMN_NAME_TIMESTAMP:
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs}}
			case SW_COLUMN_NAME_KEY:
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key}}
			case SW_COLUMN_NAME_SOURCE:
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: "live_log"}}
			}
		} else {
			// Handle regular data columns
			if value, exists := recordValue.Fields[columnName]; exists {
				columnValue = value
			}
		}
		if columnValue == nil {
			return nil // Skip this record
		}

		// Update min/max
		if minValue == nil || e.compareValues(columnValue, minValue) < 0 {
			minValue = columnValue
		}
		if maxValue == nil || e.compareValues(columnValue, maxValue) > 0 {
			maxValue = columnValue
		}
		return nil
	})
	return minValue, maxValue, err
}

// eachLogEntryInFile reads a log file and calls the provided function for each log entry
func (e *SQLEngine) eachLogEntryInFile(filerClient filer_pb.FilerClient, filePath string, fn func(*filer_pb.LogEntry) error) error {
	// Extract directory and filename; filePath is like "partitionPath/filename"
	lastSlash := strings.LastIndex(filePath, "/")
	if lastSlash == -1 {
		return fmt.Errorf("invalid file path: %s", filePath)
	}
	dirPath := filePath[:lastSlash]
	fileName := filePath[lastSlash+1:]

	// Get the file entry
	var fileEntry *filer_pb.Entry
	err := filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(dirPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if entry.Name == fileName {
			fileEntry = entry
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to find file %s: %v", filePath, err)
	}
	if fileEntry == nil {
		return fmt.Errorf("file not found: %s", filePath)
	}

	lookupFileIdFn := filer.LookupFn(filerClient)

	// eachChunkFn processes each chunk's data (pattern from countRowsInLogFile)
	eachChunkFn := func(buf []byte) error {
		for pos := 0; pos+4 < len(buf); {
			size := util.BytesToUint32(buf[pos : pos+4])
			if pos+4+int(size) > len(buf) {
				break
			}
			entryData := buf[pos+4 : pos+4+int(size)]
			logEntry := &filer_pb.LogEntry{}
			if err := proto.Unmarshal(entryData, logEntry); err != nil {
				pos += 4 + int(size)
				continue // Skip corrupted entries
			}
			// Call the provided function for each log entry
			if err := fn(logEntry); err != nil {
				return err
			}
			pos += 4 + int(size)
		}
		return nil
	}

	// Read file chunks and process them (pattern from countRowsInLogFile)
	fileSize := filer.FileSize(fileEntry)
	visibleIntervals, _ := filer.NonOverlappingVisibleIntervals(context.Background(), lookupFileIdFn, fileEntry.Chunks, 0, int64(fileSize))
	chunkViews := filer.ViewFromVisibleIntervals(visibleIntervals, 0, int64(fileSize))
	for x := chunkViews.Front(); x != nil; x = x.Next {
		chunk := x.Value
		urlStrings, err := lookupFileIdFn(context.Background(), chunk.FileId)
		if err != nil {
			fmt.Printf("Warning: failed to lookup chunk %s: %v\n", chunk.FileId, err)
			continue
		}
		if len(urlStrings) == 0 {
			continue
		}
		// Read chunk data; urlStrings[0] is already a complete URL (http://server:port/fileId)
		data, _, err := util_http.Get(urlStrings[0])
		if err != nil {
			fmt.Printf("Warning: failed to read chunk %s from %s: %v\n", chunk.FileId, urlStrings[0], err)
			continue
		}
		// Process this chunk
		if err := eachChunkFn(data); err != nil {
			return err
		}
	}
	return nil
}
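
// The chunk format consumed above is a simple length-prefixed framing:
// each record is a 4-byte length (as decoded by util.BytesToUint32)
// followed by a serialized filer_pb.LogEntry. A minimal sketch of the
// matching encoder, assuming the big-endian layout implied by
// util.BytesToUint32 (illustrative only, not part of this engine):
//
//	func frameLogEntry(logEntry *filer_pb.LogEntry) ([]byte, error) {
//		payload, err := proto.Marshal(logEntry)
//		if err != nil {
//			return nil, err
//		}
//		buf := make([]byte, 4+len(payload))
//		binary.BigEndian.PutUint32(buf[:4], uint32(len(payload)))
//		copy(buf[4:], payload) // [size][LogEntry] frames are concatenated per chunk
//		return buf, nil
//	}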

// convertLogEntryToRecordValue converts a log entry to a record value (reuses existing logic)
func (e *SQLEngine) convertLogEntryToRecordValue(logEntry *filer_pb.LogEntry) (*schema_pb.RecordValue, string, error) {
	// Parse the log entry data as a Protocol Buffer (not JSON)
	recordValue := &schema_pb.RecordValue{}
	if err := proto.Unmarshal(logEntry.Data, recordValue); err != nil {
		return nil, "", fmt.Errorf("failed to unmarshal log entry protobuf: %v", err)
	}
	// Ensure the Fields map exists
	if recordValue.Fields == nil {
		recordValue.Fields = make(map[string]*schema_pb.Value)
	}
	// Add system columns
	recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
		Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
	}
	recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
		Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key},
	}
	// User data fields are already present: proto.Unmarshal has populated the Fields map
	return recordValue, "live_log", nil
}

// extractTimestampFromFilename extracts a timestamp from a parquet filename
// Format: YYYY-MM-DD-HH-MM-SS.parquet
func (e *SQLEngine) extractTimestampFromFilename(filename string) int64 {
	// Remove the .parquet extension
	filename = strings.TrimSuffix(filename, ".parquet")
	// Parse timestamp format: 2006-01-02-15-04-05
	t, err := time.Parse("2006-01-02-15-04-05", filename)
	if err != nil {
		return 0
	}
	return t.UnixNano()
}
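
// Example (illustrative): "2024-01-15-10-30-00.parquet" parses as
// 2024-01-15T10:30:00 UTC (time.Parse without a zone yields UTC) and
// returns the corresponding UnixNano value; any non-matching filename
// returns 0, which callers treat as "no timestamp available".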

// extractParquetSourceFiles extracts source log file names from parquet file metadata for deduplication
func (e *SQLEngine) extractParquetSourceFiles(fileStats []*ParquetFileStats) map[string]bool {
	sourceFiles := make(map[string]bool)
	for _, fileStat := range fileStats {
		// Each ParquetFileStats should carry a reference to the original file entry,
		// but reading its Extended metadata would require going through the hybrid
		// scanner to the filer entry. As a simplified fallback, deduplicate by filename:
		// extract the timestamp from the parquet filename (YYYY-MM-DD-HH-MM-SS.parquet).
		if strings.HasSuffix(fileStat.FileName, ".parquet") {
			timeStr := strings.TrimSuffix(fileStat.FileName, ".parquet")
			// Mark this timestamp range as covered by parquet
			sourceFiles[timeStr] = true
		}
	}
	return sourceFiles
}

// countLiveLogRowsExcludingParquetSources counts live log rows, excluding files that were
// converted to parquet as well as duplicate log buffer data
func (e *SQLEngine) countLiveLogRowsExcludingParquetSources(ctx context.Context, partitionPath string, parquetSourceFiles map[string]bool) (int64, error) {
	debugEnabled := ctx != nil && isDebugMode(ctx)
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return 0, err
	}

	// First, get the actual source files from parquet metadata
	actualSourceFiles, err := e.getParquetSourceFilesFromMetadata(partitionPath)
	if err != nil {
		// If we can't read parquet metadata, use the filename-based fallback
		fmt.Printf("Warning: failed to read parquet metadata, using filename-based deduplication: %v\n", err)
		actualSourceFiles = parquetSourceFiles
	}

	// Second, get duplicate files from log buffer metadata
	logBufferDuplicates, err := e.buildLogBufferDeduplicationMap(ctx, partitionPath)
	if err != nil {
		if debugEnabled {
			fmt.Printf("Warning: failed to build log buffer deduplication map: %v\n", err)
		}
		logBufferDuplicates = make(map[string]bool)
	}

	// Debug: show deduplication status (only in explain mode)
	if debugEnabled {
		if len(actualSourceFiles) > 0 {
			fmt.Printf("Excluding %d converted log files from %s\n", len(actualSourceFiles), partitionPath)
		}
		if len(logBufferDuplicates) > 0 {
			fmt.Printf("Excluding %d duplicate log buffer files from %s\n", len(logBufferDuplicates), partitionPath)
		}
	}

	totalRows := int64(0)
	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if entry.IsDirectory || strings.HasSuffix(entry.Name, ".parquet") {
			return nil // Skip directories and parquet files
		}
		// Skip files that have been converted to parquet
		if actualSourceFiles[entry.Name] {
			if debugEnabled {
				fmt.Printf("Skipping %s (already converted to parquet)\n", entry.Name)
			}
			return nil
		}
		// Skip files that are duplicated due to log buffer metadata
		if logBufferDuplicates[entry.Name] {
			if debugEnabled {
				fmt.Printf("Skipping %s (duplicate log buffer data)\n", entry.Name)
			}
			return nil
		}
		// Count rows in the live log file
		rowCount, err := e.countRowsInLogFile(filerClient, partitionPath, entry)
		if err != nil {
			fmt.Printf("Warning: failed to count rows in %s/%s: %v\n", partitionPath, entry.Name, err)
			return nil // Continue with other files
		}
		totalRows += rowCount
		return nil
	})
	return totalRows, err
}

// getParquetSourceFilesFromMetadata reads parquet file metadata to get the actual source log files
func (e *SQLEngine) getParquetSourceFilesFromMetadata(partitionPath string) (map[string]bool, error) {
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return nil, err
	}
	sourceFiles := make(map[string]bool)
	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if entry.IsDirectory || !strings.HasSuffix(entry.Name, ".parquet") {
			return nil
		}
		// Read source files from Extended metadata
		if entry.Extended != nil && entry.Extended["sources"] != nil {
			var sources []string
			if err := json.Unmarshal(entry.Extended["sources"], &sources); err == nil {
				for _, source := range sources {
					sourceFiles[source] = true
				}
			}
		}
		return nil
	})
	return sourceFiles, err
}
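
// Example (illustrative, assuming the "sources" attribute holds a JSON
// string array of log file names): a parquet entry with
//
//	entry.Extended["sources"] = []byte(`["2024-01-15-10-00-00","2024-01-15-10-05-00"]`)
//
// marks both named log files as already represented in parquet, so they
// are excluded from live log counting.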

// getLogBufferStartFromFile reads the buffer start from a file's extended attributes
func (e *SQLEngine) getLogBufferStartFromFile(entry *filer_pb.Entry) (*LogBufferStart, error) {
	if entry.Extended == nil {
		return nil, nil
	}
	// Only the binary buffer_start format is supported
	if startData, exists := entry.Extended["buffer_start"]; exists {
		if len(startData) == 8 {
			startIndex := int64(binary.BigEndian.Uint64(startData))
			if startIndex > 0 {
				return &LogBufferStart{StartIndex: startIndex}, nil
			}
		} else {
			return nil, fmt.Errorf("invalid buffer_start format: expected 8 bytes, got %d", len(startData))
		}
	}
	return nil, nil
}
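
// Example (illustrative): the writer side would encode the attribute as
// 8 big-endian bytes, e.g.
//
//	buf := make([]byte, 8)
//	binary.BigEndian.PutUint64(buf, uint64(startIndex))
//	entry.Extended["buffer_start"] = buf
//
// A startIndex of 0, like a missing attribute, is treated as "no buffer info".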

// buildLogBufferDeduplicationMap creates a map of duplicate files based on buffer ranges,
// which is far cheaper than tracking individual buffer indexes
func (e *SQLEngine) buildLogBufferDeduplicationMap(ctx context.Context, partitionPath string) (map[string]bool, error) {
	debugEnabled := ctx != nil && isDebugMode(ctx)
	if e.catalog.brokerClient == nil {
		return make(map[string]bool), nil
	}
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return make(map[string]bool), nil // Don't fail the query, just skip deduplication
	}

	// Track buffer ranges instead of individual indexes (much more efficient)
	type BufferRange struct {
		start, end int64
	}
	processedRanges := make([]BufferRange, 0)
	duplicateFiles := make(map[string]bool)

	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if entry.IsDirectory || strings.HasSuffix(entry.Name, ".parquet") {
			return nil // Skip directories and parquet files
		}
		// Get the buffer start for this file (most efficient)
		bufferStart, err := e.getLogBufferStartFromFile(entry)
		if err != nil || bufferStart == nil {
			return nil // No buffer info, can't deduplicate
		}
		// Calculate the range for this file: [start, start + chunkCount - 1]
		chunkCount := int64(len(entry.GetChunks()))
		if chunkCount == 0 {
			return nil // Empty file, skip
		}
		fileRange := BufferRange{
			start: bufferStart.StartIndex,
			end:   bufferStart.StartIndex + chunkCount - 1,
		}
		// Check if this range overlaps with any processed range
		isDuplicate := false
		for _, processedRange := range processedRanges {
			if fileRange.start <= processedRange.end && fileRange.end >= processedRange.start {
				// Ranges overlap - this file contains duplicate buffer indexes
				isDuplicate = true
				if debugEnabled {
					fmt.Printf("Marking %s as duplicate (buffer range [%d-%d] overlaps with [%d-%d])\n",
						entry.Name, fileRange.start, fileRange.end, processedRange.start, processedRange.end)
				}
				break
			}
		}
		if isDuplicate {
			duplicateFiles[entry.Name] = true
		} else {
			// Add this range to the processed ranges
			processedRanges = append(processedRanges, fileRange)
		}
		return nil
	})
	if err != nil {
		return make(map[string]bool), nil // Don't fail the query
	}
	return duplicateFiles, nil
}
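
// Worked example of the overlap test above: a file covering buffer range
// [100-104] has already been processed; a later file with buffer_start 103
// and 8 chunks covers [103-110]. Since 103 <= 104 && 110 >= 100, the
// ranges overlap and the later file is marked as a duplicate.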

// countRowsInLogFile counts rows in a single log file using SeaweedFS patterns
func (e *SQLEngine) countRowsInLogFile(filerClient filer_pb.FilerClient, partitionPath string, entry *filer_pb.Entry) (int64, error) {
	lookupFileIdFn := filer.LookupFn(filerClient)
	rowCount := int64(0)

	// eachChunkFn processes each chunk's data (pattern from read_log_from_disk.go)
	eachChunkFn := func(buf []byte) error {
		for pos := 0; pos+4 < len(buf); {
			size := util.BytesToUint32(buf[pos : pos+4])
			if pos+4+int(size) > len(buf) {
				break
			}
			entryData := buf[pos+4 : pos+4+int(size)]
			logEntry := &filer_pb.LogEntry{}
			if err := proto.Unmarshal(entryData, logEntry); err != nil {
				pos += 4 + int(size)
				continue // Skip corrupted entries
			}
			// Skip control messages (publisher control, empty key, or no data)
			if isControlLogEntry(logEntry) {
				pos += 4 + int(size)
				continue
			}
			rowCount++
			pos += 4 + int(size)
		}
		return nil
	}

	// Read file chunks and process them (pattern from read_log_from_disk.go)
	fileSize := filer.FileSize(entry)
	visibleIntervals, _ := filer.NonOverlappingVisibleIntervals(context.Background(), lookupFileIdFn, entry.Chunks, 0, int64(fileSize))
	chunkViews := filer.ViewFromVisibleIntervals(visibleIntervals, 0, int64(fileSize))
	for x := chunkViews.Front(); x != nil; x = x.Next {
		chunk := x.Value
		urlStrings, err := lookupFileIdFn(context.Background(), chunk.FileId)
		if err != nil {
			fmt.Printf("Warning: failed to lookup chunk %s: %v\n", chunk.FileId, err)
			continue
		}
		if len(urlStrings) == 0 {
			continue
		}
		// Read chunk data; urlStrings[0] is already a complete URL (http://server:port/fileId)
		data, _, err := util_http.Get(urlStrings[0])
		if err != nil {
			fmt.Printf("Warning: failed to read chunk %s from %s: %v\n", chunk.FileId, urlStrings[0], err)
			continue
		}
		// Process this chunk
		if err := eachChunkFn(data); err != nil {
			return rowCount, err
		}
	}
	return rowCount, nil
}

// isControlLogEntry checks if a log entry is a control entry without actual user data.
// Control entries include:
// - DataMessages with a populated Ctrl field (publisher control signals)
// - Entries with empty keys (filtered by the subscriber)
// - Entries with no data
func isControlLogEntry(logEntry *filer_pb.LogEntry) bool {
	// No data: control or placeholder
	if len(logEntry.Data) == 0 {
		return true
	}
	// Empty keys are treated as control entries (consistent with subscriber filtering)
	if len(logEntry.Key) == 0 {
		return true
	}
	// Check if the payload is a DataMessage carrying a control signal
	dataMessage := &mq_pb.DataMessage{}
	if err := proto.Unmarshal(logEntry.Data, dataMessage); err == nil {
		if dataMessage.Ctrl != nil {
			return true
		}
	}
	return false
}

// discoverTopicPartitions discovers all partitions for a given topic using centralized logic
func (e *SQLEngine) discoverTopicPartitions(namespace, topicName string) ([]string, error) {
	// Use centralized topic partition discovery
	t := topic.NewTopic(namespace, topicName)
	// Get a FilerClient from the BrokerClient
	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return nil, err
	}
	return t.DiscoverPartitions(context.Background(), filerClient)
}

// getTopicTotalRowCount returns the total number of rows in a topic (combining parquet and live logs)
func (e *SQLEngine) getTopicTotalRowCount(ctx context.Context, namespace, topicName string) (int64, error) {
	// Create a hybrid scanner to access parquet statistics
	var filerClient filer_pb.FilerClient
	if e.catalog.brokerClient != nil {
		var filerClientErr error
		filerClient, filerClientErr = e.catalog.brokerClient.GetFilerClient()
		if filerClientErr != nil {
			return 0, filerClientErr
		}
	}
	hybridScanner, err := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, namespace, topicName, e)
	if err != nil {
		return 0, err
	}

	// Get all partitions for this topic
	// Note: discoverTopicPartitions always returns absolute paths
	partitions, err := e.discoverTopicPartitions(namespace, topicName)
	if err != nil {
		return 0, err
	}

	totalRowCount := int64(0)
	// For each partition, count both parquet and live log rows
	for _, partition := range partitions {
		// Count parquet rows
		parquetStats, parquetErr := hybridScanner.ReadParquetStatistics(partition)
		if parquetErr == nil {
			for _, stats := range parquetStats {
				totalRowCount += stats.RowCount
			}
		}
		// Count live log rows (with deduplication)
		parquetSourceFiles := make(map[string]bool)
		if parquetErr == nil {
			parquetSourceFiles = e.extractParquetSourceFiles(parquetStats)
		}
		liveLogCount, liveLogErr := e.countLiveLogRowsExcludingParquetSources(ctx, partition, parquetSourceFiles)
		if liveLogErr == nil {
			totalRowCount += liveLogCount
		}
	}
	return totalRowCount, nil
}
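
// Worked example: a partition whose parquet statistics report 10,000 rows
// and whose remaining (non-converted, non-duplicate) live log files hold
// 1,250 rows contributes 11,250 to the topic total. Per-partition read
// errors are skipped rather than failing the whole count.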

// getActualRowsScannedForFastPath returns only the rows that need to be scanned for fast path
// aggregations (i.e., live log rows that haven't been converted to parquet; parquet uses metadata only)
func (e *SQLEngine) getActualRowsScannedForFastPath(ctx context.Context, namespace, topicName string) (int64, error) {
	// Create a hybrid scanner to access parquet statistics
	var filerClient filer_pb.FilerClient
	if e.catalog.brokerClient != nil {
		var filerClientErr error
		filerClient, filerClientErr = e.catalog.brokerClient.GetFilerClient()
		if filerClientErr != nil {
			return 0, filerClientErr
		}
	}
	hybridScanner, err := NewHybridMessageScanner(filerClient, e.catalog.brokerClient, namespace, topicName, e)
	if err != nil {
		return 0, err
	}

	// Get all partitions for this topic
	// Note: discoverTopicPartitions always returns absolute paths
	partitions, err := e.discoverTopicPartitions(namespace, topicName)
	if err != nil {
		return 0, err
	}

	totalScannedRows := int64(0)
	// For each partition, count ONLY the live log rows that need scanning;
	// parquet files use metadata/statistics, so they contribute 0 to the scan count
	for _, partition := range partitions {
		// Get parquet files to determine what was converted
		parquetStats, parquetErr := hybridScanner.ReadParquetStatistics(partition)
		parquetSourceFiles := make(map[string]bool)
		if parquetErr == nil {
			parquetSourceFiles = e.extractParquetSourceFiles(parquetStats)
		}
		// Count only live log rows that haven't been converted to parquet
		liveLogCount, liveLogErr := e.countLiveLogRowsExcludingParquetSources(ctx, partition, parquetSourceFiles)
		if liveLogErr == nil {
			totalScannedRows += liveLogCount
		}
	}
	return totalScannedRows, nil
}

// findColumnValue performs a case-insensitive lookup of column values,
// including system columns stored separately in HybridScanResult
func (e *SQLEngine) findColumnValue(result HybridScanResult, columnName string) *schema_pb.Value {
	// Check system columns first (stored separately in HybridScanResult)
	lowerColumnName := strings.ToLower(columnName)
	switch lowerColumnName {
	case SW_COLUMN_NAME_TIMESTAMP, SW_DISPLAY_NAME_TIMESTAMP:
		// For the timestamp column, format as a proper timestamp instead of raw nanoseconds
		timestamp := time.Unix(result.Timestamp/1e9, result.Timestamp%1e9)
		timestampStr := timestamp.UTC().Format("2006-01-02T15:04:05.000000000Z")
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: timestampStr}}
	case SW_COLUMN_NAME_KEY:
		return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
	case SW_COLUMN_NAME_SOURCE:
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: result.Source}}
	}

	// Then check regular columns in the Values map: first try an exact match
	if value, exists := result.Values[columnName]; exists {
		return value
	}
	// Then try a case-insensitive match
	for key, value := range result.Values {
		if strings.ToLower(key) == lowerColumnName {
			return value
		}
	}
	return nil
}
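
// Example (illustrative): a result with Timestamp = 1700000000123456789
// resolves the timestamp column to "2023-11-14T22:13:20.123456789Z",
// while a lookup of "UserID" falls through to the case-insensitive pass
// and matches a stored "userid" field.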

// discoverAndRegisterTopic attempts to discover an existing topic and register it in the SQL catalog
func (e *SQLEngine) discoverAndRegisterTopic(ctx context.Context, database, tableName string) error {
	// First, check if the topic exists by trying to get its schema from the broker/filer
	recordType, err := e.catalog.brokerClient.GetTopicSchema(ctx, database, tableName)
	if err != nil {
		return fmt.Errorf("topic %s.%s not found or no schema available: %v", database, tableName, err)
	}

	// Create a schema object from the discovered record type
	mqSchema := &schema.Schema{
		Namespace:  database,
		Name:       tableName,
		RecordType: recordType,
		RevisionId: 1, // Default to revision 1 for discovered topics
	}

	// Register the topic in the SQL catalog
	err = e.catalog.RegisterTopic(database, tableName, mqSchema)
	if err != nil {
		return fmt.Errorf("failed to register discovered topic %s.%s: %v", database, tableName, err)
	}
	// Note: this is a discovery operation, not query execution, so logging here is always acceptable
	return nil
}

// getArithmeticExpressionAlias generates a display alias for arithmetic expressions
func (e *SQLEngine) getArithmeticExpressionAlias(expr *ArithmeticExpr) string {
	leftAlias := e.getExpressionAlias(expr.Left)
	rightAlias := e.getExpressionAlias(expr.Right)
	return leftAlias + expr.Operator + rightAlias
}

// getExpressionAlias generates an alias for any expression node
func (e *SQLEngine) getExpressionAlias(expr ExprNode) string {
	switch exprType := expr.(type) {
	case *ColName:
		return exprType.Name.String()
	case *ArithmeticExpr:
		return e.getArithmeticExpressionAlias(exprType)
	case *SQLVal:
		return e.getSQLValAlias(exprType)
	default:
		return "expr"
	}
}

// evaluateArithmeticExpression evaluates an arithmetic expression for a given record
func (e *SQLEngine) evaluateArithmeticExpression(expr *ArithmeticExpr, result HybridScanResult) (*schema_pb.Value, error) {
	// Check for timestamp arithmetic with intervals first
	if e.isTimestampArithmetic(expr.Left, expr.Right) && (expr.Operator == "+" || expr.Operator == "-") {
		return e.evaluateTimestampArithmetic(expr.Left, expr.Right, expr.Operator)
	}

	// Get the left operand value
	leftValue, err := e.evaluateExpressionValue(expr.Left, result)
	if err != nil {
		return nil, fmt.Errorf("error evaluating left operand: %v", err)
	}
	// Get the right operand value
	rightValue, err := e.evaluateExpressionValue(expr.Right, result)
	if err != nil {
		return nil, fmt.Errorf("error evaluating right operand: %v", err)
	}

	// Handle the string concatenation operator
	if expr.Operator == "||" {
		return e.Concat(leftValue, rightValue)
	}

	// Perform the arithmetic operation
	var op ArithmeticOperator
	switch expr.Operator {
	case "+":
		op = OpAdd
	case "-":
		op = OpSub
	case "*":
		op = OpMul
	case "/":
		op = OpDiv
	case "%":
		op = OpMod
	default:
		return nil, fmt.Errorf("unsupported arithmetic operator: %s", expr.Operator)
	}
	return e.EvaluateArithmeticExpression(leftValue, rightValue, op)
}
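
// Example (illustrative): for a record where price = Int64(10), the
// expression "price * 2" evaluates both operands and dispatches OpMul,
// yielding Int64(20), while "first_name || '!'" is routed to Concat
// before any numeric dispatch.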

// isTimestampArithmetic checks if an arithmetic operation involves timestamps and intervals
func (e *SQLEngine) isTimestampArithmetic(left, right ExprNode) bool {
	// Check if the left side is a timestamp function (NOW, CURRENT_TIMESTAMP, etc.)
	leftIsTimestamp := e.isTimestampFunction(left)
	// Check if the right side is an interval
	rightIsInterval := e.isIntervalExpression(right)
	return leftIsTimestamp && rightIsInterval
}

// isTimestampFunction checks if an expression is a timestamp function
func (e *SQLEngine) isTimestampFunction(expr ExprNode) bool {
	if funcExpr, ok := expr.(*FuncExpr); ok {
		funcName := strings.ToUpper(funcExpr.Name.String())
		return funcName == "NOW" || funcName == "CURRENT_TIMESTAMP" || funcName == "CURRENT_DATE" || funcName == "CURRENT_TIME"
	}
	return false
}

// isIntervalExpression checks if an expression is an interval
func (e *SQLEngine) isIntervalExpression(expr ExprNode) bool {
	_, ok := expr.(*IntervalExpr)
	return ok
}

// evaluateExpressionValue evaluates any expression to get its value from a record
func (e *SQLEngine) evaluateExpressionValue(expr ExprNode, result HybridScanResult) (*schema_pb.Value, error) {
	switch exprType := expr.(type) {
	case *ColName:
		columnName := exprType.Name.String()
		upperColumnName := strings.ToUpper(columnName)

		// Check if this is actually a string literal that was parsed as a ColName
		if (strings.HasPrefix(columnName, "'") && strings.HasSuffix(columnName, "'")) ||
			(strings.HasPrefix(columnName, "\"") && strings.HasSuffix(columnName, "\"")) {
			literal := strings.Trim(strings.Trim(columnName, "'"), "\"")
			return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: literal}}, nil
		}

		// Check if this is actually a function call that was parsed as a ColName;
		// if so, evaluate it manually as a function
		if strings.Contains(columnName, "(") && strings.Contains(columnName, ")") {
			return e.evaluateColumnNameAsFunction(columnName, result)
		}

		// Check if this is a datetime constant
		if upperColumnName == FuncCURRENT_DATE || upperColumnName == FuncCURRENT_TIME ||
			upperColumnName == FuncCURRENT_TIMESTAMP || upperColumnName == FuncNOW {
			switch upperColumnName {
			case FuncCURRENT_DATE:
				return e.CurrentDate()
			case FuncCURRENT_TIME:
				return e.CurrentTime()
			case FuncCURRENT_TIMESTAMP:
				return e.CurrentTimestamp()
			case FuncNOW:
				return e.Now()
			}
		}

		// Check if this is actually a numeric literal disguised as a column name
		if val, err := strconv.ParseInt(columnName, 10, 64); err == nil {
			return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: val}}, nil
		}
		if val, err := strconv.ParseFloat(columnName, 64); err == nil {
			return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: val}}, nil
		}

		// Otherwise, treat it as a regular column lookup
		value := e.findColumnValue(result, columnName)
		if value == nil {
			return nil, nil
		}
		return value, nil
	case *ArithmeticExpr:
		return e.evaluateArithmeticExpression(exprType, result)
	case *SQLVal:
		// Handle literal values
		return e.convertSQLValToSchemaValue(exprType), nil
	case *FuncExpr:
		// Handle function calls that are part of arithmetic expressions;
		// route to the appropriate evaluator based on function type
		funcName := strings.ToUpper(exprType.Name.String())
		if e.isDateTimeFunction(funcName) {
			return e.evaluateDateTimeFunction(exprType, result)
		}
		return e.evaluateStringFunction(exprType, result)
	case *IntervalExpr:
		// Handle interval expressions - evaluate as a duration in nanoseconds
		nanos, err := e.evaluateInterval(exprType.Value)
		if err != nil {
			return nil, err
		}
		return &schema_pb.Value{
			Kind: &schema_pb.Value_Int64Value{Int64Value: nanos},
		}, nil
	default:
		return nil, fmt.Errorf("unsupported expression type: %T", expr)
	}
}

// convertSQLValToSchemaValue converts a SQLVal literal to a schema_pb.Value
func (e *SQLEngine) convertSQLValToSchemaValue(sqlVal *SQLVal) *schema_pb.Value {
	switch sqlVal.Type {
	case IntVal:
		if val, err := strconv.ParseInt(string(sqlVal.Val), 10, 64); err == nil {
			return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: val}}
		}
	case FloatVal:
		if val, err := strconv.ParseFloat(string(sqlVal.Val), 64); err == nil {
			return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: val}}
		}
	case StrVal:
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: string(sqlVal.Val)}}
	}
	// Default to string if parsing fails
	return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: string(sqlVal.Val)}}
}
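
// Example (illustrative): IntVal "42" becomes Int64Value(42), FloatVal
// "3.14" becomes DoubleValue(3.14), and StrVal "abc" becomes
// StringValue("abc"); an IntVal that fails to parse (e.g. overflows
// int64) falls through to the string fallback rather than erroring.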

// ConvertToSQLResultWithExpressions converts HybridScanResults to SQL query results with expression evaluation
func (e *SQLEngine) ConvertToSQLResultWithExpressions(hms *HybridMessageScanner, results []HybridScanResult, selectExprs []SelectExpr) *QueryResult {
	if len(results) == 0 {
		columns := make([]string, 0, len(selectExprs))
		for _, selectExpr := range selectExprs {
			switch expr := selectExpr.(type) {
			case *AliasedExpr:
				// Check if an alias is available and use it
				if expr.As != nil && !expr.As.IsEmpty() {
					columns = append(columns, expr.As.String())
				} else {
					// Fall back to expression-based column naming
					switch col := expr.Expr.(type) {
					case *ColName:
						columnName := col.Name.String()
						upperColumnName := strings.ToUpper(columnName)
						// Check if this is an arithmetic expression embedded in a ColName
						if arithmeticExpr := e.parseColumnLevelCalculation(columnName); arithmeticExpr != nil {
							columns = append(columns, e.getArithmeticExpressionAlias(arithmeticExpr))
						} else if upperColumnName == FuncCURRENT_DATE || upperColumnName == FuncCURRENT_TIME ||
							upperColumnName == FuncCURRENT_TIMESTAMP || upperColumnName == FuncNOW {
							// Use lowercase for datetime constants in column headers
							columns = append(columns, strings.ToLower(columnName))
						} else {
							// Use the display name for system columns
							displayName := e.getSystemColumnDisplayName(columnName)
							columns = append(columns, displayName)
						}
					case *ArithmeticExpr:
						columns = append(columns, e.getArithmeticExpressionAlias(col))
					case *FuncExpr:
						columns = append(columns, e.getStringFunctionAlias(col))
					case *SQLVal:
						columns = append(columns, e.getSQLValAlias(col))
					default:
						columns = append(columns, "expr")
					}
				}
			}
		}
		return &QueryResult{
			Columns:  columns,
			Rows:     [][]sqltypes.Value{},
			Database: hms.topic.Namespace,
			Table:    hms.topic.Name,
		}
	}

	// Build columns from SELECT expressions
	columns := make([]string, 0, len(selectExprs))
	for _, selectExpr := range selectExprs {
		switch expr := selectExpr.(type) {
		case *AliasedExpr:
			// Check if an alias is available and use it
			if expr.As != nil && !expr.As.IsEmpty() {
				columns = append(columns, expr.As.String())
			} else {
				// Fall back to expression-based column naming
				switch col := expr.Expr.(type) {
				case *ColName:
					columnName := col.Name.String()
					upperColumnName := strings.ToUpper(columnName)
					// Check if this is an arithmetic expression embedded in a ColName
					if arithmeticExpr := e.parseColumnLevelCalculation(columnName); arithmeticExpr != nil {
						columns = append(columns, e.getArithmeticExpressionAlias(arithmeticExpr))
					} else if upperColumnName == FuncCURRENT_DATE || upperColumnName == FuncCURRENT_TIME ||
						upperColumnName == FuncCURRENT_TIMESTAMP || upperColumnName == FuncNOW {
						// Use lowercase for datetime constants in column headers
						columns = append(columns, strings.ToLower(columnName))
					} else {
						columns = append(columns, columnName)
					}
				case *ArithmeticExpr:
					columns = append(columns, e.getArithmeticExpressionAlias(col))
				case *FuncExpr:
					columns = append(columns, e.getStringFunctionAlias(col))
				case *SQLVal:
					columns = append(columns, e.getSQLValAlias(col))
				default:
					columns = append(columns, "expr")
				}
			}
		}
	}

	// Convert to SQL rows with expression evaluation
	rows := make([][]sqltypes.Value, len(results))
	for i, result := range results {
		row := make([]sqltypes.Value, len(selectExprs))
		for j, selectExpr := range selectExprs {
			switch expr := selectExpr.(type) {
			case *AliasedExpr:
				switch col := expr.Expr.(type) {
				case *ColName:
					// Handle a regular column, datetime constant, or arithmetic expression
					columnName := col.Name.String()
					upperColumnName := strings.ToUpper(columnName)
					// Check if this is an arithmetic expression embedded in a ColName
					if arithmeticExpr := e.parseColumnLevelCalculation(columnName); arithmeticExpr != nil {
						// Handle as an arithmetic expression
						if value, err := e.evaluateArithmeticExpression(arithmeticExpr, result); err == nil && value != nil {
							row[j] = convertSchemaValueToSQL(value)
						} else {
							row[j] = sqltypes.NULL
						}
					} else if upperColumnName == FuncCURRENT_DATE || upperColumnName == FuncCURRENT_TIME ||
						upperColumnName == FuncCURRENT_TIMESTAMP || upperColumnName == FuncNOW {
						// Handle as a datetime function
						var value *schema_pb.Value
						var err error
						switch upperColumnName {
						case FuncCURRENT_DATE:
							value, err = e.CurrentDate()
						case FuncCURRENT_TIME:
							value, err = e.CurrentTime()
						case FuncCURRENT_TIMESTAMP:
							value, err = e.CurrentTimestamp()
						case FuncNOW:
							value, err = e.Now()
						}
						if err == nil && value != nil {
							row[j] = convertSchemaValueToSQL(value)
						} else {
							row[j] = sqltypes.NULL
						}
					} else {
						// Handle as a regular column
						if value := e.findColumnValue(result, columnName); value != nil {
							row[j] = convertSchemaValueToSQL(value)
						} else {
							row[j] = sqltypes.NULL
						}
					}
				case *ArithmeticExpr:
					// Handle an arithmetic expression
					if value, err := e.evaluateArithmeticExpression(col, result); err == nil && value != nil {
						row[j] = convertSchemaValueToSQL(value)
					} else {
						row[j] = sqltypes.NULL
					}
				case *FuncExpr:
					// Handle a function - route to the appropriate evaluator
					funcName := strings.ToUpper(col.Name.String())
					var value *schema_pb.Value
					var err error
					// Check if it's a datetime function
					if e.isDateTimeFunction(funcName) {
						value, err = e.evaluateDateTimeFunction(col, result)
					} else {
						// Default to the string function evaluator
						value, err = e.evaluateStringFunction(col, result)
					}
					if err == nil && value != nil {
						row[j] = convertSchemaValueToSQL(value)
					} else {
						row[j] = sqltypes.NULL
					}
				case *SQLVal:
					// Handle a literal value
					value := e.convertSQLValToSchemaValue(col)
					row[j] = convertSchemaValueToSQL(value)
				default:
					row[j] = sqltypes.NULL
				}
			default:
				row[j] = sqltypes.NULL
			}
		}
		rows[i] = row
	}

	return &QueryResult{
		Columns:  columns,
		Rows:     rows,
		Database: hms.topic.Namespace,
		Table:    hms.topic.Name,
	}
}
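
// Example (illustrative) of the column-naming fallback: "SELECT id+1,
// UPPER(name) AS n FROM t" produces the headers ["id+1", "n"]; the first
// comes from getArithmeticExpressionAlias, the second from the
// user-supplied alias.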

// extractBaseColumns recursively extracts base column names from arithmetic expressions
func (e *SQLEngine) extractBaseColumns(expr *ArithmeticExpr, baseColumnsSet map[string]bool) {
	// Extract columns from the left operand
	e.extractBaseColumnsFromExpression(expr.Left, baseColumnsSet)
	// Extract columns from the right operand
	e.extractBaseColumnsFromExpression(expr.Right, baseColumnsSet)
}

// extractBaseColumnsFromExpression extracts base column names from any expression node
func (e *SQLEngine) extractBaseColumnsFromExpression(expr ExprNode, baseColumnsSet map[string]bool) {
	switch exprType := expr.(type) {
	case *ColName:
		columnName := exprType.Name.String()
		// Check if it's a literal number disguised as a column name
		if _, err := strconv.ParseInt(columnName, 10, 64); err != nil {
			if _, err := strconv.ParseFloat(columnName, 64); err != nil {
				// Not a numeric literal, treat as an actual column name
				baseColumnsSet[columnName] = true
			}
		}
	case *ArithmeticExpr:
		// Recursively handle nested arithmetic expressions
		e.extractBaseColumns(exprType, baseColumnsSet)
	}
}

// isAggregationFunction checks if a function name is an aggregation function
func (e *SQLEngine) isAggregationFunction(funcName string) bool {
	// Convert to uppercase for case-insensitive comparison
	upperFuncName := strings.ToUpper(funcName)
	switch upperFuncName {
	case FuncCOUNT, FuncSUM, FuncAVG, FuncMIN, FuncMAX:
		return true
	default:
		return false
	}
}

// isStringFunction checks if a function name is a string function
func (e *SQLEngine) isStringFunction(funcName string) bool {
	switch funcName {
	case FuncUPPER, FuncLOWER, FuncLENGTH, FuncTRIM, FuncBTRIM, FuncLTRIM, FuncRTRIM, FuncSUBSTRING, FuncLEFT, FuncRIGHT, FuncCONCAT:
		return true
	default:
		return false
	}
}

// isDateTimeFunction checks if a function name is a datetime function
func (e *SQLEngine) isDateTimeFunction(funcName string) bool {
	switch funcName {
	case FuncCURRENT_DATE, FuncCURRENT_TIME, FuncCURRENT_TIMESTAMP, FuncNOW, FuncEXTRACT, FuncDATE_TRUNC:
		return true
	default:
		return false
	}
}

// getStringFunctionAlias generates an alias for string functions
func (e *SQLEngine) getStringFunctionAlias(funcExpr *FuncExpr) string {
	funcName := funcExpr.Name.String()
	if len(funcExpr.Exprs) == 1 {
		if aliasedExpr, ok := funcExpr.Exprs[0].(*AliasedExpr); ok {
			if colName, ok := aliasedExpr.Expr.(*ColName); ok {
				return fmt.Sprintf("%s(%s)", funcName, colName.Name.String())
			}
		}
	}
	return fmt.Sprintf("%s(...)", funcName)
}

// getDateTimeFunctionAlias generates an alias for datetime functions
func (e *SQLEngine) getDateTimeFunctionAlias(funcExpr *FuncExpr) string {
	funcName := funcExpr.Name.String()
	// Handle zero-argument functions like CURRENT_DATE and NOW;
	// use lowercase for datetime constants in column headers
	if len(funcExpr.Exprs) == 0 {
		return strings.ToLower(funcName)
	}
	// Handle EXTRACT specially to create unique aliases
	if strings.ToUpper(funcName) == "EXTRACT" && len(funcExpr.Exprs) == 2 {
		// Try to extract the date part to make the alias unique
		if aliasedExpr, ok := funcExpr.Exprs[0].(*AliasedExpr); ok {
			if sqlVal, ok := aliasedExpr.Expr.(*SQLVal); ok && sqlVal.Type == StrVal {
				datePart := strings.ToLower(string(sqlVal.Val))
				return fmt.Sprintf("extract_%s", datePart)
			}
		}
	}
	// Other multi-argument functions like DATE_TRUNC, and EXTRACT calls whose
	// date part can't be recovered, fall back to the generic alias
	return fmt.Sprintf("%s(...)", funcName)
}

// extractBaseColumnsFromFunction extracts the base columns needed by a string function
func (e *SQLEngine) extractBaseColumnsFromFunction(funcExpr *FuncExpr, baseColumnsSet map[string]bool) {
	for _, expr := range funcExpr.Exprs {
		if aliasedExpr, ok := expr.(*AliasedExpr); ok {
			e.extractBaseColumnsFromExpression(aliasedExpr.Expr, baseColumnsSet)
		}
	}
}

// getSQLValAlias generates an alias for SQL literal values
func (e *SQLEngine) getSQLValAlias(sqlVal *SQLVal) string {
	switch sqlVal.Type {
	case StrVal:
		// Escape single quotes by replacing ' with '' (SQL standard escaping)
		escapedVal := strings.ReplaceAll(string(sqlVal.Val), "'", "''")
		return fmt.Sprintf("'%s'", escapedVal)
	case IntVal:
		return string(sqlVal.Val)
	case FloatVal:
		return string(sqlVal.Val)
	default:
		return "literal"
	}
}

// evaluateStringFunction evaluates a string function for a given record
func (e *SQLEngine) evaluateStringFunction(funcExpr *FuncExpr, result HybridScanResult) (*schema_pb.Value, error) {
	funcName := strings.ToUpper(funcExpr.Name.String())

	// The string functions handled here take exactly 1 argument
	if len(funcExpr.Exprs) != 1 {
		return nil, fmt.Errorf("function %s expects exactly 1 argument", funcName)
	}

	// Get the argument value
	var argValue *schema_pb.Value
	if aliasedExpr, ok := funcExpr.Exprs[0].(*AliasedExpr); ok {
		var err error
		argValue, err = e.evaluateExpressionValue(aliasedExpr.Expr, result)
		if err != nil {
			return nil, fmt.Errorf("error evaluating function argument: %v", err)
		}
	} else {
		return nil, fmt.Errorf("unsupported function argument type")
	}
	if argValue == nil {
		return nil, nil // NULL input produces NULL output
	}

	// Call the appropriate string function
	switch funcName {
	case FuncUPPER:
		return e.Upper(argValue)
	case FuncLOWER:
		return e.Lower(argValue)
	case FuncLENGTH:
		return e.Length(argValue)
	case FuncTRIM, FuncBTRIM: // CockroachDB converts TRIM to BTRIM
		return e.Trim(argValue)
	case FuncLTRIM:
		return e.LTrim(argValue)
	case FuncRTRIM:
		return e.RTrim(argValue)
	default:
		return nil, fmt.Errorf("unsupported string function: %s", funcName)
	}
}
  4698. // evaluateDateTimeFunction evaluates a datetime function for a given record
  4699. func (e *SQLEngine) evaluateDateTimeFunction(funcExpr *FuncExpr, result HybridScanResult) (*schema_pb.Value, error) {
  4700. funcName := strings.ToUpper(funcExpr.Name.String())
  4701. switch funcName {
  4702. case FuncEXTRACT:
  4703. // EXTRACT requires exactly 2 arguments: date part and value
  4704. if len(funcExpr.Exprs) != 2 {
  4705. return nil, fmt.Errorf("EXTRACT function expects exactly 2 arguments (date_part, value), got %d", len(funcExpr.Exprs))
  4706. }
  4707. // Get the first argument (date part)
  4708. var datePartValue *schema_pb.Value
  4709. if aliasedExpr, ok := funcExpr.Exprs[0].(*AliasedExpr); ok {
  4710. var err error
  4711. datePartValue, err = e.evaluateExpressionValue(aliasedExpr.Expr, result)
  4712. if err != nil {
  4713. return nil, fmt.Errorf("error evaluating EXTRACT date part argument: %v", err)
  4714. }
  4715. } else {
  4716. return nil, fmt.Errorf("unsupported EXTRACT date part argument type")
  4717. }
  4718. if datePartValue == nil {
  4719. return nil, fmt.Errorf("EXTRACT date part cannot be NULL")
  4720. }
  4721. // Convert date part to string
  4722. var datePart string
  4723. if stringVal, ok := datePartValue.Kind.(*schema_pb.Value_StringValue); ok {
  4724. datePart = strings.ToUpper(stringVal.StringValue)
  4725. } else {
  4726. return nil, fmt.Errorf("EXTRACT date part must be a string")
  4727. }
  4728. // Get the second argument (value to extract from)
  4729. var extractValue *schema_pb.Value
  4730. if aliasedExpr, ok := funcExpr.Exprs[1].(*AliasedExpr); ok {
  4731. var err error
  4732. extractValue, err = e.evaluateExpressionValue(aliasedExpr.Expr, result)
  4733. if err != nil {
  4734. return nil, fmt.Errorf("error evaluating EXTRACT value argument: %v", err)
  4735. }
  4736. } else {
  4737. return nil, fmt.Errorf("unsupported EXTRACT value argument type")
  4738. }
  4739. if extractValue == nil {
  4740. return nil, nil // NULL input produces NULL output
  4741. }
  4742. // Call the Extract function
  4743. return e.Extract(DatePart(datePart), extractValue)
  4744. case FuncDATE_TRUNC:
  4745. // DATE_TRUNC requires exactly 2 arguments: precision and value
  4746. if len(funcExpr.Exprs) != 2 {
  4747. return nil, fmt.Errorf("DATE_TRUNC function expects exactly 2 arguments (precision, value), got %d", len(funcExpr.Exprs))
  4748. }
  4749. // Get the first argument (precision)
  4750. var precisionValue *schema_pb.Value
  4751. if aliasedExpr, ok := funcExpr.Exprs[0].(*AliasedExpr); ok {
  4752. var err error
  4753. precisionValue, err = e.evaluateExpressionValue(aliasedExpr.Expr, result)
  4754. if err != nil {
  4755. return nil, fmt.Errorf("error evaluating DATE_TRUNC precision argument: %v", err)
  4756. }
  4757. } else {
  4758. return nil, fmt.Errorf("unsupported DATE_TRUNC precision argument type")
  4759. }
  4760. if precisionValue == nil {
  4761. return nil, fmt.Errorf("DATE_TRUNC precision cannot be NULL")
  4762. }
  4763. // Convert precision to string
  4764. var precision string
  4765. if stringVal, ok := precisionValue.Kind.(*schema_pb.Value_StringValue); ok {
  4766. precision = stringVal.StringValue
  4767. } else {
  4768. return nil, fmt.Errorf("DATE_TRUNC precision must be a string")
  4769. }
  4770. // Get the second argument (value to truncate)
  4771. var truncateValue *schema_pb.Value
  4772. if aliasedExpr, ok := funcExpr.Exprs[1].(*AliasedExpr); ok {
  4773. var err error
  4774. truncateValue, err = e.evaluateExpressionValue(aliasedExpr.Expr, result)
  4775. if err != nil {
  4776. return nil, fmt.Errorf("error evaluating DATE_TRUNC value argument: %v", err)
  4777. }
  4778. } else {
  4779. return nil, fmt.Errorf("unsupported DATE_TRUNC value argument type")
  4780. }
  4781. if truncateValue == nil {
  4782. return nil, nil // NULL input produces NULL output
  4783. }
  4784. // Call the DateTrunc function
  4785. return e.DateTrunc(precision, truncateValue)
	case FuncCURRENT_DATE:
		// CURRENT_DATE is a zero-argument function
		if len(funcExpr.Exprs) != 0 {
			return nil, fmt.Errorf("CURRENT_DATE function expects no arguments, got %d", len(funcExpr.Exprs))
		}
		return e.CurrentDate()

	case FuncCURRENT_TIME:
		// CURRENT_TIME is a zero-argument function
		if len(funcExpr.Exprs) != 0 {
			return nil, fmt.Errorf("CURRENT_TIME function expects no arguments, got %d", len(funcExpr.Exprs))
		}
		return e.CurrentTime()

	case FuncCURRENT_TIMESTAMP:
		// CURRENT_TIMESTAMP is a zero-argument function
		if len(funcExpr.Exprs) != 0 {
			return nil, fmt.Errorf("CURRENT_TIMESTAMP function expects no arguments, got %d", len(funcExpr.Exprs))
		}
		return e.CurrentTimestamp()

	case FuncNOW:
		// NOW is a zero-argument function (but often used with () syntax)
		if len(funcExpr.Exprs) != 0 {
			return nil, fmt.Errorf("NOW function expects no arguments, got %d", len(funcExpr.Exprs))
		}
		return e.Now()

	// PostgreSQL uses EXTRACT(part FROM date) instead of convenience functions like YEAR(date)
	default:
		return nil, fmt.Errorf("unsupported datetime function: %s", funcName)
	}
}
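
// Illustrative queries handled by the datetime switch above (a sketch, not an
// exhaustive list; exact syntax depends on how the parser maps names to the
// Func* constants):
//
//	SELECT EXTRACT(YEAR FROM created_at) FROM events
//	SELECT DATE_TRUNC('hour', created_at) FROM events
//	SELECT CURRENT_DATE, CURRENT_TIMESTAMP, NOW()
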
// evaluateInterval parses an interval string and returns the duration in nanoseconds
func (e *SQLEngine) evaluateInterval(intervalValue string) (int64, error) {
	// Parse interval strings like "1 hour", "30 minutes", "2 days"
	parts := strings.Fields(strings.TrimSpace(intervalValue))
	if len(parts) != 2 {
		return 0, fmt.Errorf("invalid interval format: %s (expected 'number unit')", intervalValue)
	}

	// Parse the numeric value
	value, err := strconv.ParseInt(parts[0], 10, 64)
	if err != nil {
		return 0, fmt.Errorf("invalid interval value: %s", parts[0])
	}

	// Parse the unit and convert to nanoseconds
	unit := strings.ToLower(parts[1])
	var multiplier int64
	switch unit {
	case "nanosecond", "nanoseconds", "ns":
		multiplier = 1
	case "microsecond", "microseconds", "us":
		multiplier = 1000
	case "millisecond", "milliseconds", "ms":
		multiplier = 1000000
	case "second", "seconds", "s":
		multiplier = 1000000000
	case "minute", "minutes", "m":
		multiplier = 60 * 1000000000
	case "hour", "hours", "h":
		multiplier = 60 * 60 * 1000000000
	case "day", "days", "d":
		multiplier = 24 * 60 * 60 * 1000000000
	case "week", "weeks", "w":
		multiplier = 7 * 24 * 60 * 60 * 1000000000
	default:
		return 0, fmt.Errorf("unsupported interval unit: %s", unit)
	}
	return value * multiplier, nil
}
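
// A sketch of evaluateInterval's behavior on a few inputs (illustrative only):
//
//	nanos, _ := e.evaluateInterval("2 hours")   // 2 * 60 * 60 * 1e9 = 7200000000000
//	nanos, _ = e.evaluateInterval("30 minutes") // 30 * 60 * 1e9 = 1800000000000
//	_, err := e.evaluateInterval("1 fortnight") // error: unsupported interval unit
//	_, err = e.evaluateInterval("soon")         // error: invalid interval format
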
// convertValueForTimestampColumn converts string timestamp values to nanoseconds for system timestamp columns
func (e *SQLEngine) convertValueForTimestampColumn(columnName string, value interface{}, expr ExprNode) interface{} {
	// Special handling for timestamp system columns
	if columnName == SW_COLUMN_NAME_TIMESTAMP {
		if _, ok := value.(string); ok {
			if timeNanos := e.extractTimeValue(expr); timeNanos != 0 {
				return timeNanos
			}
		}
	}
	return value
}
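
// For example, in a predicate on the system timestamp column such as
// <ts_column> > '2024-01-01T00:00:00Z', the string literal is rewritten to its
// nanosecond epoch value so it compares correctly against the stored
// timestamps (assuming extractTimeValue recognizes the literal; otherwise the
// original value passes through unchanged).
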
// evaluateTimestampArithmetic performs arithmetic operations with timestamps and intervals
func (e *SQLEngine) evaluateTimestampArithmetic(left, right ExprNode, operator string) (*schema_pb.Value, error) {
	// Handle timestamp arithmetic such as NOW() - INTERVAL '1 hour'.
	// Timestamp arithmetic does not need a row context, so pass an empty one.
	emptyResult := HybridScanResult{}
	leftValue, err := e.evaluateExpressionValue(left, emptyResult)
	if err != nil {
		return nil, fmt.Errorf("failed to evaluate left operand: %v", err)
	}
	rightValue, err := e.evaluateExpressionValue(right, emptyResult)
	if err != nil {
		return nil, fmt.Errorf("failed to evaluate right operand: %v", err)
	}

	// Convert the left operand (must be a timestamp)
	if leftValue == nil || leftValue.Kind == nil {
		return nil, fmt.Errorf("left operand value is nil")
	}
	var leftTimestamp int64
	switch leftKind := leftValue.Kind.(type) {
	case *schema_pb.Value_Int64Value:
		leftTimestamp = leftKind.Int64Value
	case *schema_pb.Value_TimestampValue:
		// Convert microseconds to nanoseconds
		leftTimestamp = leftKind.TimestampValue.TimestampMicros * 1000
	case *schema_pb.Value_StringValue:
		// Parse timestamp string
		if ts, err := time.Parse(time.RFC3339, leftKind.StringValue); err == nil {
			leftTimestamp = ts.UnixNano()
		} else if ts, err := time.Parse("2006-01-02 15:04:05", leftKind.StringValue); err == nil {
			leftTimestamp = ts.UnixNano()
		} else {
			return nil, fmt.Errorf("invalid timestamp format: %s", leftKind.StringValue)
		}
	default:
		return nil, fmt.Errorf("left operand must be a timestamp, got: %T", leftKind)
	}

	// Convert the right operand (must be an interval in nanoseconds)
	if rightValue == nil || rightValue.Kind == nil {
		return nil, fmt.Errorf("right operand value is nil")
	}
	var intervalNanos int64
	switch rightKind := rightValue.Kind.(type) {
	case *schema_pb.Value_Int64Value:
		intervalNanos = rightKind.Int64Value
	default:
		return nil, fmt.Errorf("right operand must be an interval duration")
	}

	// Perform the arithmetic
	var resultTimestamp int64
	switch operator {
	case "+":
		resultTimestamp = leftTimestamp + intervalNanos
	case "-":
		resultTimestamp = leftTimestamp - intervalNanos
	default:
		return nil, fmt.Errorf("unsupported timestamp arithmetic operator: %s", operator)
	}

	// Return the result as a nanosecond timestamp
	return &schema_pb.Value{
		Kind: &schema_pb.Value_Int64Value{Int64Value: resultTimestamp},
	}, nil
}
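
// A sketch of the data flow for NOW() - INTERVAL '1 hour' (illustrative values):
//
//	leftTimestamp = 1700000000000000000 // NOW() as nanoseconds since epoch
//	intervalNanos = 3600000000000       // evaluateInterval("1 hour")
//	result        = 1699996400000000000 // left - interval, returned as Int64Value
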
// evaluateColumnNameAsFunction handles function calls that were incorrectly parsed as column names
func (e *SQLEngine) evaluateColumnNameAsFunction(columnName string, result HybridScanResult) (*schema_pb.Value, error) {
	// Simple parser for basic function calls like TRIM('hello world'):
	// extract the function name and its single argument.
	parenPos := strings.Index(columnName, "(")
	if parenPos == -1 {
		return nil, fmt.Errorf("invalid function format: %s", columnName)
	}

	funcName := strings.ToUpper(strings.TrimSpace(columnName[:parenPos]))
	argsString := columnName[parenPos+1:]

	// Find the closing parenthesis; using the last one keeps nested calls intact
	closeParen := strings.LastIndex(argsString, ")")
	if closeParen == -1 {
		return nil, fmt.Errorf("missing closing parenthesis in function: %s", columnName)
	}
	argString := strings.TrimSpace(argsString[:closeParen])

	// Parse the argument - for now handle simple cases
	var argValue *schema_pb.Value
	var err error
	if strings.HasPrefix(argString, "'") && strings.HasSuffix(argString, "'") {
		// String literal argument
		literal := strings.Trim(argString, "'")
		argValue = &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: literal}}
	} else if strings.Contains(argString, "(") && strings.Contains(argString, ")") {
		// Nested function call - recursively evaluate it
		argValue, err = e.evaluateColumnNameAsFunction(argString, result)
		if err != nil {
			return nil, fmt.Errorf("error evaluating nested function argument: %v", err)
		}
	} else {
		// Column name or other expression
		return nil, fmt.Errorf("unsupported argument type in function: %s", argString)
	}
	if argValue == nil {
		return nil, nil
	}

	// Call the appropriate function
	switch funcName {
	case FuncUPPER:
		return e.Upper(argValue)
	case FuncLOWER:
		return e.Lower(argValue)
	case FuncLENGTH:
		return e.Length(argValue)
	case FuncTRIM, FuncBTRIM: // CockroachDB converts TRIM to BTRIM
		return e.Trim(argValue)
	case FuncLTRIM:
		return e.LTrim(argValue)
	case FuncRTRIM:
		return e.RTrim(argValue)
	// PostgreSQL-only: use EXTRACT(YEAR FROM date) instead of YEAR(date)
	default:
		return nil, fmt.Errorf("unsupported function in column name: %s", funcName)
	}
}
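
// Illustrative inputs this fallback can evaluate (assuming the string
// functions behave like their SQL counterparts):
//
//	e.evaluateColumnNameAsFunction("TRIM('  hi  ')", result)      // -> "hi"
//	e.evaluateColumnNameAsFunction("UPPER(TRIM(' go '))", result) // nested call -> "GO"
//	e.evaluateColumnNameAsFunction("LENGTH(name)", result)        // error: column arguments unsupported here
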
// parseColumnLevelCalculation detects and parses arithmetic expressions that contain function calls.
// This handles cases where the SQL parser incorrectly treats "LENGTH('hello') + 10" as a single ColName.
func (e *SQLEngine) parseColumnLevelCalculation(expression string) *ArithmeticExpr {
	// First check whether this looks like an arithmetic expression
	if !e.containsArithmeticOperator(expression) {
		return nil
	}
	// Build an AST for the arithmetic expression
	return e.buildArithmeticAST(expression)
}
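
// For example (a rough sketch of the resulting structure):
//
//	expr := e.parseColumnLevelCalculation("LENGTH('hello') + 10")
//	// expr.Left     -> ColName for "LENGTH('hello')" (re-parsed as a function later)
//	// expr.Operator -> "+"
//	// expr.Right    -> SQLVal{Type: IntVal, Val: []byte("10")}
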
// containsArithmeticOperator checks if the expression contains arithmetic operators outside of function calls
func (e *SQLEngine) containsArithmeticOperator(expr string) bool {
	operators := []string{"+", "-", "*", "/", "%", "||"}
	parenLevel := 0
	inQuotes := false
	for i, char := range expr {
		switch char {
		case '(':
			if !inQuotes {
				parenLevel++
			}
		case ')':
			if !inQuotes {
				parenLevel--
			}
		case '\'':
			inQuotes = !inQuotes
		default:
			// Only look for operators outside of parentheses and quotes
			if parenLevel == 0 && !inQuotes {
				for _, op := range operators {
					if strings.HasPrefix(expr[i:], op) {
						return true
					}
				}
			}
		}
	}
	return false
}
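
// Illustrative results (only top-level operators count):
//
//	e.containsArithmeticOperator("LENGTH('hi') + 1") // true: '+' is outside parens/quotes
//	e.containsArithmeticOperator("CONCAT('a', '+')") // false: '+' is quoted and parenthesized
//	e.containsArithmeticOperator("user_name")        // false: no operator at all
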
// buildArithmeticAST builds an abstract syntax tree for arithmetic expressions containing function calls
func (e *SQLEngine) buildArithmeticAST(expr string) *ArithmeticExpr {
	// Remove leading/trailing spaces
	expr = strings.TrimSpace(expr)

	// Split at the main operator (outside of parentheses). Lower-precedence
	// operators are tried first so they end up at the root of the tree.
	operators := []string{"||", "+", "-", "*", "/", "%"}
	for _, op := range operators {
		opPos := e.findMainOperator(expr, op)
		if opPos != -1 {
			leftExpr := strings.TrimSpace(expr[:opPos])
			rightExpr := strings.TrimSpace(expr[opPos+len(op):])
			if leftExpr != "" && rightExpr != "" {
				return &ArithmeticExpr{
					Left:     e.parseASTExpressionNode(leftExpr),
					Right:    e.parseASTExpressionNode(rightExpr),
					Operator: op,
				}
			}
		}
	}
	return nil
}
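
// For example, "price * quantity + tax" splits at the lower-precedence '+'
// first, so multiplication binds tighter (a sketch of the resulting tree):
//
//	ArithmeticExpr{
//		Left:     ArithmeticExpr{Left: price, Operator: "*", Right: quantity},
//		Operator: "+",
//		Right:    tax, // ColName nodes produced by parseASTExpressionNode
//	}
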
// findMainOperator returns the position of the last top-level occurrence of the
// operator (outside parentheses and quotes), or -1 if there is none. Taking the
// rightmost occurrence keeps chains of same-precedence binary operators
// left-associative, so "a - b - c" parses as "(a - b) - c" rather than "a - (b - c)".
func (e *SQLEngine) findMainOperator(expr string, operator string) int {
	parenLevel := 0
	inQuotes := false
	lastPos := -1
	for i := 0; i <= len(expr)-len(operator); i++ {
		switch expr[i] {
		case '(':
			if !inQuotes {
				parenLevel++
			}
		case ')':
			if !inQuotes {
				parenLevel--
			}
		case '\'':
			inQuotes = !inQuotes
		default:
			// Record matches only at the top level (not inside parentheses or quotes)
			if parenLevel == 0 && !inQuotes && strings.HasPrefix(expr[i:], operator) {
				lastPos = i
			}
		}
	}
	return lastPos
}
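
// Illustrative positions (byte offsets into the input string):
//
//	e.findMainOperator("LENGTH('a+b') + 1", "+") // 14: the quoted '+' at offset 9 is skipped
//	e.findMainOperator("(a + b)", "+")           // -1: the only '+' is inside parentheses
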
// parseASTExpressionNode parses an expression string into the appropriate ExprNode type
func (e *SQLEngine) parseASTExpressionNode(expr string) ExprNode {
	expr = strings.TrimSpace(expr)

	// Check if it's a function call (contains parentheses)
	if strings.Contains(expr, "(") && strings.Contains(expr, ")") {
		// This should be parsed as a function expression, but since our SQL parser
		// has limitations, we create a special ColName that represents the function
		return &ColName{Name: stringValue(expr)}
	}

	// Check if it's a numeric literal
	if _, err := strconv.ParseInt(expr, 10, 64); err == nil {
		return &SQLVal{Type: IntVal, Val: []byte(expr)}
	}
	if _, err := strconv.ParseFloat(expr, 64); err == nil {
		return &SQLVal{Type: FloatVal, Val: []byte(expr)}
	}

	// Check if it's a string literal
	if strings.HasPrefix(expr, "'") && strings.HasSuffix(expr, "'") {
		return &SQLVal{Type: StrVal, Val: []byte(strings.Trim(expr, "'"))}
	}

	// Check for nested arithmetic expressions
	if nestedArithmetic := e.buildArithmeticAST(expr); nestedArithmetic != nil {
		return nestedArithmetic
	}

	// Default to a column name
	return &ColName{Name: stringValue(expr)}
}
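
// A sketch of how inputs map to node types:
//
//	"LENGTH('x')" -> ColName (function call, re-evaluated by evaluateColumnNameAsFunction)
//	"42"          -> SQLVal{Type: IntVal}
//	"3.14"        -> SQLVal{Type: FloatVal}
//	"'text'"      -> SQLVal{Type: StrVal}
//	"a + b"       -> ArithmeticExpr (via buildArithmeticAST)
//	"user_name"   -> ColName
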