diff --git a/Jenkinsfile b/Jenkinsfile index c93350f2f6..ef40b113ac 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -41,72 +41,72 @@ def pre_test(){ killall -9 taosd ||echo "no taosd running" killall -9 gdb || echo "no gdb running" killall -9 python3.8 || echo "no python program running" - cd ${WORKSPACE} + cd ${WKC} git reset --hard HEAD~10 >/dev/null ''' script { if (env.CHANGE_TARGET == 'master') { sh ''' - cd ${WORKSPACE} + cd ${WKC} git checkout master ''' } else if(env.CHANGE_TARGET == '2.0'){ sh ''' - cd ${WORKSPACE} + cd ${WKC} git checkout 2.0 ''' } else if(env.CHANGE_TARGET == '3.0'){ sh ''' - cd ${WORKSPACE} + cd ${WKC} git checkout 3.0 ''' } else{ sh ''' - cd ${WORKSPACE} + cd ${WKC} git checkout develop ''' } } sh''' - cd ${WORKSPACE} + cd ${WKC} git pull >/dev/null git fetch origin +refs/pull/${CHANGE_ID}/merge git checkout -qf FETCH_HEAD git clean -dfx ''' - // script { - // if (env.CHANGE_TARGET == 'master') { - // sh ''' - // cd ${WK} - // git checkout master - // ''' - // } - // else if(env.CHANGE_TARGET == '2.0'){ - // sh ''' - // cd ${WK} - // git checkout 2.0 - // ''' - // } - // else if(env.CHANGE_TARGET == '3.0'){ - // sh ''' - // cd ${WK} - // git checkout 3.0 - // ''' - // } - // else{ - // sh ''' - // cd ${WK} - // git checkout develop - // ''' - // } - // } - // sh ''' - // cd ${WK} - // git pull >/dev/null + script { + if (env.CHANGE_TARGET == 'master') { + sh ''' + cd ${WK} + git checkout master + ''' + } + else if(env.CHANGE_TARGET == '2.0'){ + sh ''' + cd ${WK} + git checkout 2.0 + ''' + } + else if(env.CHANGE_TARGET == '3.0'){ + sh ''' + cd ${WK} + git checkout 3.0 + ''' + } + else{ + sh ''' + cd ${WK} + git checkout develop + ''' + } + } sh ''' + cd ${WK} + git pull >/dev/null + git clean -dfx export TZ=Asia/Harbin date mkdir debug @@ -138,270 +138,228 @@ pipeline { abort_previous() abortPreviousBuilds() } - sh''' - rm -rf ${WORKSPACE}.tes - cp -r ${WORKSPACE} ${WORKSPACE}.tes - cd ${WORKSPACE}.tes - git fetch - ''' - script { - if 
(env.CHANGE_TARGET == 'master') { - sh ''' - git checkout master - ''' - } - else if(env.CHANGE_TARGET == '2.0'){ - sh ''' - git checkout 2.0 - ''' - } - else if(env.CHANGE_TARGET == '3.0'){ - sh ''' - git checkout 3.0 - ''' - } - else{ - sh ''' - git checkout develop - ''' - } - } - sh''' - git fetch origin +refs/pull/${CHANGE_ID}/merge - git checkout -qf FETCH_HEAD - ''' - - script{ - skipbuild='2' - skipbuild=sh(script: "git log -2 --pretty=%B | fgrep -ie '[skip ci]' -e '[ci skip]' && echo 1 || echo 2", returnStdout:true) - println skipbuild - } - sh''' - rm -rf ${WORKSPACE}.tes - ''' + pre_test() } } - stage('Parallel test stage') { - //only build pr - options { skipDefaultCheckout() } - when { - allOf{ - changeRequest() - expression{ - return skipbuild.trim() == '2' - } - } - } - parallel { - stage('python_1_s1') { - agent{label " slave1 || slave11 "} - steps { + // stage('Parallel test stage') { + // //only build pr + // options { skipDefaultCheckout() } + // when { + // allOf{ + // changeRequest() + + // } + // } + // parallel { + // stage('python_1_s1') { + // agent{label " slave1 || slave11 "} + // steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh p1 - // date''' - // } + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh p1 + // // date''' + // // } - } - } - stage('python_2_s5') { - agent{label " slave5 || slave15 "} - steps { + // } + // } + // stage('python_2_s5') { + // agent{label " slave5 || slave15 "} + // steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh p2 - // date''' - // } - } - } - stage('python_3_s6') { - agent{label " slave6 || slave16 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // 
// ./test-all.sh p2 + // // date''' + // // } + // } + // } + // stage('python_3_s6') { + // agent{label " slave6 || slave16 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh p3 - // date''' - // } - } - } - stage('test_b1_s2') { - agent{label " slave2 || slave12 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh p3 + // // date''' + // // } + // } + // } + // stage('test_b1_s2') { + // agent{label " slave2 || slave12 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // rm -rf /var/lib/taos/* - // rm -rf /var/log/taos/* - // nohup taosd >/dev/null & - // sleep 10 - // ''' - // sh ''' - // cd ${WKC}/tests/examples/nodejs - // npm install td2.0-connector > /dev/null 2>&1 - // node nodejsChecker.js host=localhost - // node test1970.js - // cd ${WKC}/tests/connectorTest/nodejsTest/nanosupport - // npm install td2.0-connector > /dev/null 2>&1 - // node nanosecondTest.js + // // sh ''' + // // rm -rf /var/lib/taos/* + // // rm -rf /var/log/taos/* + // // nohup taosd >/dev/null & + // // sleep 10 + // // ''' + // // sh ''' + // // cd ${WKC}/tests/examples/nodejs + // // npm install td2.0-connector > /dev/null 2>&1 + // // node nodejsChecker.js host=localhost + // // node test1970.js + // // cd ${WKC}/tests/connectorTest/nodejsTest/nanosupport + // // npm install td2.0-connector > /dev/null 2>&1 + // // node nanosecondTest.js - // ''' - // sh ''' - // cd ${WKC}/tests/examples/C#/taosdemo - // mcs -out:taosdemo *.cs > /dev/null 2>&1 - // echo '' |./taosdemo -c /etc/taos - // cd ${WKC}/tests/connectorTest/C#Test/nanosupport - // mcs -out:nano *.cs > /dev/null 2>&1 - // echo '' |./nano - // ''' - // sh ''' - // cd ${WKC}/tests/gotest - // bash batchtest.sh - // ''' - // sh ''' - // cd ${WKC}/tests - // ./test-all.sh b1fq - // date''' - // } - } - } - 
stage('test_crash_gen_s3') { - agent{label " slave3 || slave13 "} + // // ''' + // // sh ''' + // // cd ${WKC}/tests/examples/C#/taosdemo + // // mcs -out:taosdemo *.cs > /dev/null 2>&1 + // // echo '' |./taosdemo -c /etc/taos + // // cd ${WKC}/tests/connectorTest/C#Test/nanosupport + // // mcs -out:nano *.cs > /dev/null 2>&1 + // // echo '' |./nano + // // ''' + // // sh ''' + // // cd ${WKC}/tests/gotest + // // bash batchtest.sh + // // ''' + // // sh ''' + // // cd ${WKC}/tests + // // ./test-all.sh b1fq + // // date''' + // // } + // } + // } + // stage('test_crash_gen_s3') { + // agent{label " slave3 || slave13 "} - steps { - pre_test() - // timeout(time: 60, unit: 'MINUTES'){ - // sh ''' - // cd ${WKC}/tests/pytest - // ./crash_gen.sh -a -p -t 4 -s 2000 - // ''' - // } - // timeout(time: 60, unit: 'MINUTES'){ - // // sh ''' - // // cd ${WKC}/tests/pytest - // // rm -rf /var/lib/taos/* - // // rm -rf /var/log/taos/* - // // ./handle_crash_gen_val_log.sh - // // ''' - // sh ''' - // cd ${WKC}/tests/pytest - // rm -rf /var/lib/taos/* - // rm -rf /var/log/taos/* - // ./handle_taosd_val_log.sh - // ''' - // } - // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b2fq - // date - // ''' - // } - } - } - stage('test_valgrind_s4') { - agent{label " slave4 || slave14 "} + // steps { + // pre_test() + // // timeout(time: 60, unit: 'MINUTES'){ + // // sh ''' + // // cd ${WKC}/tests/pytest + // // ./crash_gen.sh -a -p -t 4 -s 2000 + // // ''' + // // } + // // timeout(time: 60, unit: 'MINUTES'){ + // // // sh ''' + // // // cd ${WKC}/tests/pytest + // // // rm -rf /var/lib/taos/* + // // // rm -rf /var/log/taos/* + // // // ./handle_crash_gen_val_log.sh + // // // ''' + // // sh ''' + // // cd ${WKC}/tests/pytest + // // rm -rf /var/lib/taos/* + // // rm -rf /var/log/taos/* + // // ./handle_taosd_val_log.sh + // // ''' + // // } + // // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd 
${WKC}/tests + // // ./test-all.sh b2fq + // // date + // // ''' + // // } + // } + // } + // stage('test_valgrind_s4') { + // agent{label " slave4 || slave14 "} - steps { - pre_test() - // catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - // sh ''' - // cd ${WKC}/tests/pytest - // ./valgrind-test.sh 2>&1 > mem-error-out.log - // ./handle_val_log.sh - // ''' - // } - // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b3fq - // date''' - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh full example - // date''' - // } - } - } - stage('test_b4_s7') { - agent{label " slave7 || slave17 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // steps { + // pre_test() + // // catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { + // // sh ''' + // // cd ${WKC}/tests/pytest + // // ./valgrind-test.sh 2>&1 > mem-error-out.log + // // ./handle_val_log.sh + // // ''' + // // } + // // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh b3fq + // // date''' + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh full example + // // date''' + // // } + // } + // } + // stage('test_b4_s7') { + // agent{label " slave7 || slave17 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b4fq - // cd ${WKC}/tests - // ./test-all.sh p4 - // cd ${WKC}/tests - // ./test-all.sh full jdbc - // cd ${WKC}/tests - // ./test-all.sh full unit - // date''' - // } - } - } - stage('test_b5_s8') { - agent{label " slave8 || slave18 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh b4fq + // // cd ${WKC}/tests + // // ./test-all.sh p4 + // // cd ${WKC}/tests + // // ./test-all.sh full jdbc + // // cd ${WKC}/tests + // // ./test-all.sh full unit + // // 
date''' + // // } + // } + // } + // stage('test_b5_s8') { + // agent{label " slave8 || slave18 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b5fq - // date''' - // } - } - } - stage('test_b6_s9') { - agent{label " slave9 || slave19 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh b5fq + // // date''' + // // } + // } + // } + // stage('test_b6_s9') { + // agent{label " slave9 || slave19 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b6fq - // date''' - // } - } - } - stage('test_b7_s10') { - agent{label " slave10 || slave20 "} - steps { - pre_test() - // timeout(time: 55, unit: 'MINUTES'){ + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh b6fq + // // date''' + // // } + // } + // } + // stage('test_b7_s10') { + // agent{label " slave10 || slave20 "} + // steps { + // pre_test() + // // timeout(time: 55, unit: 'MINUTES'){ - // sh ''' - // date - // cd ${WKC}/tests - // ./test-all.sh b7fq - // date''' - // } - } - } - } - } + // // sh ''' + // // date + // // cd ${WKC}/tests + // // ./test-all.sh b7fq + // // date''' + // // } + // } + // } + // } + // } } post { success { diff --git a/cmake/lucene_CMakeLists.txt.in b/cmake/lucene_CMakeLists.txt.in index 91e144dced..fc7bec2dd2 100644 --- a/cmake/lucene_CMakeLists.txt.in +++ b/cmake/lucene_CMakeLists.txt.in @@ -2,7 +2,7 @@ # lucene ExternalProject_Add(lucene GIT_REPOSITORY https://github.com/taosdata-contrib/LucenePlusPlus.git - GIT_TAG rel_3.0.8 + GIT_TAG rel_3.0.8_td SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/deps/test/CMakeLists.txt b/deps/test/CMakeLists.txt index 4547431ca7..e571146b86 100644 --- a/deps/test/CMakeLists.txt +++ b/deps/test/CMakeLists.txt @@ -2,3 +2,7 
@@ if(${BUILD_WITH_ROCKSDB}) add_subdirectory(rocksdb) endif(${BUILD_WITH_ROCKSDB}) + +if(${BUILD_WITH_LUCENE}) + add_subdirectory(lucene) +endif(${BUILD_WITH_LUCENE}) diff --git a/deps/test/lucene/CMakeLists.txt b/deps/test/lucene/CMakeLists.txt new file mode 100644 index 0000000000..8539210948 --- /dev/null +++ b/deps/test/lucene/CMakeLists.txt @@ -0,0 +1,6 @@ +add_executable(luceneTest "") +target_sources(luceneTest + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" +) +target_link_libraries(luceneTest lucene++) \ No newline at end of file diff --git a/deps/test/lucene/main.cpp b/deps/test/lucene/main.cpp new file mode 100644 index 0000000000..9ea2fbbec4 --- /dev/null +++ b/deps/test/lucene/main.cpp @@ -0,0 +1,6 @@ +#include + +int main(int argc, char const *argv[]) { + std::cout << "Hello, this is lucene test" << std::endl; + return 0; +} diff --git a/docs/scratch.md b/docs/scratch.md new file mode 100644 index 0000000000..fc0239d727 --- /dev/null +++ b/docs/scratch.md @@ -0,0 +1,236 @@ +```plantuml + @startuml create_table + skinparam sequenceMessageAlign center + skinparam responseMessageBelowArrow true + + participant APP as app + box "dnode1" + participant RPC as rpc + participant VNODE as vnode + participant SYNC as sync + end box + + box "dnode2" + participant SYNC as sync2 + participant VNODE as vnode2 + end box + + box "dnode3" + participant SYNC as sync3 + participant VNODE as vnode3 + end box + + ' APP send request to dnode and RPC in dnode recv the request + app ->rpc: create table req + + ' RPC call vnodeProcessReq() function to process the request + rpc -> vnode: vnodeProcessReq + note right + callback function + run in RPC module + threads. The function + only puts the request + to a vnode queue. 
+ end note + + ' VNODE call vnodeProcessReqs() function to integrate requests and process as a whole + vnode -> vnode: vnodeProcessReqs() + note right + integrate reqs and + process as a whole + end note + + + ' sync the request to other nodes + vnode -> sync: syncProcessReqs() + + ' make request persistent + ' sync -->vnode: walWrite()\n(callback function) + + ' replicate requests to other DNODES + sync -> sync2: replication req + sync -> sync3: replication req + sync2 -> vnode2: walWrite()\n(callback function) + sync2 --> sync: replication rsp\n(confirm) + sync3 -> vnode3: walWrite()\n(callback function) + + sync3 --> sync: replication rsp\n(confirm) + + ' send apply request + sync -> sync2: apply req + sync -> sync3: apply req + + ' vnode apply + sync2 -> vnode2: vnodeApplyReqs() + sync3 -> vnode3: vnodeApplyReqs() + + ' call apply request + sync --> vnode: vnodeApplyReqs()\n(callback function) + + ' send response + vnode --> rpc: rpcSendRsp() + + ' dnode send response to APP + rpc --> app: create table rsp + @enduml +``` + +## Leader处理强一致写入请求 +```plantuml + @startuml leader_process_stict_consistency + box "dnode1" + participant CRPC as crpc + participant VNODE as vnode + participant SYNC as sync + end box + + -> crpc: create table/submit req + + ' In CRPC threads + group #pink "In CRPC threads" + crpc -> vnode:vnodeProcessReq() + note right + A callback function + run by CRPC thread + to put the request + to a vnode queue + end note + end + + ' In VNODE worker threads + group #lightblue "In VNODE worker threads" + vnode -> vnode: vnodeProcessReqs() + note right + VNODE process requests + accumulated in a + vnode write queue and + process the batch reqs + as a whole + end note + + vnode -> sync: syncProcessReqs() + + sync -> : replication req1 + sync -> : replication req2 + end + + group #red "SYNC threads" + sync <- : replication rsp1 + sync <- : replication rsp2 + sync -> vnode: notify apply + sync -> : apply rsp1 + sync -> : apply rsp2 + end + + group 
#lightblue "In VNODE worker threads" + vnode -> vnode: vnodeApplyReqs() + vnode -> crpc: + end + + <- crpc: create table/submit rsp + + @enduml +``` + +## Follower处理强一致写入请求 +```plantuml + @startuml follower_process_strict_consistency + participant SYNC as sync + participant VNODE as vnode + + group #pink "SYNC threads" + -> sync: replication req + + sync -> sync: syncProcessReqs() + note right + In the replication + only data is + persisted and response + is sent back + end note + + <- sync: replication rsp + + -> sync: apply req + + sync -> vnode: notify apply + end + + group #lightblue "VNODE worker threads" + vnode -> vnode: vnodeApplyReqs() + end + + @enduml +``` + +## Leader处理最终一致写入请求 +```plantuml + @startuml leader_process_eventual_consistency + box "dnode1" + participant CRPC as crpc + participant VNODE as vnode + participant SYNC as sync + end box + + -> crpc: create table/submit req + + ' In CRPC threads + group #pink "In CRPC threads" + crpc -> vnode:vnodeProcessReq() + note right + A callback function + run by CRPC thread + to put the request + to a vnode queue + end note + end + + ' In VNODE worker threads + group #lightblue "In VNODE worker threads" + vnode -> vnode: vnodeProcessReqs() + note right + VNODE process requests + accumulated in a + vnode write queue and + process the batch reqs + as a whole + end note + + vnode -> sync: syncProcessReqs() + + sync -> : replication req1 + sync -> : replication req2 + + sync -> vnode: notify apply + end + + + group #lightblue "In VNODE worker threads" + vnode -> vnode: vnodeApplyReqs() + vnode -> crpc: + end + + <- crpc: create table/submit rsp + + @enduml +``` + +## Follower处理最终一致写入请求 +```plantuml + @startuml follower_process_eventual_consistency + participant SYNC as sync + participant VNODE as vnode + + group #pink "SYNC threads" + -> sync: replication rsp + + sync -> sync: syncProcessReqs() + + sync -> vnode: notify VNODE \nthread to process\n the reqs + end + + group #lightblue "VNODE worker threads" + 
vnode -> vnode: vnodeApplyReqs() + end + @enduml +``` \ No newline at end of file diff --git a/docs/vnode_write.md b/docs/vnode_write.md new file mode 100644 index 0000000000..7b8dbd6535 --- /dev/null +++ b/docs/vnode_write.md @@ -0,0 +1,311 @@ +

VNODE Write Processes

+ +## META Operations +META data write operations include: + +1. create table +2. drop table +3. alter table + +We take create table as an example to figure out the whole process. +```plantuml +@startuml create_table +skinparam sequenceMessageAlign center +skinparam responseMessageBelowArrow true + +participant APP as app +box "dnode1" + participant RPC as rpc + participant VNODE as vnode + participant SYNC as sync +end box + +box "dnode2" + participant SYNC as sync2 + participant VNODE as vnode2 +end box + +box "dnode3" + participant SYNC as sync3 + participant VNODE as vnode3 +end box + +' APP send request to dnode and RPC in dnode recv the request +app ->rpc: create table req + +' RPC call vnodeProcessReq() function to process the request +rpc -> vnode: vnodeProcessReq +note right +callback function +run in RPC module +threads. The function +only puts the request +to a vnode queue. +end note + +' VNODE call vnodeProcessReqs() function to integrate requests and process as a whole +vnode -> vnode: vnodeProcessReqs() +note right +integrate reqs and +process as a whole +end note + + +' sync the request to other nodes +vnode -> sync: syncProcessReqs() + +' make request persistent +' sync -->vnode: walWrite()\n(callback function) + +' replicate requests to other DNODES +sync -> sync2: replication req +sync -> sync3: replication req +sync2 -> vnode2: walWrite()\n(callback function) +sync2 --> sync: replication rsp\n(confirm) +sync3 -> vnode3: walWrite()\n(callback function) + +sync3 --> sync: replication rsp\n(confirm) + +' send apply request +sync -> sync2: apply req +sync -> sync3: apply req + +' vnode apply +sync2 -> vnode2: vnodeApplyReqs() +sync3 -> vnode3: vnodeApplyReqs() + +' call apply request +sync --> vnode: vnodeApplyReqs()\n(callback function) + +' send response +vnode --> rpc: rpcSendRsp() + +' dnode send response to APP +rpc --> app: create table rsp +@enduml +``` + +## Time-series data Operations +There is only one operation for time-series data: 
data insert. We will figure out the whole process. + +```plantuml +@startuml create_table +skinparam sequenceMessageAlign center +skinparam responseMessageBelowArrow true + +participant APP as app +box "dnode1" + participant RPC as rpc + participant VNODE as vnode + participant SYNC as sync +end box + +box "dnode2" + participant SYNC as sync2 + participant VNODE as vnode2 +end box + +box "dnode3" + participant SYNC as sync3 + participant VNODE as vnode3 +end box + +' APP send request to dnode and RPC in dnode recv the request +app ->rpc: insert data req + +' RPC call vnodeProcessReq() function to process the request +rpc -> vnode: vnodeProcessReq +note right +callback function +run in RPC module +threads. The function +only puts the request +to a vnode queue. +end note + +' VNODE call vnodeProcessReqs() function to integrate requests and process as a whole +vnode -> vnode: vnodeProcessReqs() +note right +integrate reqs and +process as a whole +end note + + +' sync the request to other nodes +vnode -> sync: syncProcessReqs() + +' ' make request persistent +' ' sync -->vnode: walWrite()\n(callback function) + +' ' replicate requests to other DNODES +sync -> sync2: replication req +sync -> sync3: replication req + +' vnode apply +sync2 -> vnode2: vnodeApplyReqs() +sync3 -> vnode3: vnodeApplyReqs() + +' call apply request +sync --> vnode: vnodeApplyReqs()\n(callback function) + +' send response +vnode --> rpc: rpcSendRsp() + +' dnode send response to APP +rpc --> app: insert data rsp +@enduml +``` + +## vnodeProcessReqs() +```plantuml +@startuml vnodeProcessReqs() +participant VNODE as v +participant SYNC as s + +group vnodeProcessReqs() + ' Group requests and get a request batch to process as a whole + v -> v: vnodeGetReqsFromQueue() + note right + integrate all write + requests as a batch + to process as a whole + end note + + ' VNODE call syncProcessReqs() function to process the batch request + v -> s: syncProcessReqs() + + group syncProcessReqs() + ' Check if 
current node is leader + alt not leader + return NOT_LEADER + end + + s -> s: syncAppendReqsToLogStore() + group syncAppendReqsToLogStore() + s -> v: walWrite() + note right + There must be a + callback function + provided by VNODE + to persist the + requests in WAL + end note + + alt (no unapplied reqs) AND (only one node OR no meta requests) + s -> v: vnodeApplyReqs() + note right + just use the worker + thread to apply + the requests. This + is a callback function + provided by VNODE + end note + else other cases need to wait response + s -> s: + note right + save the requests in log store + and wait for confirmation or + other cases + end note + + s ->]: send replication requests + s ->]: send replication requests + end + end + end +end +@enduml +``` + + + + +## vnodeApplyReqs() +The function *vnodeApplyReqs()* is the actual function run by a vnode to process the requests. +```plantuml +@startuml vnodeApplyReqs() +skinparam sequenceMessageAlign left +skinparam responseMessageBelowArrow true + +participant VNODE as vnode +participant TQ as tq +participant TSDB as tsdb +participant META as meta + +group vnodeApplyReqs() + autonumber + loop nReqs + ' Copy request message to vnode buffer pool + vnode -> vnode: vnodeCopyReq() + note right + copy request to + vnode buffer pool + end note + + vnode -> tq: tqPush() + note right + push the request + to TQ so consumers + can consume + end note + alt META_REQ + autonumber 3 + vnode -> meta: metaApplyReq() + else TS_REQ + autonumber 3 + vnode -> tsdb: tsdbApplyReq() + end + + end + + ' Check if need to commit + alt vnode buffer pool is full + group vnodeCommit() + autonumber 4.1 + vnode -> tq: tqCommit() + note right + tqCommit may renew wal + end note + vnode -> meta: metaCommit(); + note right + commit meta data + end note + vnode -> tsdb: tsdbCommit(); + note right + commit time-series data + end note + end + end +end +@enduml +``` + diff --git a/include/client/taos.h b/include/client/taos.h index 
6fa30737e7..0f7edc9fed 100644 --- a/include/client/taos.h +++ b/include/client/taos.h @@ -46,6 +46,11 @@ typedef void **TAOS_ROW; #define TSDB_DATA_TYPE_USMALLINT 12 // 2 bytes #define TSDB_DATA_TYPE_UINT 13 // 4 bytes #define TSDB_DATA_TYPE_UBIGINT 14 // 8 bytes +#define TSDB_DATA_TYPE_VARCHAR 15 // string +#define TSDB_DATA_TYPE_JSON 16 // json +#define TSDB_DATA_TYPE_DECIMAL 17 // decimal +#define TSDB_DATA_TYPE_BLOB 18 // binary string +#define TSDB_DATA_TYPE_LONGBLOB 19 // long binary string typedef enum { TSDB_OPTION_LOCALE, @@ -68,7 +73,7 @@ typedef struct taosField { #define DLL_EXPORT #endif -DLL_EXPORT int taos_init(); +DLL_EXPORT int taos_init(); DLL_EXPORT void taos_cleanup(void); DLL_EXPORT int taos_options(TSDB_OPTION option, const void *arg, ...); DLL_EXPORT TAOS *taos_connect(const char *ip, const char *user, const char *pass, const char *db, uint16_t port); @@ -157,7 +162,6 @@ DLL_EXPORT int taos_errno(TAOS_RES *tres); DLL_EXPORT void taos_query_a(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, int code), void *param); DLL_EXPORT void taos_fetch_rows_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, int numOfRows), void *param); -//DLL_EXPORT void taos_fetch_row_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, TAOS_ROW row), void *param); typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code); DLL_EXPORT TAOS_SUB *taos_subscribe(TAOS* taos, int restart, const char* topic, const char *sql, TAOS_SUBSCRIBE_CALLBACK fp, void *param, int interval); diff --git a/include/common/common.h b/include/common/common.h index b438316f57..981614afc2 100644 --- a/include/common/common.h +++ b/include/common/common.h @@ -17,6 +17,8 @@ #define TDENGINE_COMMON_H #include "taosdef.h" +#include "taosmsg.h" +#include "tarray.h" //typedef struct STimeWindow { // TSKEY skey; @@ -36,4 +38,32 @@ // int16_t bytes; //} SSchema; +typedef struct SColumnDataAgg { + int16_t colId; + int64_t sum; + int64_t max; + 
int64_t min; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; +} SColumnDataAgg; + +typedef struct SDataBlockInfo { + STimeWindow window; + int32_t rows; + int32_t numOfCols; + int64_t uid; +} SDataBlockInfo; + +typedef struct SSDataBlock { + SColumnDataAgg *pBlockAgg; + SArray *pDataBlock; // SArray + SDataBlockInfo info; +} SSDataBlock; + +typedef struct SColumnInfoData { + SColumnInfo info; // TODO filter info needs to be removed + char *pData; // the corresponding block data in memory +} SColumnInfoData; + #endif // TDENGINE_COMMON_H diff --git a/include/common/taosmsg.h b/include/common/taosmsg.h index 561a81167f..b238bfa566 100644 --- a/include/common/taosmsg.h +++ b/include/common/taosmsg.h @@ -456,7 +456,6 @@ typedef struct SColumnInfo { typedef struct STableIdInfo { uint64_t uid; - int32_t tid; TSKEY key; // last accessed ts, for subscription } STableIdInfo; diff --git a/include/common/tvariant.h b/include/common/tvariant.h index 13c8aff8e7..7143d7b8fd 100644 --- a/include/common/tvariant.h +++ b/include/common/tvariant.h @@ -36,6 +36,8 @@ typedef struct SVariant { }; } SVariant; +int32_t toInteger(const char* z, int32_t n, int32_t base, int64_t* value, bool* issigned); + bool taosVariantIsValid(SVariant *pVar); void taosVariantCreate(SVariant *pVar, char* z, int32_t n, int32_t type); diff --git a/include/libs/catalog/catalog.h b/include/libs/catalog/catalog.h index c4a244adc4..050b9c904f 100644 --- a/include/libs/catalog/catalog.h +++ b/include/libs/catalog/catalog.h @@ -33,7 +33,6 @@ struct SCatalog; typedef struct SMetaReq { char clusterId[TSDB_CLUSTER_ID_LEN]; SArray *pTableName; // table full name - SArray *pVgroup; // vgroup id SArray *pUdf; // udf name bool qNodeEpset; // valid qnode } SMetaReq; @@ -60,7 +59,6 @@ typedef struct STableComInfo { typedef struct SCTableMeta { int32_t vgId:24; int8_t tableType; - uint32_t tid; uint64_t uid; uint64_t suid; } SCTableMeta; @@ -71,7 +69,6 @@ typedef struct SCTableMeta { typedef struct STableMeta 
{ int32_t vgId:24; int8_t tableType; - uint32_t tid; uint64_t uid; uint64_t suid; // if the table is TSDB_CHILD_TABLE, the following information is acquired from the corresponding super table meta info diff --git a/include/libs/function/function.h b/include/libs/function/function.h new file mode 100644 index 0000000000..8c290dbced --- /dev/null +++ b/include/libs/function/function.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TDENGINE_TFUNCTION_H +#define TDENGINE_TFUNCTION_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "common.h" +#include "tvariant.h" +#include "tbuffer.h" + +#define FUNCTION_SCALAR 1 +#define FUNCTION_AGG 2 + +#define TOP_BOTTOM_QUERY_LIMIT 100 +#define FUNCTIONS_NAME_MAX_LENGTH 16 + +#define FUNCTION_INVALID_ID -1 +#define FUNCTION_COUNT 0 +#define FUNCTION_SUM 1 +#define FUNCTION_AVG 2 +#define FUNCTION_MIN 3 +#define FUNCTION_MAX 4 +#define FUNCTION_STDDEV 5 +#define FUNCTION_PERCT 6 +#define FUNCTION_APERCT 7 +#define FUNCTION_FIRST 8 +#define FUNCTION_LAST 9 +#define FUNCTION_LAST_ROW 10 +#define FUNCTION_TOP 11 +#define FUNCTION_BOTTOM 12 +#define FUNCTION_SPREAD 13 +#define FUNCTION_TWA 14 +#define FUNCTION_LEASTSQR 15 + +#define FUNCTION_TS 16 +#define FUNCTION_TS_DUMMY 17 +#define FUNCTION_TAG_DUMMY 18 +#define FUNCTION_TS_COMP 19 + +#define FUNCTION_TAG 20 +#define FUNCTION_PRJ 21 + +#define FUNCTION_TAGPRJ 22 +#define FUNCTION_ARITHM 23 +#define 
FUNCTION_DIFF 24 + +#define FUNCTION_FIRST_DST 25 +#define FUNCTION_LAST_DST 26 +#define FUNCTION_STDDEV_DST 27 +#define FUNCTION_INTERP 28 + +#define FUNCTION_RATE 29 +#define FUNCTION_IRATE 30 +#define FUNCTION_TID_TAG 31 +#define FUNCTION_DERIVATIVE 32 +#define FUNCTION_BLKINFO 33 + +#define FUNCTION_HISTOGRAM 34 +#define FUNCTION_HLL 35 +#define FUNCTION_MODE 36 +#define FUNCTION_SAMPLE 37 + +typedef struct SPoint1 { + int64_t key; + union{double val; char* ptr;}; +} SPoint1; + +struct SQLFunctionCtx; +struct SResultRowCellInfo; + +//for selectivity query, the corresponding tag value is assigned if the data is qualified +typedef struct SExtTagsInfo { + int16_t tagsLen; // keep the tags data for top/bottom query result + int16_t numOfTagCols; + struct SQLFunctionCtx **pTagCtxList; +} SExtTagsInfo; + +// sql function runtime context +typedef struct SQLFunctionCtx { + int32_t size; // number of rows + void * pInput; // input data buffer + uint32_t order; // asc|desc + int16_t inputType; + int16_t inputBytes; + + int16_t outputType; + int16_t outputBytes; // size of results, determined by function and input column data type + int32_t interBufBytes; // internal buffer size + bool hasNull; // null value exist in current block + bool requireNull; // require null in some function + bool stableQuery; + int16_t functionId; // function id + char * pOutput; // final result output buffer, point to sdata->data + uint8_t currentStage; // record current running step, default: 0 + int64_t startTs; // timestamp range of current query when function is executed on a specific data block + int32_t numOfParams; + SVariant param[4]; // input parameter, e.g., top(k, 20), the number of results for top query is kept in param + int64_t *ptsList; // corresponding timestamp array list + void *ptsOutputBuf; // corresponding output buffer for timestamp of each result, e.g., top/bottom*/ + SVariant tag; + + bool isSmaSet; + SColumnDataAgg sma; + struct SResultRowCellInfo *resultInfo; + 
SExtTagsInfo tagInfo; + SPoint1 start; + SPoint1 end; +} SQLFunctionCtx; + +enum { + TEXPR_NODE_DUMMY = 0x0, + TEXPR_BINARYEXPR_NODE= 0x1, + TEXPR_UNARYEXPR_NODE = 0x2, + TEXPR_COL_NODE = 0x4, + TEXPR_VALUE_NODE = 0x8, +}; + +typedef struct tExprNode { + uint8_t nodeType; + union { + struct { + union { + int32_t optr; // binary operator + int32_t functionId;// unary operator + }; + void *info; // support filter operation on this expression only available for leaf node + struct tExprNode *pLeft; // left child pointer + struct tExprNode *pRight; // right child pointer + } _node; + + SSchema *pSchema;// column node + struct SVariant *pVal; // value node + }; +} tExprNode; + +void exprTreeToBinary(SBufferWriter* bw, tExprNode* pExprTree); +void tExprTreeDestroy(tExprNode *pNode, void (*fp)(void *)); + +typedef struct SAggFunctionInfo { + char name[FUNCTIONS_NAME_MAX_LENGTH]; + int8_t type; // Scalar function or aggregation function + uint8_t functionId; // Function Id + int8_t sFunctionId; // Transfer function for super table query + uint16_t status; + + bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo); // setup the execute environment + void (*exec)(SQLFunctionCtx *pCtx); + + // finalizer must be called after all exec has been executed to generated final result. 
+ void (*xFinalize)(SQLFunctionCtx *pCtx); + void (*mergeFunc)(SQLFunctionCtx *pCtx); + + int32_t (*dataReqFunc)(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId); +} SAggFunctionInfo; + +typedef struct SScalarFunctionInfo { + char name[FUNCTIONS_NAME_MAX_LENGTH]; + int8_t type; // scalar function or aggregation function + uint8_t functionId; // index of scalar function + + bool (*init)(SQLFunctionCtx *pCtx, struct SResultRowCellInfo* pResultCellInfo); // setup the execute environment + void (*exec)(SQLFunctionCtx *pCtx); +} SScalarFunctionInfo; + +typedef struct SResultDataInfo { + int16_t type; + int16_t bytes; + int32_t intermediateBytes; +} SResultDataInfo; + +int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionId, int32_t param, SResultDataInfo* pInfo, int16_t extLength, + bool isSuperTable); + +/** + * If the given name is a valid built-in sql function, the value of true will be returned. + * @param name + * @param len + * @return + */ +int32_t qIsBuiltinFunction(const char* name, int32_t len); + +bool qIsValidUdf(SArray* pUdfInfo, const char* name, int32_t len, int32_t* functionId); + +const char* qGetFunctionName(int32_t functionId); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TFUNCTION_H diff --git a/include/libs/parser/parser.h b/include/libs/parser/parser.h index eeb5f4cd8e..b80eda0b86 100644 --- a/include/libs/parser/parser.h +++ b/include/libs/parser/parser.h @@ -23,6 +23,29 @@ extern "C" { #include "catalog.h" #include "common.h" #include "tname.h" +#include "tvariant.h" + +typedef struct SColumn { + uint64_t tableUid; + int32_t columnIndex; + SColumnInfo info; +} SColumn; + +// the structure for sql function in select clause +typedef struct SSqlExpr { + char token[TSDB_COL_NAME_LEN]; // original token + SSchema resSchema; + SColIndex colInfo; + uint64_t uid; // table uid, todo refactor use the pointer + int32_t interBytes; // inter result buffer size + int16_t numOfParams; // argument value of each function + 
SVariant param[3]; // parameters are not more than 3 +} SSqlExpr; + +typedef struct SExprInfo { + SSqlExpr base; + struct tExprNode *pExpr; +} SExprInfo; //typedef struct SInterval { // int32_t tz; // query client timezone @@ -95,10 +118,7 @@ typedef struct STagCond { typedef struct STableMetaInfo { STableMeta *pTableMeta; // table meta, cached in client side and acquired by name - uint32_t tableMetaSize; - size_t tableMetaCapacity; SVgroupsInfo *vgroupList; - SArray *pVgroupTables; // SArray /* * 1. keep the vgroup index during the multi-vnode super table projection query @@ -110,6 +130,20 @@ typedef struct STableMetaInfo { SArray *tagColList; // SArray, involved tag columns } STableMetaInfo; +typedef struct SQueryAttrInfo { + bool stableQuery; + bool groupbyColumn; + bool simpleAgg; + bool arithmeticOnAgg; + bool projectionQuery; + bool hasFilter; + bool onlyTagQuery; + bool orderProjectQuery; + bool stateWindow; + bool globalMerge; + bool multigroupResult; +} SQueryAttrInfo; + typedef struct SQueryStmtInfo { int16_t command; // the command may be different for each subclause, so keep it seperately. 
uint32_t type; // query/insert type @@ -152,19 +186,15 @@ typedef struct SQueryStmtInfo { SArray *pUpstream; // SArray struct SQueryStmtInfo *pDownstream; int32_t havingFieldNum; - bool stableQuery; - bool groupbyColumn; - bool simpleAgg; - bool arithmeticOnAgg; - bool projectionQuery; - bool hasFilter; - bool onlyTagQuery; - bool orderProjectQuery; - bool stateWindow; - bool globalMerge; - bool multigroupResult; + SQueryAttrInfo info; } SQueryStmtInfo; +typedef struct SColumnIndex { + int16_t tableIndex; + int16_t columnIndex; + int16_t type; // normal column/tag/ user input constant column +} SColumnIndex; + struct SInsertStmtInfo; /** @@ -206,6 +236,17 @@ int32_t qParseInsertSql(const char* pStr, size_t length, struct SInsertStmtInfo* */ int32_t qParserConvertSql(const char* pStr, size_t length, char** pConvertSql); +void assignExprInfo(SExprInfo* dst, const SExprInfo* src); +void columnListCopy(SArray* dst, const SArray* src, uint64_t uid); +void columnListDestroy(SArray* pColumnList); + +void dropAllExprInfo(SArray* pExprInfo); +SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, struct tExprNode* pParamExpr, SSchema* pResSchema, int16_t interSize); +int32_t copyExprInfoList(SArray* dst, const SArray* src, uint64_t uid, bool deepcopy); + +STableMetaInfo* getMetaInfo(SQueryStmtInfo* pQueryInfo, int32_t tableIndex); +int32_t getNewResColId(); + #ifdef __cplusplus } #endif diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index cbd9b6f89e..be2179d760 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -24,8 +24,8 @@ extern "C" { #define QUERY_TYPE_PARTIAL 2 struct SEpSet; -struct SQueryNode; -struct SQueryPhyNode; +struct SQueryPlanNode; +struct SQueryDistPlanNode; struct SQueryStmtInfo; typedef struct SSubquery { @@ -33,7 +33,7 @@ typedef struct SSubquery { int32_t type; // QUERY_TYPE_MERGE|QUERY_TYPE_PARTIAL int32_t level; // the execution level of 
current subquery, starting from 0. SArray *pUpstream; // the upstream,from which to fetch the result - struct SQueryPhyNode *pNode; // physical plan of current subquery + struct SQueryDistPlanNode *pNode; // physical plan of current subquery } SSubquery; typedef struct SQueryJob { @@ -48,7 +48,7 @@ typedef struct SQueryJob { * @param pQueryNode * @return */ -int32_t qOptimizeQueryPlan(struct SQueryNode* pQueryNode); +int32_t qOptimizeQueryPlan(struct SQueryPlanNode* pQueryNode); /** * Create the query plan according to the bound AST, which is in the form of pQueryInfo @@ -56,14 +56,14 @@ int32_t qOptimizeQueryPlan(struct SQueryNode* pQueryNode); * @param pQueryNode * @return */ -int32_t qCreateQueryPlan(const struct SQueryStmtInfo* pQueryInfo, struct SQueryNode* pQueryNode); +int32_t qCreateQueryPlan(const struct SQueryStmtInfo* pQueryInfo, struct SQueryPlanNode* pQueryNode); /** * Convert the query plan to string, in order to display it in the shell. * @param pQueryNode * @return */ -int32_t qQueryPlanToString(struct SQueryNode* pQueryNode, char** str); +int32_t qQueryPlanToString(struct SQueryPlanNode* pQueryNode, char** str); /** * Restore the SQL statement according to the logic query plan. @@ -71,7 +71,7 @@ int32_t qQueryPlanToString(struct SQueryNode* pQueryNode, char** str); * @param sql * @return */ -int32_t qQueryPlanToSql(struct SQueryNode* pQueryNode, char** sql); +int32_t qQueryPlanToSql(struct SQueryPlanNode* pQueryNode, char** sql); /** * Create the physical plan for the query, according to the logic plan. @@ -79,7 +79,7 @@ int32_t qQueryPlanToSql(struct SQueryNode* pQueryNode, char** sql); * @param pPhyNode * @return */ -int32_t qCreatePhysicalPlan(struct SQueryNode* pQueryNode, struct SEpSet* pQnode, struct SQueryPhyNode *pPhyNode); +int32_t qCreatePhysicalPlan(struct SQueryPlanNode* pQueryNode, struct SEpSet* pQnode, struct SQueryDistPlanNode *pPhyNode); /** * Convert to physical plan to string to enable to print it out in the shell. 
@@ -87,20 +87,20 @@ int32_t qCreatePhysicalPlan(struct SQueryNode* pQueryNode, struct SEpSet* pQnode * @param str * @return */ -int32_t qPhyPlanToString(struct SQueryPhyNode *pPhyNode, char** str); +int32_t qPhyPlanToString(struct SQueryDistPlanNode *pPhyNode, char** str); /** * Destroy the query plan object. * @return */ -void* qDestroyQueryPlan(struct SQueryNode* pQueryNode); +void* qDestroyQueryPlan(struct SQueryPlanNode* pQueryNode); /** * Destroy the physical plan. * @param pQueryPhyNode * @return */ -void* qDestroyQueryPhyPlan(struct SQueryPhyNode* pQueryPhyNode); +void* qDestroyQueryPhyPlan(struct SQueryDistPlanNode* pQueryPhyNode); /** * Create the query job from the physical execution plan @@ -108,7 +108,7 @@ void* qDestroyQueryPhyPlan(struct SQueryPhyNode* pQueryPhyNode); * @param pJob * @return */ -int32_t qCreateQueryJob(const struct SQueryPhyNode* pPhyNode, struct SQueryJob** pJob); +int32_t qCreateQueryJob(const struct SQueryDistPlanNode* pPhyNode, struct SQueryJob** pJob); #ifdef __cplusplus } diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 454a2ecfa8..a0602ec1b0 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -62,24 +62,24 @@ typedef struct SSyncFSM { void* pData; // apply committed log, bufs will be free by raft module - int (*applyLog)(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf, void *pData); + int (*applyLog)(struct SSyncFSM* fsm, SyncIndex index, const SSyncBuffer* buf, void* pData); - // cluster commit callback - int (*onClusterChanged)(struct SSyncFSM *fsm, const SSyncCluster* cluster, void *pData); + // cluster commit callback + int (*onClusterChanged)(struct SSyncFSM* fsm, const SSyncCluster* cluster, void* pData); // fsm return snapshot in ppBuf, bufs will be free by raft module // TODO: getSnapshot SHOULD be async? 
- int (*getSnapshot)(struct SSyncFSM *fsm, SSyncBuffer **ppBuf, int* objId, bool *isLast); + int (*getSnapshot)(struct SSyncFSM* fsm, SSyncBuffer** ppBuf, int* objId, bool* isLast); // fsm apply snapshot with pBuf data - int (*applySnapshot)(struct SSyncFSM *fsm, SSyncBuffer *pBuf, int objId, bool isLast); + int (*applySnapshot)(struct SSyncFSM* fsm, SSyncBuffer* pBuf, int objId, bool isLast); // call when restore snapshot and log done - int (*onRestoreDone)(struct SSyncFSM *fsm); + int (*onRestoreDone)(struct SSyncFSM* fsm); - void (*onRollback)(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf); + void (*onRollback)(struct SSyncFSM* fsm, SyncIndex index, const SSyncBuffer* buf); - void (*onRoleChanged)(struct SSyncFSM *fsm, const SNodesRole* pRole); + void (*onRoleChanged)(struct SSyncFSM* fsm, const SNodesRole* pRole); } SSyncFSM; diff --git a/include/os/os.h b/include/os/os.h index e5731c79c7..ac36611a1b 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -44,6 +44,7 @@ extern "C" { #include #include #include +#include #include "osAtomic.h" #include "osDef.h" diff --git a/include/util/tdef.h b/include/util/tdef.h index 2cac7fc7f4..21a70c9d91 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -24,26 +24,20 @@ extern "C" { #define TSDB__packed -#ifdef TSKEY32 -#define TSKEY int32_t; -#else #define TSKEY int64_t -#endif - #define TSKEY_INITIAL_VAL INT64_MIN // Bytes for each type. 
extern const int32_t TYPE_BYTES[15]; // TODO: replace and remove code below -#define CHAR_BYTES sizeof(char) -#define SHORT_BYTES sizeof(int16_t) -#define INT_BYTES sizeof(int32_t) -#define LONG_BYTES sizeof(int64_t) -#define FLOAT_BYTES sizeof(float) -#define DOUBLE_BYTES sizeof(double) -#define POINTER_BYTES sizeof(void *) // 8 by default assert(sizeof(ptrdiff_t) == sizseof(void*) - +#define CHAR_BYTES sizeof(char) +#define SHORT_BYTES sizeof(int16_t) +#define INT_BYTES sizeof(int32_t) +#define LONG_BYTES sizeof(int64_t) +#define FLOAT_BYTES sizeof(float) +#define DOUBLE_BYTES sizeof(double) +#define POINTER_BYTES sizeof(void *) // 8 by default assert(sizeof(ptrdiff_t) == sizseof(void*) #define TSDB_KEYSIZE sizeof(TSKEY) #define TSDB_NCHAR_SIZE sizeof(int32_t) @@ -88,10 +82,11 @@ extern const int32_t TYPE_BYTES[15]; #define TSDB_ERR -1 #define TS_PATH_DELIMITER "." +#define TS_ESCAPE_CHAR '`' -#define TSDB_TIME_PRECISION_MILLI 0 -#define TSDB_TIME_PRECISION_MICRO 1 -#define TSDB_TIME_PRECISION_NANO 2 +#define TSDB_TIME_PRECISION_MILLI 0 +#define TSDB_TIME_PRECISION_MICRO 1 +#define TSDB_TIME_PRECISION_NANO 2 #define TSDB_TIME_PRECISION_MILLI_STR "ms" #define TSDB_TIME_PRECISION_MICRO_STR "us" @@ -132,11 +127,12 @@ do { \ #define TSDB_RELATION_MATCH 14 #define TSDB_RELATION_NMATCH 15 -#define TSDB_BINARY_OP_ADD 30 -#define TSDB_BINARY_OP_SUBTRACT 31 -#define TSDB_BINARY_OP_MULTIPLY 32 -#define TSDB_BINARY_OP_DIVIDE 33 -#define TSDB_BINARY_OP_REMAINDER 34 +#define TSDB_BINARY_OP_ADD 4000 +#define TSDB_BINARY_OP_SUBTRACT 4001 +#define TSDB_BINARY_OP_MULTIPLY 4002 +#define TSDB_BINARY_OP_DIVIDE 4003 +#define TSDB_BINARY_OP_REMAINDER 4004 +#define TSDB_BINARY_OP_CONCAT 4005 #define IS_RELATION_OPTR(op) (((op) >= TSDB_RELATION_LESS) && ((op) < TSDB_RELATION_IN)) @@ -386,44 +382,6 @@ do { \ #define TSDB_DATA_TYPE_UINT 13 // 4 bytes #define TSDB_DATA_TYPE_UBIGINT 14 // 8 bytes -// ----------------- For variable data types such as TSDB_DATA_TYPE_BINARY and 
TSDB_DATA_TYPE_NCHAR - -//typedef int32_t VarDataOffsetT; -//typedef int16_t VarDataLenT; // maxVarDataLen: 32767 -//typedef uint16_t TDRowLenT; // not including overhead: 0 ~ 65535 -//typedef uint32_t TDRowTLenT; // total length, including overhead -// -//typedef struct tstr { -// VarDataLenT len; -// char data[]; -//} tstr; -// -//#pragma pack(push, 1) -//typedef struct { -// VarDataLenT len; -// uint8_t data; -//} SBinaryNullT; -// -//typedef struct { -// VarDataLenT len; -// uint32_t data; -//} SNCharNullT; -//#pragma pack(pop) -// -//#define VARSTR_HEADER_SIZE sizeof(VarDataLenT) -// -//#define varDataLen(v) ((VarDataLenT *)(v))[0] -//#define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) -//#define varDataVal(v) ((void *)((char *)v + VARSTR_HEADER_SIZE)) -//#define varDataCopy(dst, v) memcpy((dst), (void*) (v), varDataTLen(v)) -//#define varDataLenByData(v) (*(VarDataLenT *)(((char*)(v)) - VARSTR_HEADER_SIZE)) -//#define varDataSetLen(v, _len) (((VarDataLenT *)(v))[0] = (VarDataLenT) (_len)) -//#define IS_VAR_DATA_TYPE(t) (((t) == TSDB_DATA_TYPE_BINARY) || ((t) == TSDB_DATA_TYPE_NCHAR)) -// -//#define varDataNetLen(v) (htons(((VarDataLenT *)(v))[0])) -//#define varDataNetTLen(v) (sizeof(VarDataLenT) + varDataNetLen(v)) - - #ifdef __cplusplus } #endif diff --git a/include/util/tpagedfile.h b/include/util/tpagedfile.h new file mode 100644 index 0000000000..5bc4dc92a0 --- /dev/null +++ b/include/util/tpagedfile.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TPAGEDFILE_H +#define TDENGINE_TPAGEDFILE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tlist.h" +#include "thash.h" +#include "os.h" +#include "tlockfree.h" + +typedef struct SArray* SIDList; + +typedef struct SPageDiskInfo { + int32_t offset; + int32_t length; +} SPageDiskInfo; + +typedef struct SPageInfo { + SListNode* pn; // point to list node + int32_t pageId; + SPageDiskInfo info; + void* pData; + bool used; // set current page is in used +} SPageInfo; + +typedef struct SFreeListItem { + int32_t offset; + int32_t len; +} SFreeListItem; + +typedef struct SResultBufStatis { + int32_t flushBytes; + int32_t loadBytes; + int32_t getPages; + int32_t releasePages; + int32_t flushPages; +} SResultBufStatis; + +typedef struct SDiskbasedResultBuf { + int32_t numOfPages; + int64_t totalBufSize; + int64_t fileSize; // disk file size + FILE* file; + int32_t allocateId; // allocated page id + char* path; // file path + int32_t pageSize; // current used page size + int32_t inMemPages; // numOfPages that are allocated in memory + SHashObj* groupSet; // id hash table + SHashObj* all; + SList* lruList; + void* emptyDummyIdList; // dummy id list + void* assistBuf; // assistant buffer for compress/decompress data + SArray* pFree; // free area in file + bool comp; // compressed before flushed to disk + int32_t nextPos; // next page flush position + + uint64_t qId; // for debug purpose + SResultBufStatis statis; +} SDiskbasedResultBuf; + +#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) // in bytes +#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1} +#define DEFAULT_PAGE_SIZE (16384L) + +typedef struct SFilePage { + int64_t num; + char data[]; +} SFilePage; + +/** + * create disk-based result buffer + * @param pResultBuf + * @param rowSize + * @param pagesize + * @param inMemPages + * @param handle + * @return + */ 
+int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir); + +/** + * + * @param pResultBuf + * @param groupId + * @param pageId + * @return + */ +SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId); + +/** + * + * @param pResultBuf + * @param groupId + * @return + */ +SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); + +/** + * get the specified buffer page by id + * @param pResultBuf + * @param id + * @return + */ +SFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); + +/** + * release the referenced buf pages + * @param pResultBuf + * @param page + */ +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); + +/** + * + * @param pResultBuf + * @param pi + */ +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); + + +/** + * get the total buffer size in the format of disk file + * @param pResultBuf + * @return + */ +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf); + +/** + * get the number of groups in the result buffer + * @param pResultBuf + * @return + */ +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf); + +/** + * destroy result buffer + * @param pResultBuf + */ +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf); + +/** + * + * @param pList + * @return + */ +SPageInfo* getLastPageInfo(SIDList pList); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TPAGEDFILE_H diff --git a/include/util/tskiplist.h b/include/util/tskiplist.h index 8e991727a7..02db8cb534 100644 --- a/include/util/tskiplist.h +++ b/include/util/tskiplist.h @@ -21,7 +21,7 @@ extern "C" { #endif #include "os.h" -#include "tdef.h" +//#include "tdef.h" #include "tarray.h" #include "tfunctional.h" diff --git a/include/util/tutil.h b/include/util/tutil.h index 202c55ab9c..8dbcb7e8d5 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ 
-26,6 +26,7 @@ extern "C" { #include "tdef.h" int32_t strdequote(char *src); +int32_t strndequote(char *dst, const char* z, int32_t len); int32_t strRmquote(char *z, int32_t len); size_t strtrim(char *src); char * strnchr(char *haystack, char needle, int32_t len, bool skipquote); @@ -40,9 +41,6 @@ char * paGetToken(char *src, char **token, int32_t *tokenLen); int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]); int32_t taosHexStrToByteArray(char hexstr[], char bytes[]); -//bool taosGetVersionNumber(char *versionStr, int *versionNubmer); -//int taosCheckVersion(char *input_client_version, char *input_server_version, int compared_segments); - char * taosIpStr(uint32_t ipInt); uint32_t ip2uint(const char *const ip_addr); diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index d03231fcfb..1ff83b091d 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -10,4 +10,6 @@ target_link_libraries( PUBLIC os PUBLIC util INTERFACE api -) \ No newline at end of file +) + +ADD_SUBDIRECTORY(test) diff --git a/source/common/src/sqlcmdstr.c b/source/common/src/tmsgtype.c similarity index 100% rename from source/common/src/sqlcmdstr.c rename to source/common/src/tmsgtype.c diff --git a/source/common/src/tname.c b/source/common/src/tname.c index c5bbda3c8c..c290a04ebc 100644 --- a/source/common/src/tname.c +++ b/source/common/src/tname.c @@ -120,29 +120,6 @@ int64_t taosGetIntervalStartTimestamp(int64_t startTime, int64_t slidingTime, in #endif -/* - * tablePrefix.columnName - * extract table name and save it in pTable, with only column name in pToken - */ -//void extractTableNameFromToken(SStrToken* pToken, SStrToken* pTable) { -// const char sep = TS_PATH_DELIMITER[0]; -// -// if (pToken == pTable || pToken == NULL || pTable == NULL) { -// return; -// } -// -// char* r = strnchr(pToken->z, sep, pToken->n, false); -// -// if (r != NULL) { // record the table name token -// pTable->n = (uint32_t)(r - pToken->z); -// 
pTable->z = pToken->z; -// -// r += 1; -// pToken->n -= (uint32_t)(r - pToken->z); -// pToken->z = r; -// } -//} - static struct SSchema _s = { .colId = TSDB_TBNAME_COLUMN_INDEX, .type = TSDB_DATA_TYPE_BINARY, diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 0aa2aef4be..2686dcd205 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -416,12 +416,14 @@ static int32_t getDuration(int64_t val, char unit, int64_t* result, int32_t time return -1; } } - - /* get the value in microsecond */ return 0; } /* + * n - months + * y - Years + * is not allowed, since the duration of month or year are both variable. + * * b - nanoseconds; * u - microseconds; * a - Millionseconds @@ -430,8 +432,6 @@ static int32_t getDuration(int64_t val, char unit, int64_t* result, int32_t time * h - Hours * d - Days (24 hours) * w - Weeks (7 days) - * n - Months (30 days) - * y - Years (365 days) */ int32_t parseAbsoluteDuration(char* token, int32_t tokenlen, int64_t* duration, char* unit, int32_t timePrecision) { errno = 0; diff --git a/source/common/src/tvariant.c b/source/common/src/tvariant.c index 9216183b88..a218353ae3 100644 --- a/source/common/src/tvariant.c +++ b/source/common/src/tvariant.c @@ -15,20 +15,65 @@ #include "os.h" #include "taos.h" -#include "thash.h" #include "taosdef.h" +#include "thash.h" #include "ttime.h" #include "ttokendef.h" #include "ttypes.h" #include "tutil.h" #include "tvariant.h" -#define SET_EXT_INFO(converted, res, minv, maxv, exti) do { \ - if (converted == NULL || exti == NULL || *converted == false) { break; } \ - if ((res) < (minv)) { *exti = -1; break; } \ - if ((res) > (maxv)) { *exti = 1; break; } \ - assert(0); \ - } while (0) +#define SET_EXT_INFO(converted, res, minv, maxv, exti) \ + do { \ + if (converted == NULL || exti == NULL || *converted == false) { \ + break; \ + } \ + if ((res) < (minv)) { \ + *exti = -1; \ + break; \ + } \ + if ((res) > (maxv)) { \ + *exti = 1; \ + break; \ + } \ + assert(0); \ + 
} while (0) + +int32_t toInteger(const char* z, int32_t n, int32_t base, int64_t* value, bool* isSigned) { + errno = 0; + char* endPtr = NULL; + + int32_t index = 0; + + bool specifiedSign = (z[0] == '+' || z[0] == '-'); + if (specifiedSign) { + *isSigned = true; + index = 1; + } + + uint64_t val = strtoull(&z[index], &endPtr, base); + if (errno == ERANGE || errno == EINVAL) { + errno = 0; + return -1; + } + + if (specifiedSign && val > INT64_MAX) { + return -1; + } + + if (endPtr - &z[index] != n - index) { + return -1; + } + + *isSigned = specifiedSign || (val <= INT64_MAX); + if (*isSigned) { + *value = (z[0] == '-')? -val:val; + } else { + *(uint64_t*) value = val; + } + + return 0; +} void taosVariantCreate(SVariant *pVar, char* z, int32_t n, int32_t type) { int32_t ret = 0; @@ -43,7 +88,6 @@ void taosVariantCreate(SVariant *pVar, char* z, int32_t n, int32_t type) { } else { return; } - break; } @@ -51,38 +95,38 @@ void taosVariantCreate(SVariant *pVar, char* z, int32_t n, int32_t type) { case TSDB_DATA_TYPE_SMALLINT: case TSDB_DATA_TYPE_BIGINT: case TSDB_DATA_TYPE_INT:{ -// ret = tStrToInteger(token->z, token->type, token->n, &pVar->i64, true); -// if (ret != 0) { -// SToken t = {0}; -// tGetToken(token->z, &t.type); -// if (t.type == TK_MINUS) { // it is a signed number which is greater than INT64_MAX or less than INT64_MIN -// pVar->nType = -1; // -1 means error type -// return; -// } -// -// // data overflow, try unsigned parse the input number -// ret = tStrToInteger(token->z, token->type, token->n, &pVar->i64, false); -// if (ret != 0) { -// pVar->nType = -1; // -1 means error type -// return; -// } -// } + bool sign = true; + int32_t base = 10; + if (type == TK_HEX) { + base = 16; + } else if (type == TK_OCT) { + base = 8; + } else if (type == TK_BIN) { + base = 2; + } + + ret = toInteger(z, n, base, &pVar->i64, &sign); + if (ret != 0) { + pVar->nType = -1; // -1 means error type + return; + } + + pVar->nType = (sign)? 
TSDB_DATA_TYPE_BIGINT:TSDB_DATA_TYPE_UBIGINT; break; } - case TSDB_DATA_TYPE_DOUBLE: case TSDB_DATA_TYPE_FLOAT: { pVar->d = strtod(z, NULL); break; } - case TSDB_DATA_TYPE_BINARY: { pVar->pz = strndup(z, n); pVar->nLen = strRmquote(pVar->pz, n); break; } case TSDB_DATA_TYPE_TIMESTAMP: { + assert(0); pVar->i64 = taosGetTimestamp(TSDB_TIME_PRECISION_NANO); break; } @@ -95,7 +139,6 @@ void taosVariantCreate(SVariant *pVar, char* z, int32_t n, int32_t type) { pVar->nType = type; } - /** * create SVariant from binary string, not ascii data * @param pVar @@ -901,7 +944,7 @@ int32_t taosVariantDump(SVariant *pVariant, char *payload, int16_t type, bool in * * It is actually the bigint/binary/bool/nchar type transfer */ -int32_t tVariantTypeSetType(SVariant *pVariant, char type) { +int32_t taosVariantTypeSetType(SVariant *pVariant, char type) { if (pVariant == NULL || pVariant->nType == 0) { // value is not set return 0; } diff --git a/source/common/src/versionUtil.c b/source/common/src/versionUtil.c new file mode 100644 index 0000000000..22c50fa5cf --- /dev/null +++ b/source/common/src/versionUtil.c @@ -0,0 +1,63 @@ +#include "os.h" +#include "tdef.h" +#include "ulog.h" +#include "taoserror.h" + +bool taosGetVersionNumber(char *versionStr, int *versionNubmer) { + if (versionStr == NULL || versionNubmer == NULL) { + return false; + } + + int versionNumberPos[5] = {0}; + int len = (int)strlen(versionStr); + int dot = 0; + for (int pos = 0; pos < len && dot < 4; ++pos) { + if (versionStr[pos] == '.') { + versionStr[pos] = 0; + versionNumberPos[++dot] = pos + 1; + } + } + + if (dot != 3) { + return false; + } + + for (int pos = 0; pos < 4; ++pos) { + versionNubmer[pos] = atoi(versionStr + versionNumberPos[pos]); + } + versionStr[versionNumberPos[1] - 1] = '.'; + versionStr[versionNumberPos[2] - 1] = '.'; + versionStr[versionNumberPos[3] - 1] = '.'; + + return true; +} + +int taosCheckVersion(char *input_client_version, char *input_server_version, int comparedSegments) { + char 
client_version[TSDB_VERSION_LEN] = {0}; + char server_version[TSDB_VERSION_LEN] = {0}; + int clientVersionNumber[4] = {0}; + int serverVersionNumber[4] = {0}; + + tstrncpy(client_version, input_client_version, sizeof(client_version)); + tstrncpy(server_version, input_server_version, sizeof(server_version)); + + if (!taosGetVersionNumber(client_version, clientVersionNumber)) { + uError("invalid client version:%s", client_version); + return TSDB_CODE_TSC_INVALID_VERSION; + } + + if (!taosGetVersionNumber(server_version, serverVersionNumber)) { + uError("invalid server version:%s", server_version); + return TSDB_CODE_TSC_INVALID_VERSION; + } + + for(int32_t i = 0; i < comparedSegments; ++i) { + if (clientVersionNumber[i] != serverVersionNumber[i]) { + uError("the %d-th number of server version:%s not matched with client version:%s", i, server_version, + client_version); + return TSDB_CODE_TSC_INVALID_VERSION; + } + } + + return 0; +} diff --git a/source/common/test/CMakeLists.txt b/source/common/test/CMakeLists.txt new file mode 100644 index 0000000000..f4c849780d --- /dev/null +++ b/source/common/test/CMakeLists.txt @@ -0,0 +1,18 @@ + +MESSAGE(STATUS "build parser unit test") + +# GoogleTest requires at least C++11 +SET(CMAKE_CXX_STANDARD 11) +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) + +ADD_EXECUTABLE(commonTest ${SOURCE_LIST}) +TARGET_LINK_LIBRARIES( + commonTest + PUBLIC os util common gtest +) + +TARGET_INCLUDE_DIRECTORIES( + commonTest + PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/common/" + PRIVATE "${CMAKE_SOURCE_DIR}/source/libs/common/inc" +) diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index e69de29bb2..2f821ee8b9 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -0,0 +1,96 @@ +#include +#include +#pragma GCC diagnostic ignored "-Wwrite-strings" + +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma 
GCC diagnostic ignored "-Wunused-but-set-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" +#include "os.h" + +#include "taos.h" +#include "tvariant.h" +#include "tdef.h" + +namespace { +// +} // namespace + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +TEST(testCase, toInteger_test) { + char* s = "123"; + uint32_t type = 0; + + int64_t val = 0; + bool sign = true; + + int32_t ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 123); + ASSERT_EQ(sign, true); + + s = "9223372036854775807"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 9223372036854775807); + ASSERT_EQ(sign, true); + + s = "9323372036854775807"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 9323372036854775807u); + ASSERT_EQ(sign, false); + + s = "-9323372036854775807"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, -1); + + s = "-1"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -1); + ASSERT_EQ(sign, true); + + s = "-9223372036854775807"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -9223372036854775807); + ASSERT_EQ(sign, true); + + s = "1000u"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, -1); + + s = "0x10"; + ret = toInteger(s, strlen(s), 16, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 16); + ASSERT_EQ(sign, true); + + s = "110"; + ret = toInteger(s, strlen(s), 2, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 6); + ASSERT_EQ(sign, true); + + s = "110"; + ret = toInteger(s, strlen(s), 8, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 72); + ASSERT_EQ(sign, true); + + //18446744073709551615 UINT64_MAX + s = "18446744073709551615"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 18446744073709551615u); + ASSERT_EQ(sign, false); + 
+ s = "18446744073709551616"; + ret = toInteger(s, strlen(s), 10, &val, &sign); + ASSERT_EQ(ret, -1); +} diff --git a/source/libs/CMakeLists.txt b/source/libs/CMakeLists.txt index 3a975e679b..eeaac61e21 100644 --- a/source/libs/CMakeLists.txt +++ b/source/libs/CMakeLists.txt @@ -8,4 +8,5 @@ add_subdirectory(scheduler) add_subdirectory(lru) add_subdirectory(catalog) add_subdirectory(executor) -add_subdirectory(planner) \ No newline at end of file +add_subdirectory(planner) +add_subdirectory(function) \ No newline at end of file diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index c553e1bfbf..21d46090e5 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -14,3 +14,11 @@ */ #include "catalogInt.h" + +struct SCatalog* getCatalogHandle(const SEpSet* pMgmtEps) { + return NULL; +} + +int32_t catalogGetMetaData(struct SCatalog* pCatalog, const SMetaReq* pMetaReq, SMetaData* pMetaData) { + return 0; +} diff --git a/source/libs/function/CMakeLists.txt b/source/libs/function/CMakeLists.txt new file mode 100644 index 0000000000..9fbfc82e3c --- /dev/null +++ b/source/libs/function/CMakeLists.txt @@ -0,0 +1,12 @@ +aux_source_directory(src FUNCTION_SRC) +add_library(function ${FUNCTION_SRC}) +target_include_directories( + function + PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/function" + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" +) + +target_link_libraries( + function + PRIVATE os util common +) \ No newline at end of file diff --git a/source/libs/function/inc/taggfunction.h b/source/libs/function/inc/taggfunction.h new file mode 100644 index 0000000000..0ebba4cd8e --- /dev/null +++ b/source/libs/function/inc/taggfunction.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TAGGFUNCTION_H +#define TDENGINE_TAGGFUNCTION_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "os.h" + +#include "tname.h" +#include "taosdef.h" +#include "tvariant.h" +#include "function.h" +#include "tudf.h" + +extern SAggFunctionInfo aggFunc[34]; + +typedef struct SResultRowCellInfo { + int8_t hasResult; // result generated, not NULL value + bool initialized; // output buffer has been initialized + bool complete; // query has completed + uint32_t numOfRes; // num of output result in current buffer +} SResultRowCellInfo; + +#define FUNCSTATE_SO 0x0u +#define FUNCSTATE_MO 0x1u // dynamic number of output, not multinumber of output e.g., TOP/BOTTOM +#define FUNCSTATE_STREAM 0x2u // function avail for stream +#define FUNCSTATE_STABLE 0x4u // function avail for super table +#define FUNCSTATE_NEED_TS 0x8u // timestamp is required during query processing +#define FUNCSTATE_SELECTIVITY 0x10u // selectivity functions, can exists along with tag columns + +#define BASIC_FUNC_SO FUNCSTATE_SO | FUNCSTATE_STREAM | FUNCSTATE_STABLE +#define BASIC_FUNC_MO FUNCSTATE_MO | FUNCSTATE_STREAM | FUNCSTATE_STABLE + +#define AVG_FUNCTION_INTER_BUFFER_SIZE 50 + +#define DATA_SET_FLAG ',' // to denote the output area has data, not null value +#define DATA_SET_FLAG_SIZE sizeof(DATA_SET_FLAG) + +#define QUERY_ASC_FORWARD_STEP 1 +#define QUERY_DESC_FORWARD_STEP -1 + +#define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? 
QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) + +#define MAX_INTERVAL_TIME_WINDOW 1000000 // maximum allowed time windows in final results +#define TOP_BOTTOM_QUERY_LIMIT 100 + +enum { + MASTER_SCAN = 0x0u, + REVERSE_SCAN = 0x1u, + REPEAT_SCAN = 0x2u, //repeat scan belongs to the master scan + MERGE_STAGE = 0x20u, +}; + +#define QUERY_IS_STABLE_QUERY(type) (((type)&TSDB_QUERY_TYPE_STABLE_QUERY) != 0) +#define QUERY_IS_JOIN_QUERY(type) (TSDB_QUERY_HAS_TYPE(type, TSDB_QUERY_TYPE_JOIN_QUERY)) +#define QUERY_IS_PROJECTION_QUERY(type) (((type)&TSDB_QUERY_TYPE_PROJECTION_QUERY) != 0) +#define QUERY_IS_FREE_RESOURCE(type) (((type)&TSDB_QUERY_TYPE_FREE_RESOURCE) != 0) + +typedef struct SArithmeticSupport { + struct SExprInfo *pExprInfo; + int32_t numOfCols; + SColumnInfo *colList; + void *exprList; // client side used + int32_t offset; + char** data; +} SArithmeticSupport; + +typedef struct SInterpInfoDetail { + TSKEY ts; // interp specified timestamp + int8_t type; + int8_t primaryCol; +} SInterpInfoDetail; + +#define GET_ROWCELL_INTERBUF(_c) ((void*) ((char*)(_c) + sizeof(SResultRowCellInfo))) + +#define GET_RES_INFO(ctx) ((ctx)->resultInfo) + +#define IS_STREAM_QUERY_VALID(x) (((x)&TSDB_FUNCSTATE_STREAM) != 0) +#define IS_MULTIOUTPUT(x) (((x)&TSDB_FUNCSTATE_MO) != 0) + +// determine the real data need to calculated the result +enum { + BLK_DATA_NO_NEEDED = 0x0, + BLK_DATA_STATIS_NEEDED = 0x1, + BLK_DATA_ALL_NEEDED = 0x3, + BLK_DATA_DISCARD = 0x4, // discard current data block since it is not qualified for filter +}; + +typedef struct STwaInfo { + int8_t hasResult; // flag to denote has value + double dOutput; + SPoint1 p; + STimeWindow win; +} STwaInfo; + +extern int32_t functionCompatList[]; // compatible check array list + +bool topbot_datablock_filter(SQLFunctionCtx *pCtx, const char *minval, const char *maxval); + +/** + * the numOfRes should be kept, since it may be used later + * and allow the ResultInfo to be re initialized + */ +#define RESET_RESULT_INFO(_r) 
\ + do { \ + (_r)->initialized = false; \ + } while (0) + +static FORCE_INLINE void initResultInfo(SResultRowCellInfo *pResInfo, int32_t bufLen) { + pResInfo->initialized = true; // the this struct has been initialized flag + + pResInfo->complete = false; + pResInfo->hasResult = false; + pResInfo->numOfRes = 0; + + memset(GET_ROWCELL_INTERBUF(pResInfo), 0, bufLen); +} + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TAGGFUNCTION_H diff --git a/source/libs/executor/inc/texpr.h b/source/libs/function/inc/texpr.h similarity index 74% rename from source/libs/executor/inc/texpr.h rename to source/libs/function/inc/texpr.h index d723549d49..fb4ddd476c 100644 --- a/source/libs/executor/inc/texpr.h +++ b/source/libs/function/inc/texpr.h @@ -25,7 +25,7 @@ extern "C" { #include "taosmsg.h" #include "taosdef.h" #include "tskiplist.h" -#include "tbuffer.h" +#include "function.h" struct tExprNode; struct SSchema; @@ -43,13 +43,6 @@ struct SSchema; typedef bool (*__result_filter_fn_t)(const void *, void *); typedef void (*__do_filter_suppl_fn_t)(void *, void *); -enum { - TSQL_NODE_DUMMY = 0x0, - TSQL_NODE_EXPR = 0x1, - TSQL_NODE_COL = 0x2, - TSQL_NODE_VALUE = 0x4, -}; - /** * this structure is used to filter data in tags, so the offset of filtered tag column in tagdata string is required */ @@ -61,37 +54,16 @@ typedef struct tQueryInfo { bool indexed; // indexed columns } tQueryInfo; -typedef struct tExprNode { - uint8_t nodeType; - union { - struct { - uint8_t optr; // filter operator - uint8_t hasPK; // 0: do not contain primary filter, 1: contain - void *info; // support filter operation on this expression only available for leaf node - struct tExprNode *pLeft; // left child pointer - struct tExprNode *pRight; // right child pointer - } _node; - - SSchema *pSchema; - struct SVariant *pVal; - }; -} tExprNode; - typedef struct SExprTraverseSupp { __result_filter_fn_t nodeFilterFn; __do_filter_suppl_fn_t setupInfoFn; void *pExtInfo; } SExprTraverseSupp; -void 
tExprTreeDestroy(tExprNode *pNode, void (*fp)(void *)); - -void exprTreeToBinary(SBufferWriter* bw, tExprNode* pExprTree); tExprNode* exprTreeFromBinary(const void* data, size_t size); tExprNode* exprTreeFromTableName(const char* tbnameCond); tExprNode* exprdup(tExprNode* pTree); -void exprTreeToBinary(SBufferWriter* bw, tExprNode* pExprTree); - bool exprTreeApplyFilter(tExprNode *pExpr, const void *pItem, SExprTraverseSupp *param); void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, void *param, int32_t order, diff --git a/source/libs/function/inc/tfill.h b/source/libs/function/inc/tfill.h new file mode 100644 index 0000000000..978feb001d --- /dev/null +++ b/source/libs/function/inc/tfill.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TFILL_H +#define TDENGINE_TFILL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "os.h" +#include "taosdef.h" + +struct SSDataBlock; + +typedef struct { + STColumn col; // column info + int16_t functionId; // sql function id + int16_t flag; // column flag: TAG COLUMN|NORMAL COLUMN + int16_t tagIndex; // index of current tag in SFillTagColInfo array list + union {int64_t i; double d;} fillVal; +} SFillColInfo; + +typedef struct { + SSchema col; + char* tagVal; +} SFillTagColInfo; + +typedef struct SFillInfo { + TSKEY start; // start timestamp + TSKEY end; // endKey for fill + TSKEY currentKey; // current active timestamp, the value may be changed during the fill procedure. + int32_t order; // order [TSDB_ORDER_ASC|TSDB_ORDER_DESC] + int32_t type; // fill type + int32_t numOfRows; // number of rows in the input data block + int32_t index; // active row index + int32_t numOfTotal; // number of filled rows in one round + int32_t numOfCurrent; // number of filled rows in current results + + int32_t numOfTags; // number of tags + int32_t numOfCols; // number of columns, including the tags columns + int32_t rowSize; // size of each row + SInterval interval; + char * prevValues; // previous row of data, to generate the interpolation results + char * nextValues; // next row of data + char** pData; // original result data block involved in filling data + int32_t alloc; // data buffer size in rows + int8_t precision; // time resolution + + SFillColInfo* pFillCol; // column info for fill operations + SFillTagColInfo* pTags; // tags value for filling gap + void* handle; // for debug purpose +} SFillInfo; + +typedef struct SPoint { + int64_t key; + void * val; +} SPoint; + +SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols, + int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType, + SFillColInfo* pFillCol, void* handle); + +void taosResetFillInfo(SFillInfo* 
pFillInfo, TSKEY startTimestamp); + +void* taosDestroyFillInfo(SFillInfo *pFillInfo); + +void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey); + +void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const struct SSDataBlock* pInput); + +bool taosFillHasMoreResults(SFillInfo* pFillInfo); + +int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, int64_t ekey, int32_t maxNumOfRows); + +int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType); + +int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TFILL_H diff --git a/source/libs/function/inc/thistogram.h b/source/libs/function/inc/thistogram.h new file mode 100644 index 0000000000..3b5c2b4cfb --- /dev/null +++ b/source/libs/function/inc/thistogram.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_HISTOGRAM_H +#define TDENGINE_HISTOGRAM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define USE_ARRAYLIST + +#define MAX_HISTOGRAM_BIN 500 + +typedef struct SHistBin { + double val; + int64_t num; + +#if !defined(USE_ARRAYLIST) + double delta; + int32_t index; // index in min-heap list +#endif +} SHistBin; + +typedef struct SHeapEntry { + void* pData; + double val; +} SHeapEntry; + +typedef struct SHistogramInfo { + int64_t numOfElems; + int32_t numOfEntries; + int32_t maxEntries; + double min; + double max; +#if defined(USE_ARRAYLIST) + SHistBin* elems; +#else + tSkipList* pList; + SLoserTreeInfo* pLoserTree; + int32_t maxIndex; + bool ordered; +#endif +} SHistogramInfo; + +SHistogramInfo* tHistogramCreate(int32_t numOfBins); +SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins); + +int32_t tHistogramAdd(SHistogramInfo** pHisto, double val); +int64_t tHistogramSum(SHistogramInfo* pHisto, double v); + +double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num); +SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries); +void tHistogramDestroy(SHistogramInfo** pHisto); + +void tHistogramPrint(SHistogramInfo* pHisto); + +int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val); + +SHeapEntry* tHeapCreate(int32_t numOfEntries); +void tHeapSort(SHeapEntry* pEntry, int32_t len); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_HISTOGRAM_H diff --git a/source/libs/function/inc/tpercentile.h b/source/libs/function/inc/tpercentile.h new file mode 100644 index 0000000000..563a63f6a5 --- /dev/null +++ b/source/libs/function/inc/tpercentile.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TPERCENTILE_H +#define TDENGINE_TPERCENTILE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tpagedfile.h" +#include "ttszip.h" + +typedef struct MinMaxEntry { + union { + double dMinVal; + int64_t i64MinVal; + uint64_t u64MinVal; + }; + union { + double dMaxVal; + int64_t i64MaxVal; + uint64_t u64MaxVal; // unsigned, same type as u64MinVal + }; +} MinMaxEntry; + +typedef struct { + int32_t size; + int32_t pageId; + SFilePage *data; +} SSlotInfo; + +typedef struct tMemBucketSlot { + SSlotInfo info; + MinMaxEntry range; +} tMemBucketSlot; + +struct tMemBucket; +typedef int32_t (*__perc_hash_func_t)(struct tMemBucket *pBucket, const void *value); + +typedef struct tMemBucket { + int16_t numOfSlots; + int16_t type; + int16_t bytes; + int32_t total; + int32_t elemPerPage; // number of elements for each object + int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result + int32_t bufPageSize; // disk page size + MinMaxEntry range; // value range + int32_t times; // count that has been checked for deciding the correct data value buckets. 
+ __compar_fn_t comparFn; + + tMemBucketSlot * pSlots; + SDiskbasedResultBuf *pBuffer; + __perc_hash_func_t hashFunc; +} tMemBucket; + +tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval); + +void tMemBucketDestroy(tMemBucket *pBucket); + +int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size); + +double getPercentile(tMemBucket *pMemBucket, double percent); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TPERCENTILE_H \ No newline at end of file diff --git a/source/libs/function/inc/tscalarfunction.h b/source/libs/function/inc/tscalarfunction.h new file mode 100644 index 0000000000..6959bc883d --- /dev/null +++ b/source/libs/function/inc/tscalarfunction.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#ifndef TDENGINE_TSCALARFUNCTION_H +#define TDENGINE_TSCALARFUNCTION_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "function.h" + +extern struct SScalarFunctionInfo scalarFunc[1]; + +#define FUNCTION_CEIL 38 +#define FUNCTION_FLOOR 39 +#define FUNCTION_ROUND 40 +#define FUNCTION_MAVG 41 +#define FUNCTION_CSUM 42 + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TSCALARFUNCTION_H diff --git a/source/libs/function/inc/ttszip.h b/source/libs/function/inc/ttszip.h new file mode 100644 index 0000000000..95be18f4ea --- /dev/null +++ b/source/libs/function/inc/ttszip.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TTSZIP_H +#define TDENGINE_TTSZIP_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "os.h" +#include "taosdef.h" +#include "tvariant.h" + +#define MEM_BUF_SIZE (1 << 20) +#define TS_COMP_FILE_MAGIC 0x87F5EC4C +#define TS_COMP_FILE_GROUP_MAX 512 + +typedef struct STSList { + char* rawBuf; + int32_t allocSize; + int32_t threshold; + int32_t len; +} STSList; + +typedef struct STSElem { + TSKEY ts; + SVariant* tag; + int32_t id; +} STSElem; + +typedef struct STSCursor { + int32_t vgroupIndex; + int32_t blockIndex; + int32_t tsIndex; + uint32_t order; +} STSCursor; + +typedef struct STSBlock { + SVariant tag; // tag value + int32_t numOfElem; // number of elements + int32_t compLen; // size after compressed + int32_t padding; // 0xFFFFFFFF by default, after the payload + char* payload; // actual data that is compressed +} STSBlock; + +/* + * The size of buffer file should not be greater than 2G, + * and the offset of int32_t type is enough + */ +typedef struct STSGroupBlockInfo { + int32_t id; // group id + int32_t offset; // offset set value in file + int32_t numOfBlocks; // number of total blocks + int32_t compLen; // compressed size +} STSGroupBlockInfo; + +typedef struct STSGroupBlockInfoEx { + STSGroupBlockInfo info; + int32_t len; // length before compress +} STSGroupBlockInfoEx; + +typedef struct STSBuf { + FILE* f; + char path[PATH_MAX]; + uint32_t fileSize; + + // todo use array + 
STSGroupBlockInfoEx* pData; + uint32_t numOfAlloc; + uint32_t numOfGroups; + + char* assistBuf; + int32_t bufSize; + STSBlock block; + STSList tsData; // uncompressed raw ts data + uint64_t numOfTotal; + bool autoDelete; + bool remainOpen; + int32_t tsOrder; // order of timestamp in ts comp buffer + STSCursor cur; +} STSBuf; + +typedef struct STSBufFileHeader { + uint32_t magic; // file magic number + uint32_t numOfGroup; // number of group stored in current file + int32_t tsOrder; // timestamp order in current file +} STSBufFileHeader; + +STSBuf* tsBufCreate(bool autoDelete, int32_t order); +STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete); +STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t tsOrder, int32_t id); + +void* tsBufDestroy(STSBuf* pTSBuf); + +void tsBufAppend(STSBuf* pTSBuf, int32_t id, SVariant* tag, const char* pData, int32_t len); +int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf); + +STSBuf* tsBufClone(STSBuf* pTSBuf); + +STSGroupBlockInfo* tsBufGetGroupBlockInfo(STSBuf* pTSBuf, int32_t id); + +void tsBufFlush(STSBuf* pTSBuf); +void tsBufResetPos(STSBuf* pTSBuf); +bool tsBufNextPos(STSBuf* pTSBuf); + +STSElem tsBufGetElem(STSBuf* pTSBuf); +STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t id, SVariant* tag); + +STSCursor tsBufGetCursor(STSBuf* pTSBuf); +void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order); + +void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur); + +/** + * display all data in comp block file, for debug purpose only + * @param pTSBuf + */ +void tsBufDisplay(STSBuf* pTSBuf); + +int32_t tsBufGetNumOfGroup(STSBuf* pTSBuf); + +void tsBufGetGroupIdList(STSBuf* pTSBuf, int32_t* num, int32_t** id); + +int32_t dumpFileBlockByGroupId(STSBuf* pTSBuf, int32_t id, void* buf, int32_t* len, int32_t* numOfBlocks); + +STSElem tsBufFindElemStartPosByTag(STSBuf* pTSBuf, SVariant* pTag); + +bool tsBufIsValidElem(STSElem* pElem); + +#ifdef __cplusplus +} +#endif + +#endif // 
TDENGINE_TTSZIP_H diff --git a/source/libs/function/inc/tudf.h b/source/libs/function/inc/tudf.h new file mode 100644 index 0000000000..dc643ace9e --- /dev/null +++ b/source/libs/function/inc/tudf.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TUDF_H +#define TDENGINE_TUDF_H + +enum { + TSDB_UDF_FUNC_NORMAL = 0, + TSDB_UDF_FUNC_INIT, + TSDB_UDF_FUNC_FINALIZE, + TSDB_UDF_FUNC_MERGE, + TSDB_UDF_FUNC_DESTROY, + TSDB_UDF_FUNC_MAX_NUM +}; + +typedef struct SUdfInit { + int32_t maybe_null; /* 1 if function can return NULL */ + uint32_t decimals; /* for real functions */ + uint64_t length; /* For string functions */ + char* ptr; /* free pointer for function data */ + int32_t const_item; /* 0 if result is independent of arguments */ + + // script like lua/javascript + void* script_ctx; + void (*destroyCtxFunc)(void* script_ctx); +} SUdfInit; + +typedef struct SUdfInfo { + int32_t functionId; // system assigned function id + int32_t funcType; // scalar function or aggregate function + int8_t resType; // result type + int16_t resBytes; // result byte + int32_t contLen; // content length + int32_t bufSize; // interbuf size + char* name; // function name + void* handle; // handle loaded in mem + void* funcs[TSDB_UDF_FUNC_MAX_NUM]; // function ptr + + // for script like lua/javascript only + int isScript; + void* pScriptCtx; + + SUdfInit init; + char* content; + char* path; +} SUdfInfo; + +// script + 
+typedef int32_t (*scriptInitFunc)(void* pCtx); +typedef void (*scriptNormalFunc)(void* pCtx, char* data, int16_t iType, int16_t iBytes, int32_t numOfRows, + int64_t* ptList, int64_t key, char* dataOutput, char* tsOutput, int32_t* numOfOutput, + int16_t oType, int16_t oBytes); +typedef void (*scriptFinalizeFunc)(void* pCtx, int64_t key, char* dataOutput, int32_t* numOfOutput); +typedef void (*scriptMergeFunc)(void* pCtx, char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput); +typedef void (*scriptDestroyFunc)(void* pCtx); + +// dynamic lib +typedef void (*udfNormalFunc)(char* data, int16_t itype, int16_t iBytes, int32_t numOfRows, int64_t* ts, + char* dataOutput, char* interBuf, char* tsOutput, int32_t* numOfOutput, int16_t oType, + int16_t oBytes, SUdfInit* buf); +typedef int32_t (*udfInitFunc)(SUdfInit* data); +typedef void (*udfFinalizeFunc)(char* dataOutput, char* interBuf, int32_t* numOfOutput, SUdfInit* buf); +typedef void (*udfMergeFunc)(char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput, SUdfInit* buf); +typedef void (*udfDestroyFunc)(SUdfInit* buf); + +#endif // TDENGINE_TUDF_H diff --git a/source/libs/function/src/taggfunction.c b/source/libs/function/src/taggfunction.c new file mode 100644 index 0000000000..c130e6244b --- /dev/null +++ b/source/libs/function/src/taggfunction.c @@ -0,0 +1,4777 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" +#include "taosdef.h" +#include "taosmsg.h" +//#include "texpr.h" +#include "ttypes.h" +#include "tglobal.h" +#include "thash.h" + +#include "taggfunction.h" +#include "tfill.h" +#include "thistogram.h" +#include "ttszip.h" +#include "tpercentile.h" +#include "tbuffer.h" +#include "tcompression.h" +//#include "queryLog.h" +#include "tudf.h" + +#define GET_INPUT_DATA_LIST(x) ((char *)((x)->pInput)) +#define GET_INPUT_DATA(x, y) (GET_INPUT_DATA_LIST(x) + (y) * (x)->inputBytes) + +#define GET_TS_LIST(x) ((TSKEY*)((x)->ptsList)) +#define GET_TS_DATA(x, y) (GET_TS_LIST(x)[(y)]) + +#define GET_TRUE_DATA_TYPE() \ + int32_t type = 0; \ + if (pCtx->currentStage == MERGE_STAGE) { \ + type = pCtx->outputType; \ + assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); \ + } else { \ + type = pCtx->inputType; \ + } + +#define SET_VAL(ctx, numOfElem, res) \ + do { \ + if ((numOfElem) <= 0) { \ + break; \ + } \ + GET_RES_INFO(ctx)->numOfRes = (res); \ + } while (0) + +#define INC_INIT_VAL(ctx, res) (GET_RES_INFO(ctx)->numOfRes += (res)); + +#define DO_UPDATE_TAG_COLUMNS(ctx, ts) \ + do { \ + for (int32_t _i = 0; _i < (ctx)->tagInfo.numOfTagCols; ++_i) { \ + SQLFunctionCtx *__ctx = (ctx)->tagInfo.pTagCtxList[_i]; \ + if (__ctx->functionId == FUNCTION_TS_DUMMY) { \ + __ctx->tag.i64 = (ts); \ + __ctx->tag.nType = TSDB_DATA_TYPE_BIGINT; \ + } \ + aggFunc[FUNCTION_TAG].exec(__ctx); \ + } \ + } while (0) + +#define DO_UPDATE_TAG_COLUMNS_WITHOUT_TS(ctx) \ + do { \ + for (int32_t _i = 0; _i < (ctx)->tagInfo.numOfTagCols; ++_i) { \ + SQLFunctionCtx *__ctx = (ctx)->tagInfo.pTagCtxList[_i]; \ + aggFunc[FUNCTION_TAG].exec(__ctx); \ + } \ + } while (0); + +void noop1(SQLFunctionCtx *UNUSED_PARAM(pCtx)) {} + +void doFinalizer(SQLFunctionCtx *pCtx) { RESET_RESULT_INFO(GET_RES_INFO(pCtx)); } + +typedef struct tValuePair { + SVariant v; + int64_t timestamp; + char * pTags; // the corresponding tags of each record in the final result +} tValuePair; + +typedef struct SSpreadInfo 
{ + double min; + double max; + int8_t hasResult; +} SSpreadInfo; + +typedef struct SSumInfo { + union { + int64_t isum; + uint64_t usum; + double dsum; + }; + int8_t hasResult; +} SSumInfo; + +// the attribute of hasResult is not needed since the num attribute would server as this purpose +typedef struct SAvgInfo { + double sum; + int64_t num; +} SAvgInfo; + +typedef struct SStddevInfo { + double avg; + int64_t num; + double res; + int8_t stage; +} SStddevInfo; + +typedef struct SStddevdstInfo { + int64_t num; + double res; +} SStddevdstInfo; + +typedef struct SFirstLastInfo { + int8_t hasResult; + TSKEY ts; +} SFirstLastInfo; + +typedef struct SFirstLastInfo SLastrowInfo; +typedef struct SPercentileInfo { + tMemBucket *pMemBucket; + int32_t stage; + double minval; + double maxval; + int64_t numOfElems; +} SPercentileInfo; + +typedef struct STopBotInfo { + int32_t num; + tValuePair **res; +} STopBotInfo; + +// leastsquares do not apply to super table +typedef struct SLeastsquaresInfo { + double mat[2][3]; + double startVal; + int64_t num; +} SLeastsquaresInfo; + +typedef struct SAPercentileInfo { + SHistogramInfo *pHisto; +} SAPercentileInfo; + +typedef struct STSCompInfo { + STSBuf *pTSBuf; +} STSCompInfo; + +typedef struct SRateInfo { + double correctionValue; + double firstValue; + TSKEY firstKey; + double lastValue; + TSKEY lastKey; + int8_t hasResult; // flag to denote has value + bool isIRate; // true for IRate functions, false for Rate functions +} SRateInfo; + +typedef struct SDerivInfo { + double prevValue; // previous value + TSKEY prevTs; // previous timestamp + bool ignoreNegative;// ignore the negative value + int64_t tsWindow; // time window for derivative + bool valueSet; // the value has been set already +} SDerivInfo; + +typedef struct SResPair { + TSKEY key; + double avg; +} SResPair; + +#define TSDB_BLOCK_DIST_STEP_ROWS 16 + +typedef struct STableBlockDist { + uint16_t rowSize; + uint16_t numOfFiles; + uint32_t numOfTables; + uint64_t totalSize; 
+ uint64_t totalRows; + int32_t maxRows; + int32_t minRows; + int32_t firstSeekTimeUs; + uint32_t numOfRowsInMemTable; + uint32_t numOfSmallBlocks; + SArray *dataBlockInfos; +} STableBlockDist; + +typedef struct SFileBlockInfo { + int32_t numBlocksOfStep; +} SFileBlockInfo; + +int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionId, int32_t param, SResultDataInfo* pInfo, int16_t extLength, + bool isSuperTable/*, SUdfInfo* pUdfInfo*/) { + if (!isValidDataType(dataType)) { +// qError("Illegal data type %d or data type length %d", dataType, dataBytes); + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + + if (functionId == FUNCTION_TS || functionId == FUNCTION_TS_DUMMY || functionId == FUNCTION_TAG_DUMMY || + functionId == FUNCTION_DIFF || functionId == FUNCTION_PRJ || functionId == FUNCTION_TAGPRJ || + functionId == FUNCTION_TAG || functionId == FUNCTION_INTERP) { + pInfo->type = (int16_t)dataType; + pInfo->bytes = (int16_t)dataBytes; + + if (functionId == FUNCTION_INTERP) { + pInfo->intermediateBytes = sizeof(SInterpInfoDetail); + } else { + pInfo->intermediateBytes = 0; + } + + return TSDB_CODE_SUCCESS; + } + + // (uid, tid) + VGID + TAGSIZE + VARSTR_HEADER_SIZE + if (functionId == FUNCTION_TID_TAG) { // todo use struct + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = (int16_t)(dataBytes + sizeof(int16_t) + sizeof(int64_t) + sizeof(int32_t) + sizeof(int32_t) + VARSTR_HEADER_SIZE); + pInfo->intermediateBytes = 0; + return TSDB_CODE_SUCCESS; + } + + if (functionId == FUNCTION_BLKINFO) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = 16384; + pInfo->intermediateBytes = 0; + return TSDB_CODE_SUCCESS; + } + + if (functionId == FUNCTION_COUNT) { + pInfo->type = TSDB_DATA_TYPE_BIGINT; + pInfo->bytes = sizeof(int64_t); + pInfo->intermediateBytes = 0; + return TSDB_CODE_SUCCESS; + } + + if (functionId == FUNCTION_ARITHM) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = 0; + return 
TSDB_CODE_SUCCESS; + } + + if (functionId == FUNCTION_TS_COMP) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = 1; // the result is compressed ts data, only one byte + pInfo->intermediateBytes = POINTER_BYTES; + return TSDB_CODE_SUCCESS; + } + + if (functionId == FUNCTION_DERIVATIVE) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); // the derivative result is reported as a double + pInfo->intermediateBytes = sizeof(SDerivInfo); + return TSDB_CODE_SUCCESS; + } + + if (isSuperTable) { +// if (functionId < 0) { +// if (pUdfInfo->bufSize > 0) { +// pInfo->type = TSDB_DATA_TYPE_BINARY; +// pInfo->bytes = pUdfInfo->bufSize; +// pInfo->intermediateBytes = pInfo->bytes; +// } else { +// pInfo->type = pUdfInfo->resType; +// pInfo->bytes = pUdfInfo->resBytes; +// pInfo->intermediateBytes = pInfo->bytes; +// } +// +// return TSDB_CODE_SUCCESS; +// } + + if (functionId == FUNCTION_MIN || functionId == FUNCTION_MAX) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = (int16_t)(dataBytes + DATA_SET_FLAG_SIZE); + pInfo->intermediateBytes = pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_SUM) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = sizeof(SSumInfo); + pInfo->intermediateBytes = pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_AVG) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = sizeof(SAvgInfo); + pInfo->intermediateBytes = pInfo->bytes; + return TSDB_CODE_SUCCESS; + + } else if (functionId >= FUNCTION_RATE && functionId <= FUNCTION_IRATE) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(SRateInfo); + pInfo->intermediateBytes = sizeof(SRateInfo); + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = (int16_t)(sizeof(STopBotInfo) + (sizeof(tValuePair) + POINTER_BYTES + extLength) * param); + pInfo->intermediateBytes = 
pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_SPREAD) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = sizeof(SSpreadInfo); + pInfo->intermediateBytes = pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_APERCT) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1) + sizeof(SHistogramInfo) + sizeof(SAPercentileInfo); + pInfo->intermediateBytes = pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_LAST_ROW) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = (int16_t)(sizeof(SLastrowInfo) + dataBytes); + pInfo->intermediateBytes = pInfo->bytes; + + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_TWA) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(STwaInfo); + pInfo->intermediateBytes = pInfo->bytes; + return TSDB_CODE_SUCCESS; + } + } + + if (functionId == FUNCTION_SUM) { + if (IS_SIGNED_NUMERIC_TYPE(dataType)) { + pInfo->type = TSDB_DATA_TYPE_BIGINT; + } else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) { + pInfo->type = TSDB_DATA_TYPE_UBIGINT; + } else { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + } + + pInfo->bytes = sizeof(int64_t); + pInfo->intermediateBytes = sizeof(SSumInfo); + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_APERCT) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = + sizeof(SAPercentileInfo) + sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1); + return TSDB_CODE_SUCCESS; + } else if (functionId == FUNCTION_TWA) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = sizeof(STwaInfo); + return TSDB_CODE_SUCCESS; + } + +// if (functionId < 0) { +// pInfo->type = pUdfInfo->resType; +// pInfo->bytes = pUdfInfo->resBytes; +// +// if (pUdfInfo->bufSize > 0) { +// pInfo->intermediateBytes = pUdfInfo->bufSize; +// } else { +// 
pInfo->intermediateBytes = pInfo->bytes; +// } +// +// return TSDB_CODE_SUCCESS; +// } + + if (functionId == FUNCTION_AVG) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = sizeof(SAvgInfo); + } else if (functionId >= FUNCTION_RATE && functionId <= FUNCTION_IRATE) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = sizeof(SRateInfo); + } else if (functionId == FUNCTION_STDDEV) { + pInfo->type = TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = sizeof(SStddevInfo); + } else if (functionId == FUNCTION_MIN || functionId == FUNCTION_MAX) { + pInfo->type = (int16_t)dataType; + pInfo->bytes = (int16_t)dataBytes; + pInfo->intermediateBytes = dataBytes + DATA_SET_FLAG_SIZE; + } else if (functionId == FUNCTION_FIRST || functionId == FUNCTION_LAST) { + pInfo->type = (int16_t)dataType; + pInfo->bytes = (int16_t)dataBytes; + pInfo->intermediateBytes = (int16_t)(dataBytes + sizeof(SFirstLastInfo)); + } else if (functionId == FUNCTION_SPREAD) { + pInfo->type = (int16_t)TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = sizeof(double); + pInfo->intermediateBytes = sizeof(SSpreadInfo); + } else if (functionId == FUNCTION_PERCT) { + pInfo->type = (int16_t)TSDB_DATA_TYPE_DOUBLE; + pInfo->bytes = (int16_t)sizeof(double); + pInfo->intermediateBytes = (int16_t)sizeof(SPercentileInfo); + } else if (functionId == FUNCTION_LEASTSQR) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = MAX(AVG_FUNCTION_INTER_BUFFER_SIZE, sizeof(SLeastsquaresInfo)); // string + pInfo->intermediateBytes = pInfo->bytes; + } else if (functionId == FUNCTION_FIRST_DST || functionId == FUNCTION_LAST_DST) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = (int16_t)(dataBytes + sizeof(SFirstLastInfo)); + pInfo->intermediateBytes = pInfo->bytes; + } else if (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM) { + pInfo->type = (int16_t)dataType; + pInfo->bytes = 
(int16_t)dataBytes; + + size_t size = sizeof(STopBotInfo) + (sizeof(tValuePair) + POINTER_BYTES + extLength) * param; + + // the output column may be larger than sizeof(STopBotInfo) + pInfo->intermediateBytes = (int32_t)size; + } else if (functionId == FUNCTION_LAST_ROW) { + pInfo->type = (int16_t)dataType; + pInfo->bytes = (int16_t)dataBytes; + pInfo->intermediateBytes = dataBytes; + } else if (functionId == FUNCTION_STDDEV_DST) { + pInfo->type = TSDB_DATA_TYPE_BINARY; + pInfo->bytes = sizeof(SStddevdstInfo); + pInfo->intermediateBytes = (pInfo->bytes); + + } else { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + return TSDB_CODE_SUCCESS; +} + +static bool function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (pResultInfo->initialized) { + return false; + } + + memset(pCtx->pOutput, 0, (size_t)pCtx->outputBytes); + initResultInfo(pResultInfo, pCtx->interBufBytes); + return true; +} + +/** + * in handling the stable query, function_finalizer is called after the secondary + * merge being completed, during the first merge procedure, which is executed at the + * vnode side, the finalize will never be called. + * + * @param pCtx + */ +static void function_finalizer(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + } + + doFinalizer(pCtx); +} + +/* + * count function does need the finalize, if data is missing, the default value, which is 0, is used + * count function does not use the pCtx->interResBuf to keep the intermediate buffer + */ +static void count_function(SQLFunctionCtx *pCtx) { + int32_t numOfElem = 0; + + /* + * 1. column data missing (schema modified) causes pCtx->hasNull == true. pCtx->isSmaSet == true; + * 2. for general non-primary key columns, pCtx->hasNull may be true or false, pCtx->isSmaSet == true; + * 3. 
for primary key column, pCtx->hasNull always be false, pCtx->isSmaSet == false; + */ + if (pCtx->isSmaSet) { + numOfElem = pCtx->size - pCtx->sma.numOfNull; + } else { + if (pCtx->hasNull) { + for (int32_t i = 0; i < pCtx->size; ++i) { + char *val = GET_INPUT_DATA(pCtx, i); + if (isNull(val, pCtx->inputType)) { + continue; + } + + numOfElem += 1; + } + } else { + //when counting on the primary time stamp column and no statistics data is presented, use the size value directly. + numOfElem = pCtx->size; + } + } + + if (numOfElem > 0) { + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; + } + + *((int64_t *)pCtx->pOutput) += numOfElem; + SET_VAL(pCtx, numOfElem, 1); +} + +static void count_func_merge(SQLFunctionCtx *pCtx) { + int64_t *pData = (int64_t *)GET_INPUT_DATA_LIST(pCtx); + for (int32_t i = 0; i < pCtx->size; ++i) { + *((int64_t *)pCtx->pOutput) += pData[i]; + } + + SET_VAL(pCtx, pCtx->size, 1); +} + +/** + * 1. If the column value for filter exists, we need to load the SFields, which serves + * as the pre-filter to decide if the actual data block is required or not. + * 2. If it queries on the non-primary timestamp column, SFields is also required to get the not-null value. 
+ * + * @param colId + * @param filterCols + * @return + */ +int32_t countRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + return BLK_DATA_NO_NEEDED; + } else { + return BLK_DATA_STATIS_NEEDED; + } +} + +int32_t noDataRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + return BLK_DATA_NO_NEEDED; +} +#define LIST_ADD_N_DOUBLE_FLOAT(x, ctx, p, t, numOfElem, tsdbType) \ + do { \ + t *d = (t *)(p); \ + for (int32_t i = 0; i < (ctx)->size; ++i) { \ + if (((ctx)->hasNull) && isNull((char *)&(d)[i], tsdbType)) { \ + continue; \ + }; \ + SET_DOUBLE_VAL(&(x) , GET_DOUBLE_VAL(&(x)) + GET_FLOAT_VAL(&(d)[i])); \ + (numOfElem)++; \ + } \ + } while(0) +#define LIST_ADD_N_DOUBLE(x, ctx, p, t, numOfElem, tsdbType) \ + do { \ + t *d = (t *)(p); \ + for (int32_t i = 0; i < (ctx)->size; ++i) { \ + if (((ctx)->hasNull) && isNull((char *)&(d)[i], tsdbType)) { \ + continue; \ + }; \ + SET_DOUBLE_VAL(&(x) , (x) + (d)[i]); \ + (numOfElem)++; \ + } \ + } while(0) + +#define LIST_ADD_N(x, ctx, p, t, numOfElem, tsdbType) \ + do { \ + t *d = (t *)(p); \ + for (int32_t i = 0; i < (ctx)->size; ++i) { \ + if (((ctx)->hasNull) && isNull((char *)&(d)[i], tsdbType)) { \ + continue; \ + }; \ + (x) += (d)[i]; \ + (numOfElem)++; \ + } \ + } while(0) + +#define UPDATE_DATA(ctx, left, right, num, sign, k) \ + do { \ + if (((left) < (right)) ^ (sign)) { \ + (left) = (right); \ + DO_UPDATE_TAG_COLUMNS(ctx, k); \ + (num) += 1; \ + } \ + } while (0) + +#define DUPATE_DATA_WITHOUT_TS(ctx, left, right, num, sign) \ + do { \ + if (((left) < (right)) ^ (sign)) { \ + (left) = (right); \ + DO_UPDATE_TAG_COLUMNS_WITHOUT_TS(ctx); \ + (num) += 1; \ + } \ + } while (0) + +#define LOOPCHECK_N(val, list, ctx, tsdbType, sign, num) \ + for (int32_t i = 0; i < ((ctx)->size); ++i) { \ + if ((ctx)->hasNull && isNull((char *)&(list)[i], tsdbType)) { \ + continue; \ + } \ + TSKEY key = (ctx)->ptsList != NULL? 
GET_TS_DATA(ctx, i):0; \ + UPDATE_DATA(ctx, val, (list)[i], num, sign, key); \ + } + +#define TYPED_LOOPCHECK_N(type, data, list, ctx, tsdbType, sign, notNullElems) \ + do { \ + type *_data = (type *)data; \ + type *_list = (type *)list; \ + LOOPCHECK_N(*_data, _list, ctx, tsdbType, sign, notNullElems); \ + } while (0) + +static void do_sum(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + // Only the pre-computing information loaded and actual data does not loaded + if (pCtx->isSmaSet) { + notNullElems = pCtx->size - pCtx->sma.numOfNull; + assert(pCtx->size >= pCtx->sma.numOfNull); + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + int64_t *retVal = (int64_t *)pCtx->pOutput; + *retVal += pCtx->sma.sum; + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + uint64_t *retVal = (uint64_t *)pCtx->pOutput; + *retVal += (uint64_t)pCtx->sma.sum; + } else if (IS_FLOAT_TYPE(pCtx->inputType)) { + double *retVal = (double*) pCtx->pOutput; + SET_DOUBLE_VAL(retVal, *retVal + GET_DOUBLE_VAL((const char*)&(pCtx->sma.sum))); + } + } else { // computing based on the true data block + void *pData = GET_INPUT_DATA_LIST(pCtx); + notNullElems = 0; + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + int64_t *retVal = (int64_t *)pCtx->pOutput; + + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { + LIST_ADD_N(*retVal, pCtx, pData, int8_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { + LIST_ADD_N(*retVal, pCtx, pData, int16_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { + LIST_ADD_N(*retVal, pCtx, pData, int32_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT) { + LIST_ADD_N(*retVal, pCtx, pData, int64_t, notNullElems, pCtx->inputType); + } + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + uint64_t *retVal = (uint64_t *)pCtx->pOutput; + + if (pCtx->inputType == TSDB_DATA_TYPE_UTINYINT) { + LIST_ADD_N(*retVal, pCtx, pData, uint8_t, 
notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_USMALLINT) { + LIST_ADD_N(*retVal, pCtx, pData, uint16_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UINT) { + LIST_ADD_N(*retVal, pCtx, pData, uint32_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UBIGINT) { + LIST_ADD_N(*retVal, pCtx, pData, uint64_t, notNullElems, pCtx->inputType); + } + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { + double *retVal = (double *)pCtx->pOutput; + LIST_ADD_N_DOUBLE(*retVal, pCtx, pData, double, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + double *retVal = (double *)pCtx->pOutput; + LIST_ADD_N_DOUBLE_FLOAT(*retVal, pCtx, pData, float, notNullElems, pCtx->inputType); + } + } + + // data in the check operation are all null, not output + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; + } +} + +static void sum_function(SQLFunctionCtx *pCtx) { + do_sum(pCtx); + + // keep the result data in output buffer, not in the intermediate buffer + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo->hasResult == DATA_SET_FLAG && pCtx->stableQuery) { + // set the flag for super table query + SSumInfo *pSum = (SSumInfo *)pCtx->pOutput; + pSum->hasResult = DATA_SET_FLAG; + } +} + +static void sum_func_merge(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + GET_TRUE_DATA_TYPE(); + assert(pCtx->stableQuery); + + for (int32_t i = 0; i < pCtx->size; ++i) { + char * input = GET_INPUT_DATA(pCtx, i); + SSumInfo *pInput = (SSumInfo *)input; + if (pInput->hasResult != DATA_SET_FLAG) { + continue; + } + + notNullElems++; + + if (IS_SIGNED_NUMERIC_TYPE(type)) { + *(int64_t *)pCtx->pOutput += pInput->isum; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + *(uint64_t *) pCtx->pOutput += pInput->usum; + } else { + SET_DOUBLE_VAL((double *)pCtx->pOutput, *(double *)pCtx->pOutput + 
pInput->dsum); + } + } + + SET_VAL(pCtx, notNullElems, 1); + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static int32_t statisRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + return BLK_DATA_STATIS_NEEDED; +} + +static int32_t dataBlockRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + return BLK_DATA_ALL_NEEDED; +} + +// todo: if column in current data block are null, opt for this case +static int32_t firstFuncRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + if (pCtx->order == TSDB_ORDER_DESC) { + return BLK_DATA_NO_NEEDED; + } + + // no result for first query, data block is required + if (GET_RES_INFO(pCtx) == NULL || GET_RES_INFO(pCtx)->numOfRes <= 0) { + return BLK_DATA_ALL_NEEDED; + } else { + return BLK_DATA_NO_NEEDED; + } +} + +static int32_t lastFuncRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + if (pCtx->order != pCtx->param[0].i64) { + return BLK_DATA_NO_NEEDED; + } + + if (GET_RES_INFO(pCtx) == NULL || GET_RES_INFO(pCtx)->numOfRes <= 0) { + return BLK_DATA_ALL_NEEDED; + } else { + return BLK_DATA_NO_NEEDED; + } +} + +static int32_t firstDistFuncRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + if (pCtx->order == TSDB_ORDER_DESC) { + return BLK_DATA_NO_NEEDED; + } + + // not initialized yet, it is the first block, load it. + if (pCtx->pOutput == NULL) { + return BLK_DATA_ALL_NEEDED; + } + + // the pCtx should be set to current Ctx and output buffer before call this function. Otherwise, pCtx->pOutput is + // the previous windowRes output buffer, not current unloaded block. In this case, the following filter is invalid + SFirstLastInfo *pInfo = (SFirstLastInfo*) (pCtx->pOutput + pCtx->inputBytes); + if (pInfo->hasResult != DATA_SET_FLAG) { + return BLK_DATA_ALL_NEEDED; + } else { // data in current block is not earlier than current result + return (pInfo->ts <= w->skey) ? 
BLK_DATA_NO_NEEDED : BLK_DATA_ALL_NEEDED; + } +} + +static int32_t lastDistFuncRequired(SQLFunctionCtx *pCtx, STimeWindow* w, int32_t colId) { + if (pCtx->order != pCtx->param[0].i64) { + return BLK_DATA_NO_NEEDED; + } + + // not initialized yet, it is the first block, load it. + if (pCtx->pOutput == NULL) { + return BLK_DATA_ALL_NEEDED; + } + + // the pCtx should be set to current Ctx and output buffer before call this function. Otherwise, pCtx->pOutput is + // the previous windowRes output buffer, not current unloaded block. In this case, the following filter is invalid + SFirstLastInfo *pInfo = (SFirstLastInfo*) (pCtx->pOutput + pCtx->inputBytes); + if (pInfo->hasResult != DATA_SET_FLAG) { + return BLK_DATA_ALL_NEEDED; + } else { + return (pInfo->ts > w->ekey) ? BLK_DATA_NO_NEEDED : BLK_DATA_ALL_NEEDED; + } +} + +////////////////////////////////////////////////////////////////////////////////////////////// +/* + * The intermediate result of average is kept in the interResultBuf. + * For super table query, once the avg_function/avg_function_f is finished, copy the intermediate + * result into output buffer. 
+ */ +static void avg_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + // NOTE: keep the intermediate result into the interResultBuf + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + SAvgInfo *pAvgInfo = (SAvgInfo *)GET_ROWCELL_INTERBUF(pResInfo); + double *pVal = &pAvgInfo->sum; + + if (pCtx->isSmaSet) { // Pre-aggregation + notNullElems = pCtx->size - pCtx->sma.numOfNull; + assert(notNullElems >= 0); + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + *pVal += pCtx->sma.sum; + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + *pVal += (uint64_t) pCtx->sma.sum; + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + *pVal += GET_DOUBLE_VAL((const char *)&(pCtx->sma.sum)); + } + } else { + void *pData = GET_INPUT_DATA_LIST(pCtx); + + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { + LIST_ADD_N(*pVal, pCtx, pData, int8_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { + LIST_ADD_N(*pVal, pCtx, pData, int16_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { + LIST_ADD_N(*pVal, pCtx, pData, int32_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT) { + LIST_ADD_N(*pVal, pCtx, pData, int64_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { + LIST_ADD_N_DOUBLE(*pVal, pCtx, pData, double, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + LIST_ADD_N_DOUBLE_FLOAT(*pVal, pCtx, pData, float, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UTINYINT) { + LIST_ADD_N(*pVal, pCtx, pData, uint8_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_USMALLINT) { + LIST_ADD_N(*pVal, pCtx, pData, uint16_t, notNullElems, pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UINT) { + LIST_ADD_N(*pVal, pCtx, pData, uint32_t, notNullElems, 
pCtx->inputType); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UBIGINT) { + LIST_ADD_N(*pVal, pCtx, pData, uint64_t, notNullElems, pCtx->inputType); + } + } + + if (!pCtx->hasNull) { + assert(notNullElems == pCtx->size); + } + + SET_VAL(pCtx, notNullElems, 1); + pAvgInfo->num += notNullElems; + + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } + + // keep the data into the final output buffer for super table query since this execution may be the last one + if (pCtx->stableQuery) { + memcpy(pCtx->pOutput, GET_ROWCELL_INTERBUF(pResInfo), sizeof(SAvgInfo)); + } +} + +static void avg_func_merge(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + double *sum = (double*) pCtx->pOutput; + char *input = GET_INPUT_DATA_LIST(pCtx); + + for (int32_t i = 0; i < pCtx->size; ++i, input += pCtx->inputBytes) { + SAvgInfo *pInput = (SAvgInfo *)input; + if (pInput->num == 0) { // current input is null + continue; + } + + SET_DOUBLE_VAL(sum, *sum + pInput->sum); + + // keep the number of data into the temp buffer + *(int64_t *)GET_ROWCELL_INTERBUF(pResInfo) += pInput->num; + } +} + +/* + * the average value is calculated in finalize routine, since current routine does not know the exact number of points + */ +static void avg_finalizer(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + if (pCtx->currentStage == MERGE_STAGE) { + assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); + + if (GET_INT64_VAL(GET_ROWCELL_INTERBUF(pResInfo)) <= 0) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + SET_DOUBLE_VAL((double *)pCtx->pOutput,(*(double *)pCtx->pOutput) / *(int64_t *)GET_ROWCELL_INTERBUF(pResInfo)); + } else { // this is the secondary merge, only in the secondary merge, the input type is TSDB_DATA_TYPE_BINARY + assert(IS_NUMERIC_TYPE(pCtx->inputType)); + SAvgInfo *pAvgInfo = (SAvgInfo *)GET_ROWCELL_INTERBUF(pResInfo); + + if (pAvgInfo->num == 0) { // all data are NULL or empty 
table + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + SET_DOUBLE_VAL((double *)pCtx->pOutput, pAvgInfo->sum / pAvgInfo->num); + } + + // cannot set the numOfIteratedElems again since it is set during previous iteration + GET_RES_INFO(pCtx)->numOfRes = 1; + doFinalizer(pCtx); +} + +///////////////////////////////////////////////////////////////////////////////////////////// + +static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, int32_t *notNullElems) { + // data in current data block are qualified to the query + if (pCtx->isSmaSet) { + *notNullElems = pCtx->size - pCtx->sma.numOfNull; + assert(*notNullElems >= 0); + + if (*notNullElems == 0) { + return; + } + + void* tval = NULL; + int16_t index = 0; + + if (isMin) { + tval = &pCtx->sma.min; + index = pCtx->sma.minIndex; + } else { + tval = &pCtx->sma.max; + index = pCtx->sma.maxIndex; + } + + TSKEY key = TSKEY_INITIAL_VAL; + if (pCtx->ptsList != NULL) { + /** + * NOTE: work around the bug caused by invalid pre-calculated function. + * Here the selectivity + ts will not return correct value. + * + * The following codes of 3 lines will be removed later. 
+ */ +// if (index < 0 || index >= pCtx->size + pCtx->startOffset) { +// index = 0; +// } + + // the index is the original position, not the relative position + key = pCtx->ptsList[index]; + } + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + int64_t val = GET_INT64_VAL(tval); + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { + int8_t *data = (int8_t *)pOutput; + + UPDATE_DATA(pCtx, *data, (int8_t)val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { + int16_t *data = (int16_t *)pOutput; + + UPDATE_DATA(pCtx, *data, (int16_t)val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { + int32_t *data = (int32_t *)pOutput; +#if defined(_DEBUG_VIEW) + qDebug("max value updated according to pre-cal:%d", *data); +#endif + + if ((*data < val) ^ isMin) { + *data = (int32_t)val; + for (int32_t i = 0; i < (pCtx)->tagInfo.numOfTagCols; ++i) { + SQLFunctionCtx *__ctx = pCtx->tagInfo.pTagCtxList[i]; + if (__ctx->functionId == FUNCTION_TS_DUMMY) { + __ctx->tag.i64 = key; + __ctx->tag.nType = TSDB_DATA_TYPE_BIGINT; + } + + aggFunc[FUNCTION_TAG].exec(__ctx); + } + } + } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT) { + int64_t *data = (int64_t *)pOutput; + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); + } + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + uint64_t val = GET_UINT64_VAL(tval); + if (pCtx->inputType == TSDB_DATA_TYPE_UTINYINT) { + uint8_t *data = (uint8_t *)pOutput; + + UPDATE_DATA(pCtx, *data, (uint8_t)val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_USMALLINT) { + uint16_t *data = (uint16_t *)pOutput; + UPDATE_DATA(pCtx, *data, (uint16_t)val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UINT) { + uint32_t *data = (uint32_t *)pOutput; + UPDATE_DATA(pCtx, *data, (uint32_t)val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UBIGINT) { + uint64_t *data = (uint64_t *)pOutput; + 
UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); + } + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { + double *data = (double *)pOutput; + double val = GET_DOUBLE_VAL(tval); + + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); + } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + float *data = (float *)pOutput; + double val = GET_DOUBLE_VAL(tval); + + UPDATE_DATA(pCtx, *data, (float)val, notNullElems, isMin, key); + } + + return; + } + + void *p = GET_INPUT_DATA_LIST(pCtx); + TSKEY *tsList = GET_TS_LIST(pCtx); + + *notNullElems = 0; + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { + TYPED_LOOPCHECK_N(int8_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { + TYPED_LOOPCHECK_N(int16_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { + int32_t *pData = p; + int32_t *retVal = (int32_t*) pOutput; + + for (int32_t i = 0; i < pCtx->size; ++i) { + if (pCtx->hasNull && isNull((const char*)&pData[i], pCtx->inputType)) { + continue; + } + + if ((*retVal < pData[i]) ^ isMin) { + *retVal = pData[i]; + TSKEY k = tsList[i]; + + DO_UPDATE_TAG_COLUMNS(pCtx, k); + } + + *notNullElems += 1; + } +#if defined(_DEBUG_VIEW) + qDebug("max value updated:%d", *retVal); +#endif + } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT) { + TYPED_LOOPCHECK_N(int64_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + if (pCtx->inputType == TSDB_DATA_TYPE_UTINYINT) { + TYPED_LOOPCHECK_N(uint8_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_USMALLINT) { + TYPED_LOOPCHECK_N(uint16_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UINT) { + TYPED_LOOPCHECK_N(uint32_t, pOutput, p, pCtx, pCtx->inputType, 
isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UBIGINT) { + TYPED_LOOPCHECK_N(uint64_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { + TYPED_LOOPCHECK_N(double, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + TYPED_LOOPCHECK_N(float, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); + } +} + +static bool min_func_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (!function_setup(pCtx, pResultInfo)) { + return false; // not initialized since it has been initialized + } + + GET_TRUE_DATA_TYPE(); + + switch (type) { + case TSDB_DATA_TYPE_TINYINT: + *((int8_t *)pCtx->pOutput) = INT8_MAX; + break; + case TSDB_DATA_TYPE_UTINYINT: + *(uint8_t *) pCtx->pOutput = UINT8_MAX; + break; + case TSDB_DATA_TYPE_SMALLINT: + *((int16_t *)pCtx->pOutput) = INT16_MAX; + break; + case TSDB_DATA_TYPE_USMALLINT: + *((uint16_t *)pCtx->pOutput) = UINT16_MAX; + break; + case TSDB_DATA_TYPE_INT: + *((int32_t *)pCtx->pOutput) = INT32_MAX; + break; + case TSDB_DATA_TYPE_UINT: + *((uint32_t *)pCtx->pOutput) = UINT32_MAX; + break; + case TSDB_DATA_TYPE_BIGINT: + *((int64_t *)pCtx->pOutput) = INT64_MAX; + break; + case TSDB_DATA_TYPE_UBIGINT: + *((uint64_t *)pCtx->pOutput) = UINT64_MAX; + break; + case TSDB_DATA_TYPE_FLOAT: + *((float *)pCtx->pOutput) = FLT_MAX; + break; + case TSDB_DATA_TYPE_DOUBLE: + SET_DOUBLE_VAL(((double *)pCtx->pOutput), DBL_MAX); + break; + default: + assert(0); +// qError("illegal data type:%d in min/max query", pCtx->inputType); + } + + return true; +} + +static bool max_func_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (!function_setup(pCtx, pResultInfo)) { + return false; // not initialized since it has been initialized + } + + GET_TRUE_DATA_TYPE(); + + switch (type) { + case TSDB_DATA_TYPE_INT: + *((int32_t *)pCtx->pOutput) = INT32_MIN; + break; + case 
TSDB_DATA_TYPE_UINT: + *((uint32_t *)pCtx->pOutput) = 0; + break; + case TSDB_DATA_TYPE_FLOAT: + *((float *)pCtx->pOutput) = -FLT_MAX; + break; + case TSDB_DATA_TYPE_DOUBLE: + SET_DOUBLE_VAL(((double *)pCtx->pOutput), -DBL_MAX); + break; + case TSDB_DATA_TYPE_BIGINT: + *((int64_t *)pCtx->pOutput) = INT64_MIN; + break; + case TSDB_DATA_TYPE_UBIGINT: + *((uint64_t *)pCtx->pOutput) = 0; + break; + case TSDB_DATA_TYPE_SMALLINT: + *((int16_t *)pCtx->pOutput) = INT16_MIN; + break; + case TSDB_DATA_TYPE_USMALLINT: + *((uint16_t *)pCtx->pOutput) = 0; + break; + case TSDB_DATA_TYPE_TINYINT: + *((int8_t *)pCtx->pOutput) = INT8_MIN; + break; + case TSDB_DATA_TYPE_UTINYINT: + *((uint8_t *)pCtx->pOutput) = 0; + break; + default: + assert(0); +// qError("illegal data type:%d in min/max query", pCtx->inputType); + } + + return true; +} + +/* + * the output result of min/max function is the final output buffer, not the intermediate result buffer + */ +static void min_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + minMax_function(pCtx, pCtx->pOutput, 1, ¬NullElems); + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + + // set the flag for super table query + if (pCtx->stableQuery) { + *(pCtx->pOutput + pCtx->inputBytes) = DATA_SET_FLAG; + } + } +} + +static void max_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + minMax_function(pCtx, pCtx->pOutput, 0, ¬NullElems); + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + + // set the flag for super table query + if (pCtx->stableQuery) { + *(pCtx->pOutput + pCtx->inputBytes) = DATA_SET_FLAG; + } + } +} + +static int32_t minmax_merge_impl(SQLFunctionCtx *pCtx, int32_t bytes, char *output, bool isMin) { + int32_t notNullElems = 0; + + GET_TRUE_DATA_TYPE(); + assert(pCtx->stableQuery); + + for (int32_t 
i = 0; i < pCtx->size; ++i) { + char *input = GET_INPUT_DATA(pCtx, i); + if (input[bytes] != DATA_SET_FLAG) { + continue; + } + + switch (type) { + case TSDB_DATA_TYPE_TINYINT: { + int8_t v = GET_INT8_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(int8_t *)output, v, notNullElems, isMin); + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + int16_t v = GET_INT16_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(int16_t *)output, v, notNullElems, isMin); + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t v = GET_INT32_VAL(input); + if ((*(int32_t *)output < v) ^ isMin) { + *(int32_t *)output = v; + + for (int32_t j = 0; j < pCtx->tagInfo.numOfTagCols; ++j) { + SQLFunctionCtx *__ctx = pCtx->tagInfo.pTagCtxList[j]; + aggFunc[FUNCTION_TAG].exec(__ctx); + } + + notNullElems++; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float v = GET_FLOAT_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(float *)output, v, notNullElems, isMin); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double v = GET_DOUBLE_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(double *)output, v, notNullElems, isMin); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t v = GET_INT64_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(int64_t *)output, v, notNullElems, isMin); + break; + } + + case TSDB_DATA_TYPE_UTINYINT: { + uint8_t v = GET_UINT8_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(uint8_t *)output, v, notNullElems, isMin); + break; + } + + case TSDB_DATA_TYPE_USMALLINT: { + uint16_t v = GET_UINT16_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(uint16_t *)output, v, notNullElems, isMin); + break; + } + + case TSDB_DATA_TYPE_UINT: { + uint32_t v = GET_UINT32_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(uint32_t *)output, v, notNullElems, isMin); + break; + } + + case TSDB_DATA_TYPE_UBIGINT: { + uint64_t v = GET_UINT64_VAL(input); + DUPATE_DATA_WITHOUT_TS(pCtx, *(uint64_t *)output, v, notNullElems, isMin); + break; + } + + default: + break; + } + } + + return notNullElems; +} + +static void 
min_func_merge(SQLFunctionCtx *pCtx) { + int32_t notNullElems = minmax_merge_impl(pCtx, pCtx->outputBytes, pCtx->pOutput, 1); + + SET_VAL(pCtx, notNullElems, 1); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void max_func_merge(SQLFunctionCtx *pCtx) { + int32_t numOfElem = minmax_merge_impl(pCtx, pCtx->outputBytes, pCtx->pOutput, 0); + + SET_VAL(pCtx, numOfElem, 1); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (numOfElem > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +#define LOOP_STDDEV_IMPL(type, r, d, ctx, delta, _type, num) \ + for (int32_t i = 0; i < (ctx)->size; ++i) { \ + if ((ctx)->hasNull && isNull((char *)&((type *)d)[i], (_type))) { \ + continue; \ + } \ + (num) += 1; \ + (r) += POW2(((type *)d)[i] - (delta)); \ + } + +static void stddev_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SStddevInfo *pStd = GET_ROWCELL_INTERBUF(pResInfo); + + if (pCtx->currentStage == REPEAT_SCAN && pStd->stage == 0) { + pStd->stage++; + avg_finalizer(pCtx); + + pResInfo->initialized = true; // set it initialized to avoid re-initialization + + // save average value into tmpBuf, for second stage scan + SAvgInfo *pAvg = GET_ROWCELL_INTERBUF(pResInfo); + + pStd->avg = GET_DOUBLE_VAL(pCtx->pOutput); + assert((isnan(pAvg->sum) && pAvg->num == 0) || (pStd->num == pAvg->num && pStd->avg == pAvg->sum)); + } + + if (pStd->stage == 0) { + // the first stage is to calculate average value + avg_function(pCtx); + } else if (pStd->num > 0) { + // the second stage to calculate standard deviation + // if pStd->num == 0, there are no numbers in the first round check. 
No need to do the second round + double *retVal = &pStd->res; + double avg = pStd->avg; + + void *pData = GET_INPUT_DATA_LIST(pCtx); + int32_t num = 0; + + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_INT: { + for (int32_t i = 0; i < pCtx->size; ++i) { + if (pCtx->hasNull && isNull((const char*) (&((int32_t *)pData)[i]), pCtx->inputType)) { + continue; + } + num += 1; + *retVal += POW2(((int32_t *)pData)[i] - avg); + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + LOOP_STDDEV_IMPL(float, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + LOOP_STDDEV_IMPL(double, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + LOOP_STDDEV_IMPL(int64_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + LOOP_STDDEV_IMPL(int16_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_TINYINT: { + LOOP_STDDEV_IMPL(int8_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UBIGINT: { + LOOP_STDDEV_IMPL(uint64_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_USMALLINT: { + LOOP_STDDEV_IMPL(uint16_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UTINYINT: { + LOOP_STDDEV_IMPL(uint8_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UINT: { + LOOP_STDDEV_IMPL(uint32_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + default: + assert(0); +// qError("stddev function not support data type:%d", pCtx->inputType); + } + + SET_VAL(pCtx, 1, 1); + } +} + +static void stddev_finalizer(SQLFunctionCtx *pCtx) { + SStddevInfo *pStd = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + if (pStd->num <= 0) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + } else { + double *retValue = (double *)pCtx->pOutput; + SET_DOUBLE_VAL(retValue, sqrt(pStd->res / 
pStd->num)); + SET_VAL(pCtx, 1, 1); + } + + doFinalizer(pCtx); +} + +////////////////////////////////////////////////////////////////////////////////////// +int32_t tsCompare(const void* p1, const void* p2) { + TSKEY k = *(TSKEY*)p1; + SResPair* pair = (SResPair*)p2; + + if (k == pair->key) { + return 0; + } else { + return k < pair->key? -1:1; + } +} + +static void stddev_dst_function(SQLFunctionCtx *pCtx) { + SStddevdstInfo *pStd = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + // the second stage to calculate standard deviation + double *retVal = &pStd->res; + + // all data are null, no need to proceed + SArray* resList = (SArray*) pCtx->param[0].pz; + if (resList == NULL) { + return; + } + + // find the correct group average results according to the tag value + int32_t len = (int32_t) taosArrayGetSize(resList); + assert(len > 0); + + double avg = 0; + if (len == 1) { + SResPair* p = taosArrayGet(resList, 0); + avg = p->avg; + } else { // todo opt performance by using iterator since the timestamp lsit is matched with the output result + SResPair* p = bsearch(&pCtx->startTs, resList->pData, len, sizeof(SResPair), tsCompare); + if (p == NULL) { + return; + } + + avg = p->avg; + } + + void *pData = GET_INPUT_DATA_LIST(pCtx); + int32_t num = 0; + + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_INT: { + for (int32_t i = 0; i < pCtx->size; ++i) { + if (pCtx->hasNull && isNull((const char*) (&((int32_t *)pData)[i]), pCtx->inputType)) { + continue; + } + num += 1; + *retVal += POW2(((int32_t *)pData)[i] - avg); + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + LOOP_STDDEV_IMPL(float, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + LOOP_STDDEV_IMPL(double, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_TINYINT: { + LOOP_STDDEV_IMPL(int8_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UTINYINT: { + LOOP_STDDEV_IMPL(int8_t, *retVal, pData, pCtx, 
avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + LOOP_STDDEV_IMPL(int16_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_USMALLINT: { + LOOP_STDDEV_IMPL(uint16_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UINT: { + LOOP_STDDEV_IMPL(uint32_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_BIGINT: { + LOOP_STDDEV_IMPL(int64_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + case TSDB_DATA_TYPE_UBIGINT: { + LOOP_STDDEV_IMPL(uint64_t, *retVal, pData, pCtx, avg, pCtx->inputType, num); + break; + } + default: + assert(0); +// qError("stddev function not support data type:%d", pCtx->inputType); + } + + pStd->num += num; + SET_VAL(pCtx, num, 1); + + // copy to the final output buffer for super table + memcpy(pCtx->pOutput, GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)), sizeof(SAvgInfo)); +} + +static void stddev_dst_merge(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SStddevdstInfo* pRes = GET_ROWCELL_INTERBUF(pResInfo); + + char *input = GET_INPUT_DATA_LIST(pCtx); + + for (int32_t i = 0; i < pCtx->size; ++i, input += pCtx->inputBytes) { + SStddevdstInfo *pInput = (SStddevdstInfo *)input; + if (pInput->num == 0) { // current input is null + continue; + } + + pRes->num += pInput->num; + pRes->res += pInput->res; + } +} + +static void stddev_dst_finalizer(SQLFunctionCtx *pCtx) { + SStddevdstInfo *pStd = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + if (pStd->num <= 0) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + } else { + double *retValue = (double *)pCtx->pOutput; + SET_DOUBLE_VAL(retValue, sqrt(pStd->res / pStd->num)); + SET_VAL(pCtx, 1, 1); + } + + doFinalizer(pCtx); +} + +////////////////////////////////////////////////////////////////////////////////////// +static bool first_last_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if 
(!function_setup(pCtx, pResInfo)) { + return false; + } + + // used to keep the timestamp for comparison + pCtx->param[1].nType = 0; + pCtx->param[1].i64 = 0; + + return true; +} + +// todo opt for null block +static void first_function(SQLFunctionCtx *pCtx) { + if (pCtx->order == TSDB_ORDER_DESC) { + return; + } + + int32_t notNullElems = 0; + + // handle the null value + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + memcpy(pCtx->pOutput, data, pCtx->inputBytes); + if (pCtx->ptsList != NULL) { + TSKEY k = GET_TS_DATA(pCtx, i); + DO_UPDATE_TAG_COLUMNS(pCtx, k); + } + + SResultRowCellInfo *pInfo = GET_RES_INFO(pCtx); + pInfo->hasResult = DATA_SET_FLAG; + pInfo->complete = true; + + notNullElems++; + break; + } + + SET_VAL(pCtx, notNullElems, 1); +} + +static void first_data_assign_impl(SQLFunctionCtx *pCtx, char *pData, int32_t index) { + int64_t *timestamp = GET_TS_LIST(pCtx); + + SFirstLastInfo *pInfo = (SFirstLastInfo *)(pCtx->pOutput + pCtx->inputBytes); + + if (pInfo->hasResult != DATA_SET_FLAG || timestamp[index] < pInfo->ts) { + memcpy(pCtx->pOutput, pData, pCtx->inputBytes); + pInfo->hasResult = DATA_SET_FLAG; + pInfo->ts = timestamp[index]; + + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo->ts); + } +} + +/* + * format of intermediate result: "timestamp,value" need to compare the timestamp in the first part (before the comma) + * to decide if the value is earlier than current intermediate result + */ +static void first_dist_function(SQLFunctionCtx *pCtx) { + /* + * do not to check data in the following cases: + * 1. data block that are not loaded + * 2. 
scan data files in desc order + */ + if (pCtx->order == TSDB_ORDER_DESC) { + return; + } + + int32_t notNullElems = 0; + + // find the first not null value + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + first_data_assign_impl(pCtx, data, i); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + + notNullElems++; + break; + } + + SET_VAL(pCtx, notNullElems, 1); +} + +static void first_dist_func_merge(SQLFunctionCtx *pCtx) { + assert(pCtx->stableQuery); + + char * pData = GET_INPUT_DATA_LIST(pCtx); + SFirstLastInfo *pInput = (SFirstLastInfo*) (pData + pCtx->outputBytes); + if (pInput->hasResult != DATA_SET_FLAG) { + return; + } + + // The param[1] is used to keep the initial value of max ts value + if (pCtx->param[1].nType != pCtx->outputType || pCtx->param[1].i64 > pInput->ts) { + memcpy(pCtx->pOutput, pData, pCtx->outputBytes); + pCtx->param[1].i64 = pInput->ts; + pCtx->param[1].nType = pCtx->outputType; + + DO_UPDATE_TAG_COLUMNS(pCtx, pInput->ts); + } + + SET_VAL(pCtx, 1, 1); + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; +} + +////////////////////////////////////////////////////////////////////////////////////////// +/* + * last function: + * 1. since the last block may be all null value, so, we simply access the last block is not valid + * each block need to be checked. + * 2. If numOfNull == pBlock->numOfBlocks, the whole block is empty. 
Otherwise, there is at + * least one data in this block that is not null.(TODO opt for this case) + */ +static void last_function(SQLFunctionCtx *pCtx) { + if (pCtx->order != pCtx->param[0].i64) { + return; + } + + SResultRowCellInfo* pResInfo = GET_RES_INFO(pCtx); + + int32_t notNullElems = 0; + if (pCtx->order == TSDB_ORDER_DESC) { + + for (int32_t i = pCtx->size - 1; i >= 0; --i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType) && (!pCtx->requireNull)) { + continue; + } + + memcpy(pCtx->pOutput, data, pCtx->inputBytes); + + TSKEY ts = pCtx->ptsList ? GET_TS_DATA(pCtx, i) : 0; + DO_UPDATE_TAG_COLUMNS(pCtx, ts); + + pResInfo->hasResult = DATA_SET_FLAG; + pResInfo->complete = true; // set query completed on this column + notNullElems++; + break; + } + } else { // ascending order + for (int32_t i = pCtx->size - 1; i >= 0; --i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType) && (!pCtx->requireNull)) { + continue; + } + + TSKEY ts = pCtx->ptsList ? 
GET_TS_DATA(pCtx, i) : 0; + + char* buf = GET_ROWCELL_INTERBUF(pResInfo); + if (pResInfo->hasResult != DATA_SET_FLAG || (*(TSKEY*)buf) < ts) { + pResInfo->hasResult = DATA_SET_FLAG; + memcpy(pCtx->pOutput, data, pCtx->inputBytes); + + *(TSKEY*)buf = ts; + DO_UPDATE_TAG_COLUMNS(pCtx, ts); + } + + notNullElems++; + break; + } + } + + SET_VAL(pCtx, notNullElems, 1); +} + +static void last_data_assign_impl(SQLFunctionCtx *pCtx, char *pData, int32_t index) { + int64_t *timestamp = GET_TS_LIST(pCtx); + + SFirstLastInfo *pInfo = (SFirstLastInfo *)(pCtx->pOutput + pCtx->inputBytes); + + if (pInfo->hasResult != DATA_SET_FLAG || pInfo->ts < timestamp[index]) { +#if defined(_DEBUG_VIEW) + qDebug("assign index:%d, ts:%" PRId64 ", val:%d, ", index, timestamp[index], *(int32_t *)pData); +#endif + + memcpy(pCtx->pOutput, pData, pCtx->inputBytes); + pInfo->hasResult = DATA_SET_FLAG; + pInfo->ts = timestamp[index]; + + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo->ts); + } +} + +static void last_dist_function(SQLFunctionCtx *pCtx) { + /* + * 1. for scan data is not the required order + * 2. 
for data blocks that are not loaded, no need to check data + */ + if (pCtx->order != pCtx->param[0].i64) { + return; + } + + int32_t notNullElems = 0; + for (int32_t i = pCtx->size - 1; i >= 0; --i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + if (!pCtx->requireNull) { + continue; + } + } + + last_data_assign_impl(pCtx, data, i); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + + notNullElems++; + break; + } + + SET_VAL(pCtx, notNullElems, 1); +} + +/* + * in the secondary merge(local reduce), the output is limited by the + * final output size, so the main difference between last_dist_func_merge and second_merge + * is: the output data format in computing + */ +static void last_dist_func_merge(SQLFunctionCtx *pCtx) { + char *pData = GET_INPUT_DATA_LIST(pCtx); + + SFirstLastInfo *pInput = (SFirstLastInfo*) (pData + pCtx->outputBytes); + if (pInput->hasResult != DATA_SET_FLAG) { + return; + } + + /* + * param[1] used to keep the corresponding timestamp to decide if current result is + * the true last result + */ + if (pCtx->param[1].nType != pCtx->outputType || pCtx->param[1].i64 < pInput->ts) { + memcpy(pCtx->pOutput, pData, pCtx->outputBytes); + pCtx->param[1].i64 = pInput->ts; + pCtx->param[1].nType = pCtx->outputType; + + DO_UPDATE_TAG_COLUMNS(pCtx, pInput->ts); + } + + SET_VAL(pCtx, 1, 1); + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; +} + +////////////////////////////////////////////////////////////////////////////////// +/* + * NOTE: last_row does not use the interResultBuf to keep the result + */ +static void last_row_function(SQLFunctionCtx *pCtx) { + assert(pCtx->size >= 1); + char *pData = GET_INPUT_DATA_LIST(pCtx); + + // assign the last element in current data block + assignVal(pCtx->pOutput, pData + (pCtx->size - 1) * pCtx->inputBytes, pCtx->inputBytes, pCtx->inputType); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = 
DATA_SET_FLAG; + + // set the result to final result buffer in case of super table query + if (pCtx->stableQuery) { + SLastrowInfo *pInfo1 = (SLastrowInfo *)(pCtx->pOutput + pCtx->inputBytes); + pInfo1->ts = GET_TS_DATA(pCtx, pCtx->size - 1); + pInfo1->hasResult = DATA_SET_FLAG; + + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo1->ts); + } else { + TSKEY ts = GET_TS_DATA(pCtx, pCtx->size - 1); + DO_UPDATE_TAG_COLUMNS(pCtx, ts); + } + + SET_VAL(pCtx, pCtx->size, 1); +} + +static void last_row_finalizer(SQLFunctionCtx *pCtx) { + // do nothing at the first stage + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + GET_RES_INFO(pCtx)->numOfRes = 1; + doFinalizer(pCtx); +} + +////////////////////////////////////////////////////////////////////////////////// +static void valuePairAssign(tValuePair *dst, int16_t type, const char *val, int64_t tsKey, char *pTags, + SExtTagsInfo *pTagInfo, int16_t stage) { + dst->v.nType = type; + dst->v.i64 = *(int64_t *)val; + dst->timestamp = tsKey; + + int32_t size = 0; + if (stage == MERGE_STAGE) { + memcpy(dst->pTags, pTags, (size_t)pTagInfo->tagsLen); + } else { // the tags are dumped from the ctx tag fields + for (int32_t i = 0; i < pTagInfo->numOfTagCols; ++i) { + SQLFunctionCtx* ctx = pTagInfo->pTagCtxList[i]; + if (ctx->functionId == FUNCTION_TS_DUMMY) { + ctx->tag.nType = TSDB_DATA_TYPE_BIGINT; + ctx->tag.i64 = tsKey; + } + + taosVariantDump(&ctx->tag, dst->pTags + size, ctx->tag.nType, true); + size += pTagInfo->pTagCtxList[i]->outputBytes; + } + } +} + +#define VALUEPAIRASSIGN(dst, src, __l) \ + do { \ + (dst)->timestamp = (src)->timestamp; \ + (dst)->v = (src)->v; \ + memcpy((dst)->pTags, (src)->pTags, (size_t)(__l)); \ + } while (0) + +static int32_t topBotComparFn(const void *p1, const void *p2, const void *param) +{ + uint16_t type = *(uint16_t *) param; + tValuePair *val1 = *(tValuePair **) p1; + tValuePair 
*val2 = *(tValuePair **) p2; + + if (IS_SIGNED_NUMERIC_TYPE(type)) { + if (val1->v.i64 == val2->v.i64) { + return 0; + } + + return (val1->v.i64 > val2->v.i64) ? 1 : -1; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + if (val1->v.u64 == val2->v.u64) { + return 0; + } + + return (val1->v.u64 > val2->v.u64) ? 1 : -1; + } + + if (val1->v.d == val2->v.d) { + return 0; + } + + return (val1->v.d > val2->v.d) ? 1 : -1; +} + +static void topBotSwapFn(void *dst, void *src, const void *param) +{ + char tag[32768]; + tValuePair temp; + uint16_t tagLen = *(uint16_t *) param; + tValuePair *vdst = *(tValuePair **) dst; + tValuePair *vsrc = *(tValuePair **) src; + + memset(tag, 0, sizeof(tag)); + temp.pTags = tag; + + VALUEPAIRASSIGN(&temp, vdst, tagLen); + VALUEPAIRASSIGN(vdst, vsrc, tagLen); + VALUEPAIRASSIGN(vsrc, &temp, tagLen); +} + +static void do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, int64_t ts, uint16_t type, + SExtTagsInfo *pTagInfo, char *pTags, int16_t stage) { + SVariant val = {0}; + taosVariantCreateFromBinary(&val, pData, tDataTypes[type].bytes, type); + + tValuePair **pList = pInfo->res; + assert(pList != NULL); + + if (pInfo->num < maxLen) { + valuePairAssign(pList[pInfo->num], type, (const char *)&val.i64, ts, pTags, pTagInfo, stage); + + taosheapsort((void *) pList, sizeof(tValuePair **), pInfo->num + 1, (const void *) &type, topBotComparFn, (const void *) &pTagInfo->tagsLen, topBotSwapFn, 0); + + pInfo->num++; + } else { + if ((IS_SIGNED_NUMERIC_TYPE(type) && val.i64 > pList[0]->v.i64) || + (IS_UNSIGNED_NUMERIC_TYPE(type) && val.u64 > pList[0]->v.u64) || + (IS_FLOAT_TYPE(type) && val.d > pList[0]->v.d)) { + valuePairAssign(pList[0], type, (const char *)&val.i64, ts, pTags, pTagInfo, stage); + taosheapadjust((void *) pList, sizeof(tValuePair **), 0, maxLen - 1, (const void *) &type, topBotComparFn, (const void *) &pTagInfo->tagsLen, topBotSwapFn, 0); + } + } +} + +static void do_bottom_function_add(STopBotInfo *pInfo, int32_t maxLen, 
void *pData, int64_t ts, uint16_t type, + SExtTagsInfo *pTagInfo, char *pTags, int16_t stage) { + SVariant val = {0}; + taosVariantCreateFromBinary(&val, pData, tDataTypes[type].bytes, type); + + tValuePair **pList = pInfo->res; + assert(pList != NULL); + + if (pInfo->num < maxLen) { + valuePairAssign(pList[pInfo->num], type, (const char *)&val.i64, ts, pTags, pTagInfo, stage); + + taosheapsort((void *) pList, sizeof(tValuePair **), pInfo->num + 1, (const void *) &type, topBotComparFn, (const void *) &pTagInfo->tagsLen, topBotSwapFn, 1); + + pInfo->num++; + } else { + if ((IS_SIGNED_NUMERIC_TYPE(type) && val.i64 < pList[0]->v.i64) || + (IS_UNSIGNED_NUMERIC_TYPE(type) && val.u64 < pList[0]->v.u64) || + (IS_FLOAT_TYPE(type) && val.d < pList[0]->v.d)) { + valuePairAssign(pList[0], type, (const char *)&val.i64, ts, pTags, pTagInfo, stage); + taosheapadjust((void *) pList, sizeof(tValuePair **), 0, maxLen - 1, (const void *) &type, topBotComparFn, (const void *) &pTagInfo->tagsLen, topBotSwapFn, 1); + } + } +} + +static int32_t resAscComparFn(const void *pLeft, const void *pRight) { + tValuePair *pLeftElem = *(tValuePair **)pLeft; + tValuePair *pRightElem = *(tValuePair **)pRight; + + if (pLeftElem->timestamp == pRightElem->timestamp) { + return 0; + } else { + return pLeftElem->timestamp > pRightElem->timestamp ? 1 : -1; + } +} + +static int32_t resDescComparFn(const void *pLeft, const void *pRight) { return -resAscComparFn(pLeft, pRight); } + +static int32_t resDataAscComparFn(const void *pLeft, const void *pRight) { + tValuePair *pLeftElem = *(tValuePair **)pLeft; + tValuePair *pRightElem = *(tValuePair **)pRight; + + if (IS_FLOAT_TYPE(pLeftElem->v.nType)) { + if (pLeftElem->v.d == pRightElem->v.d) { + return 0; + } else { + return pLeftElem->v.d > pRightElem->v.d ? 1 : -1; + } + } else if (IS_SIGNED_NUMERIC_TYPE(pLeftElem->v.nType)){ + if (pLeftElem->v.i64 == pRightElem->v.i64) { + return 0; + } else { + return pLeftElem->v.i64 > pRightElem->v.i64 ? 
1 : -1; + } + } else { + if (pLeftElem->v.u64 == pRightElem->v.u64) { + return 0; + } else { + return pLeftElem->v.u64 > pRightElem->v.u64 ? 1 : -1; + } + } +} + +static int32_t resDataDescComparFn(const void *pLeft, const void *pRight) { return -resDataAscComparFn(pLeft, pRight); } + +static void copyTopBotRes(SQLFunctionCtx *pCtx, int32_t type) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STopBotInfo *pRes = GET_ROWCELL_INTERBUF(pResInfo); + + tValuePair **tvp = pRes->res; + + int32_t step = QUERY_ASC_FORWARD_STEP; + int32_t len = (int32_t)(GET_RES_INFO(pCtx)->numOfRes); + + switch (type) { + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_INT: { + int32_t *output = (int32_t *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = (int32_t)tvp[i]->v.i64; + } + break; + } + case TSDB_DATA_TYPE_UBIGINT: + case TSDB_DATA_TYPE_BIGINT: { + int64_t *output = (int64_t *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = tvp[i]->v.i64; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *output = (double *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + SET_DOUBLE_VAL(output, tvp[i]->v.d); + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *output = (float *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = (float)tvp[i]->v.d; + } + break; + } + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *output = (int16_t *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = (int16_t)tvp[i]->v.i64; + } + break; + } + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_TINYINT: { + int8_t *output = (int8_t *)pCtx->pOutput; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = (int8_t)tvp[i]->v.i64; + } + break; + } + default: { +// qError("top/bottom function not support data type:%d", pCtx->inputType); + return; + } + } + + // set the output timestamp of each record. 
+ TSKEY *output = pCtx->ptsOutputBuf; + for (int32_t i = 0; i < len; ++i, output += step) { + *output = tvp[i]->timestamp; + } + + // set the corresponding tag data for each record + // todo check malloc failure + char **pData = calloc(pCtx->tagInfo.numOfTagCols, POINTER_BYTES); + for (int32_t i = 0; i < pCtx->tagInfo.numOfTagCols; ++i) { + pData[i] = pCtx->tagInfo.pTagCtxList[i]->pOutput; + } + + for (int32_t i = 0; i < len; ++i, output += step) { + int16_t offset = 0; + for (int32_t j = 0; j < pCtx->tagInfo.numOfTagCols; ++j) { + memcpy(pData[j], tvp[i]->pTags + offset, (size_t)pCtx->tagInfo.pTagCtxList[j]->outputBytes); + offset += pCtx->tagInfo.pTagCtxList[j]->outputBytes; + pData[j] += pCtx->tagInfo.pTagCtxList[j]->outputBytes; + } + } + + tfree(pData); +} + +/* + * Parameters values: + * 1. param[0]: maximum allowable results + * 2. param[1]: order by type (time or value) + * 3. param[2]: asc/desc order + * + * top/bottom use the intermediate result buffer to keep the intermediate result + */ +static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + // only the first_stage_merge is directly written data into final output buffer + if (pCtx->stableQuery && pCtx->currentStage != MERGE_STAGE) { + return (STopBotInfo*) pCtx->pOutput; + } else { // during normal table query and super table at the secondary_stage, result is written to intermediate buffer + return GET_ROWCELL_INTERBUF(pResInfo); + } +} + + +/* + * keep the intermediate results during scan data blocks in the format of: + * +-----------------------------------+-------------one value pair-----------+------------next value pair-----------+ + * |-------------pointer area----------|----ts---+-----+-----n tags-----------|----ts---+-----+-----n tags-----------| + * +..[Value Pointer1][Value Pointer2].|timestamp|value|tags1|tags2|....|tagsn|timestamp|value|tags1|tags2|....|tagsn+ + */ +static void buildTopBotStruct(STopBotInfo *pTopBotInfo, 
SQLFunctionCtx *pCtx) { + char *tmp = (char *)pTopBotInfo + sizeof(STopBotInfo); + pTopBotInfo->res = (tValuePair**) tmp; + tmp += POINTER_BYTES * pCtx->param[0].i64; + + size_t size = sizeof(tValuePair) + pCtx->tagInfo.tagsLen; + + for (int32_t i = 0; i < pCtx->param[0].i64; ++i) { + pTopBotInfo->res[i] = (tValuePair*) tmp; + pTopBotInfo->res[i]->pTags = tmp + sizeof(tValuePair); + tmp += size; + } +} + +bool topbot_datablock_filter(SQLFunctionCtx *pCtx, const char *minval, const char *maxval) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo == NULL) { + return true; + } + + STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx); + + // required number of results are not reached, continue load data block + if (pTopBotInfo->num < pCtx->param[0].i64) { + return true; + } + + if ((void *)pTopBotInfo->res[0] != (void *)((char *)pTopBotInfo + sizeof(STopBotInfo) + POINTER_BYTES * pCtx->param[0].i64)) { + buildTopBotStruct(pTopBotInfo, pCtx); + } + + tValuePair **pRes = (tValuePair**) pTopBotInfo->res; + + if (pCtx->functionId == FUNCTION_TOP) { + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_TINYINT: + return GET_INT8_VAL(maxval) > pRes[0]->v.i64; + case TSDB_DATA_TYPE_SMALLINT: + return GET_INT16_VAL(maxval) > pRes[0]->v.i64; + case TSDB_DATA_TYPE_INT: + return GET_INT32_VAL(maxval) > pRes[0]->v.i64; + case TSDB_DATA_TYPE_BIGINT: + return GET_INT64_VAL(maxval) > pRes[0]->v.i64; + case TSDB_DATA_TYPE_FLOAT: + return GET_FLOAT_VAL(maxval) > pRes[0]->v.d; + case TSDB_DATA_TYPE_DOUBLE: + return GET_DOUBLE_VAL(maxval) > pRes[0]->v.d; + default: + return true; + } + } else { + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_TINYINT: + return GET_INT8_VAL(minval) < pRes[0]->v.i64; + case TSDB_DATA_TYPE_SMALLINT: + return GET_INT16_VAL(minval) < pRes[0]->v.i64; + case TSDB_DATA_TYPE_INT: + return GET_INT32_VAL(minval) < pRes[0]->v.i64; + case TSDB_DATA_TYPE_BIGINT: + return GET_INT64_VAL(minval) < pRes[0]->v.i64; + case TSDB_DATA_TYPE_FLOAT: + return 
GET_FLOAT_VAL(minval) < pRes[0]->v.d; + case TSDB_DATA_TYPE_DOUBLE: + return GET_DOUBLE_VAL(minval) < pRes[0]->v.d; + default: + return true; + } + } +} + +static bool top_bottom_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; + } + + STopBotInfo *pInfo = getTopBotOutputInfo(pCtx); + buildTopBotStruct(pInfo, pCtx); + return true; +} + +static void top_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + STopBotInfo *pRes = getTopBotOutputInfo(pCtx); + assert(pRes->num >= 0); + + if ((void *)pRes->res[0] != (void *)((char *)pRes + sizeof(STopBotInfo) + POINTER_BYTES * pCtx->param[0].i64)) { + buildTopBotStruct(pRes, pCtx); + } + + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + notNullElems++; + + // NOTE: Set the default timestamp if it is missing [todo refactor] + TSKEY ts = (pCtx->ptsList != NULL)? GET_TS_DATA(pCtx, i):0; + do_top_function_add(pRes, (int32_t)pCtx->param[0].i64, data, ts, pCtx->inputType, &pCtx->tagInfo, NULL, 0); + } + + if (!pCtx->hasNull) { + assert(pCtx->size == notNullElems); + } + + // treat the result as only one result + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void top_func_merge(SQLFunctionCtx *pCtx) { + STopBotInfo *pInput = (STopBotInfo *)GET_INPUT_DATA_LIST(pCtx); + + // construct the input data struct from binary data + buildTopBotStruct(pInput, pCtx); + + STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); + + // the intermediate result is binary, we only use the output data type + for (int32_t i = 0; i < pInput->num; ++i) { + int16_t type = (pCtx->outputType == TSDB_DATA_TYPE_FLOAT)? 
TSDB_DATA_TYPE_DOUBLE:pCtx->outputType; + do_top_function_add(pOutput, (int32_t)pCtx->param[0].i64, &pInput->res[i]->v.i64, pInput->res[i]->timestamp, + type, &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); + } + + SET_VAL(pCtx, pInput->num, pOutput->num); + + if (pOutput->num > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void bottom_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + STopBotInfo *pRes = getTopBotOutputInfo(pCtx); + + if ((void *)pRes->res[0] != (void *)((char *)pRes + sizeof(STopBotInfo) + POINTER_BYTES * pCtx->param[0].i64)) { + buildTopBotStruct(pRes, pCtx); + } + + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + notNullElems++; + // NOTE: Set the default timestamp if it is missing [todo refactor] + TSKEY ts = (pCtx->ptsList != NULL)? GET_TS_DATA(pCtx, i):0; + do_bottom_function_add(pRes, (int32_t)pCtx->param[0].i64, data, ts, pCtx->inputType, &pCtx->tagInfo, NULL, 0); + } + + if (!pCtx->hasNull) { + assert(pCtx->size == notNullElems); + } + + // treat the result as only one result + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void bottom_func_merge(SQLFunctionCtx *pCtx) { + STopBotInfo *pInput = (STopBotInfo *)GET_INPUT_DATA_LIST(pCtx); + + // construct the input data struct from binary data + buildTopBotStruct(pInput, pCtx); + + STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); + + // the intermediate result is binary, we only use the output data type + for (int32_t i = 0; i < pInput->num; ++i) { + int16_t type = (pCtx->outputType == TSDB_DATA_TYPE_FLOAT) ? 
TSDB_DATA_TYPE_DOUBLE : pCtx->outputType; + do_bottom_function_add(pOutput, (int32_t)pCtx->param[0].i64, &pInput->res[i]->v.i64, pInput->res[i]->timestamp, type, + &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); + } + + SET_VAL(pCtx, pInput->num, pOutput->num); + + if (pOutput->num > 0) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void top_bottom_func_finalizer(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + // data in temporary list is less than the required number of results, not enough qualified number of results + STopBotInfo *pRes = GET_ROWCELL_INTERBUF(pResInfo); + if (pRes->num == 0) { // no result + assert(pResInfo->hasResult != DATA_SET_FLAG); + // TODO: + } + + GET_RES_INFO(pCtx)->numOfRes = pRes->num; + tValuePair **tvp = pRes->res; + + // user specify the order of output by sort the result according to timestamp + if (pCtx->param[1].i64 == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + __compar_fn_t comparator = (pCtx->param[2].i64 == TSDB_ORDER_ASC) ? resAscComparFn : resDescComparFn; + qsort(tvp, (size_t)pResInfo->numOfRes, POINTER_BYTES, comparator); + } else /*if (pCtx->param[1].i64 > PRIMARYKEY_TIMESTAMP_COL_INDEX)*/ { + __compar_fn_t comparator = (pCtx->param[2].i64 == TSDB_ORDER_ASC) ? 
resDataAscComparFn : resDataDescComparFn; + qsort(tvp, (size_t)pResInfo->numOfRes, POINTER_BYTES, comparator); + } + + GET_TRUE_DATA_TYPE(); + copyTopBotRes(pCtx, type); + + doFinalizer(pCtx); +} + +/////////////////////////////////////////////////////////////////////////////////////////////// +static bool percentile_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (!function_setup(pCtx, pResultInfo)) { + return false; + } + + // in the first round, get the min-max value of all involved data + SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResultInfo); + SET_DOUBLE_VAL(&pInfo->minval, DBL_MAX); + SET_DOUBLE_VAL(&pInfo->maxval, -DBL_MAX); + pInfo->numOfElems = 0; + + return true; +} + +static void percentile_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + if (pCtx->currentStage == REPEAT_SCAN && pInfo->stage == 0) { + pInfo->stage += 1; + + // all data are null, set it completed + if (pInfo->numOfElems == 0) { + pResInfo->complete = true; + + return; + } else { + pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, pInfo->minval, pInfo->maxval); + } + } + + // the first stage, only acquire the min/max value + if (pInfo->stage == 0) { + if (pCtx->isSmaSet) { + double tmin = 0.0, tmax = 0.0; + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { + tmin = (double)GET_INT64_VAL(&pCtx->sma.min); + tmax = (double)GET_INT64_VAL(&pCtx->sma.max); + } else if (IS_FLOAT_TYPE(pCtx->inputType)) { + tmin = GET_DOUBLE_VAL(&pCtx->sma.min); + tmax = GET_DOUBLE_VAL(&pCtx->sma.max); + } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { + tmin = (double)GET_UINT64_VAL(&pCtx->sma.min); + tmax = (double)GET_UINT64_VAL(&pCtx->sma.max); + } else { + assert(true); + } + + if (GET_DOUBLE_VAL(&pInfo->minval) > tmin) { + SET_DOUBLE_VAL(&pInfo->minval, tmin); + } + + if (GET_DOUBLE_VAL(&pInfo->maxval) < tmax) { + 
SET_DOUBLE_VAL(&pInfo->maxval, tmax); + } + + pInfo->numOfElems += (pCtx->size - pCtx->sma.numOfNull); + } else { + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + double v = 0; + GET_TYPED_DATA(v, double, pCtx->inputType, data); + + if (v < GET_DOUBLE_VAL(&pInfo->minval)) { + SET_DOUBLE_VAL(&pInfo->minval, v); + } + + if (v > GET_DOUBLE_VAL(&pInfo->maxval)) { + SET_DOUBLE_VAL(&pInfo->maxval, v); + } + + pInfo->numOfElems += 1; + } + } + + return; + } + + // the second stage, calculate the true percentile value + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + notNullElems += 1; + tMemBucketPut(pInfo->pMemBucket, data, 1); + } + + SET_VAL(pCtx, notNullElems, 1); + pResInfo->hasResult = DATA_SET_FLAG; +} + +static void percentile_finalizer(SQLFunctionCtx *pCtx) { + double v = pCtx->param[0].nType == TSDB_DATA_TYPE_INT ? 
pCtx->param[0].i64 : pCtx->param[0].d; + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SPercentileInfo* ppInfo = (SPercentileInfo *) GET_ROWCELL_INTERBUF(pResInfo); + + tMemBucket * pMemBucket = ppInfo->pMemBucket; + if (pMemBucket == NULL || pMemBucket->total == 0) { // check for null + assert(ppInfo->numOfElems == 0); + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + } else { + SET_DOUBLE_VAL((double *)pCtx->pOutput, getPercentile(pMemBucket, v)); + } + + tMemBucketDestroy(pMemBucket); + doFinalizer(pCtx); +} + +////////////////////////////////////////////////////////////////////////////////// +static void buildHistogramInfo(SAPercentileInfo* pInfo) { + pInfo->pHisto = (SHistogramInfo*) ((char*) pInfo + sizeof(SAPercentileInfo)); + pInfo->pHisto->elems = (SHistBin*) ((char*)pInfo->pHisto + sizeof(SHistogramInfo)); +} + +static SAPercentileInfo *getAPerctInfo(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SAPercentileInfo* pInfo = NULL; + + if (pCtx->stableQuery && pCtx->currentStage != MERGE_STAGE) { + pInfo = (SAPercentileInfo*) pCtx->pOutput; + } else { + pInfo = GET_ROWCELL_INTERBUF(pResInfo); + } + + buildHistogramInfo(pInfo); + return pInfo; +} + +static bool apercentile_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (!function_setup(pCtx, pResultInfo)) { + return false; + } + + SAPercentileInfo *pInfo = getAPerctInfo(pCtx); + + char *tmp = (char *)pInfo + sizeof(SAPercentileInfo); + pInfo->pHisto = tHistogramCreateFrom(tmp, MAX_HISTOGRAM_BIN); + return true; +} + +static void apercentile_function(SQLFunctionCtx *pCtx) { + int32_t notNullElems = 0; + + SResultRowCellInfo * pResInfo = GET_RES_INFO(pCtx); + SAPercentileInfo *pInfo = getAPerctInfo(pCtx); + + assert(pInfo->pHisto->elems != NULL); + + for (int32_t i = 0; i < pCtx->size; ++i) { + char *data = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + continue; + } + + notNullElems += 1; + 
+ double v = 0; + GET_TYPED_DATA(v, double, pCtx->inputType, data); + tHistogramAdd(&pInfo->pHisto, v); + } + + if (!pCtx->hasNull) { + assert(pCtx->size == notNullElems); + } + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } +} + +static void apercentile_func_merge(SQLFunctionCtx *pCtx) { + SAPercentileInfo *pInput = (SAPercentileInfo *)GET_INPUT_DATA_LIST(pCtx); + + pInput->pHisto = (SHistogramInfo*) ((char *)pInput + sizeof(SAPercentileInfo)); + pInput->pHisto->elems = (SHistBin*) ((char *)pInput->pHisto + sizeof(SHistogramInfo)); + + if (pInput->pHisto->numOfElems <= 0) { + return; + } + + SAPercentileInfo *pOutput = getAPerctInfo(pCtx); + SHistogramInfo *pHisto = pOutput->pHisto; + + if (pHisto->numOfElems <= 0) { + memcpy(pHisto, pInput->pHisto, sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1)); + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + } else { + //TODO(dengyihao): avoid memcpy + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + SHistogramInfo *pRes = tHistogramMerge(pHisto, pInput->pHisto, MAX_HISTOGRAM_BIN); + memcpy(pHisto, pRes, sizeof(SHistogramInfo) + sizeof(SHistBin) * MAX_HISTOGRAM_BIN); + pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); + tHistogramDestroy(&pRes); + } + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->hasResult = DATA_SET_FLAG; + SET_VAL(pCtx, 1, 1); +} + +static void apercentile_finalizer(SQLFunctionCtx *pCtx) { + double v = (pCtx->param[0].nType == TSDB_DATA_TYPE_INT) ? 
pCtx->param[0].i64 : pCtx->param[0].d; + + SResultRowCellInfo * pResInfo = GET_RES_INFO(pCtx); + SAPercentileInfo *pOutput = GET_ROWCELL_INTERBUF(pResInfo); + + if (pCtx->currentStage == MERGE_STAGE) { + if (pResInfo->hasResult == DATA_SET_FLAG) { // check for null + assert(pOutput->pHisto->numOfElems > 0); + + double ratio[] = {v}; + double *res = tHistogramUniform(pOutput->pHisto, ratio, 1); + + memcpy(pCtx->pOutput, res, sizeof(double)); + free(res); + } else { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + } else { + if (pOutput->pHisto->numOfElems > 0) { + double ratio[] = {v}; + + double *res = tHistogramUniform(pOutput->pHisto, ratio, 1); + memcpy(pCtx->pOutput, res, sizeof(double)); + free(res); + } else { // no need to free + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + } + + doFinalizer(pCtx); +} + +///////////////////////////////////////////////////////////////////////////////// +static bool leastsquares_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; + } + + SLeastsquaresInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + // 2*3 matrix + pInfo->startVal = pCtx->param[0].d; + return true; +} + +#define LEASTSQR_CAL(p, x, y, index, step) \ + do { \ + (p)[0][0] += (double)(x) * (x); \ + (p)[0][1] += (double)(x); \ + (p)[0][2] += (double)(x) * (y)[index]; \ + (p)[1][2] += (y)[index]; \ + (x) += step; \ + } while (0) + +#define LEASTSQR_CAL_LOOP(ctx, param, x, y, tsdbType, n, step) \ + for (int32_t i = 0; i < (ctx)->size; ++i) { \ + if ((ctx)->hasNull && isNull((char *)&(y)[i], tsdbType)) { \ + continue; \ + } \ + (n)++; \ + LEASTSQR_CAL(param, x, y, i, step); \ + } + +static void leastsquares_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo * pResInfo = GET_RES_INFO(pCtx); + SLeastsquaresInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + double(*param)[3] = pInfo->mat; + double x = pInfo->startVal; + + void *pData = 
GET_INPUT_DATA_LIST(pCtx); + + int32_t numOfElem = 0; + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_INT: { + int32_t *p = pData; + // LEASTSQR_CAL_LOOP(pCtx, param, pParamData, p); + for (int32_t i = 0; i < pCtx->size; ++i) { + if (pCtx->hasNull && isNull((const char*) p, pCtx->inputType)) { + continue; + } + + param[0][0] += x * x; + param[0][1] += x; + param[0][2] += x * p[i]; + param[1][2] += p[i]; + + x += pCtx->param[1].d; + numOfElem++; + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + }; + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_TINYINT: { + int8_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_UTINYINT: { + uint8_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_USMALLINT: { + uint16_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_UINT: { + uint32_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + case TSDB_DATA_TYPE_UBIGINT: { + uint64_t *p = pData; + LEASTSQR_CAL_LOOP(pCtx, param, x, p, pCtx->inputType, numOfElem, pCtx->param[1].d); + break; + } + } + + pInfo->startVal = x; + pInfo->num += numOfElem; + + if (pInfo->num > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } + + SET_VAL(pCtx, 
numOfElem, 1); +} + +static void leastsquares_finalizer(SQLFunctionCtx *pCtx) { + // no data in query + SResultRowCellInfo * pResInfo = GET_RES_INFO(pCtx); + SLeastsquaresInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + if (pInfo->num == 0) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + double(*param)[3] = pInfo->mat; + + param[1][1] = (double)pInfo->num; + param[1][0] = param[0][1]; + + param[0][0] -= param[1][0] * (param[0][1] / param[1][1]); + param[0][2] -= param[1][2] * (param[0][1] / param[1][1]); + param[0][1] = 0; + param[1][2] -= param[0][2] * (param[1][0] / param[0][0]); + param[1][0] = 0; + param[0][2] /= param[0][0]; + + param[1][2] /= param[1][1]; + + int32_t maxOutputSize = AVG_FUNCTION_INTER_BUFFER_SIZE - VARSTR_HEADER_SIZE; + size_t n = snprintf(varDataVal(pCtx->pOutput), maxOutputSize, "{slop:%.6lf, intercept:%.6lf}", + param[0][2], param[1][2]); + + varDataSetLen(pCtx->pOutput, n); + doFinalizer(pCtx); +} + +static void date_col_output_function(SQLFunctionCtx *pCtx) { + SET_VAL(pCtx, pCtx->size, 1); + *(int64_t *)(pCtx->pOutput) = pCtx->startTs; +} + +static void col_project_function(SQLFunctionCtx *pCtx) { + // the number of output rows should not affect the final number of rows, so set it to be 0 + if (pCtx->numOfParams == 2) { + return; + } + + // only one row is required. + if (pCtx->param[0].i64 == 1) { + SET_VAL(pCtx, pCtx->size, 1); + } else { + INC_INIT_VAL(pCtx, pCtx->size); + } + + char *pData = GET_INPUT_DATA_LIST(pCtx); + if (pCtx->order == TSDB_ORDER_ASC) { + int32_t numOfRows = (pCtx->param[0].i64 == 1)? 
1:pCtx->size; + memcpy(pCtx->pOutput, pData, (size_t) numOfRows * pCtx->inputBytes); + } else { + for(int32_t i = 0; i < pCtx->size; ++i) { + memcpy(pCtx->pOutput + (pCtx->size - 1 - i) * pCtx->inputBytes, pData + i * pCtx->inputBytes, + pCtx->inputBytes); + } + } +} + +/** + * only used for tag projection query in select clause + * @param pCtx + * @return + */ +static void tag_project_function(SQLFunctionCtx *pCtx) { + INC_INIT_VAL(pCtx, pCtx->size); + + assert(pCtx->inputBytes == pCtx->outputBytes); + + taosVariantDump(&pCtx->tag, pCtx->pOutput, pCtx->outputType, true); + char* data = pCtx->pOutput; + pCtx->pOutput += pCtx->outputBytes; + + // directly copy from the first one + for (int32_t i = 1; i < pCtx->size; ++i) { + memmove(pCtx->pOutput, data, pCtx->outputBytes); + pCtx->pOutput += pCtx->outputBytes; + } +} + +/** + * used in group by clause. when applying group by tags, the tags value is + * assign by using tag function. + * NOTE: there is only ONE output for ONE query range + * @param pCtx + * @return + */ +static void copy_function(SQLFunctionCtx *pCtx); + +static void tag_function(SQLFunctionCtx *pCtx) { + SET_VAL(pCtx, 1, 1); + if (pCtx->currentStage == MERGE_STAGE) { + copy_function(pCtx); + } else { + taosVariantDump(&pCtx->tag, pCtx->pOutput, pCtx->outputType, true); + } +} + +static void copy_function(SQLFunctionCtx *pCtx) { + SET_VAL(pCtx, pCtx->size, 1); + + char *pData = GET_INPUT_DATA_LIST(pCtx); + assignVal(pCtx->pOutput, pData, pCtx->inputBytes, pCtx->inputType); +} + +enum { + INITIAL_VALUE_NOT_ASSIGNED = 0, +}; + +static bool diff_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; + } + + // diff function require the value is set to -1 + pCtx->param[1].nType = INITIAL_VALUE_NOT_ASSIGNED; + return false; +} + +static bool deriv_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResultInfo) { + if (!function_setup(pCtx, pResultInfo)) { + return false; + } + + 
// diff function require the value is set to -1 + SDerivInfo* pDerivInfo = GET_ROWCELL_INTERBUF(pResultInfo); + + pDerivInfo->ignoreNegative = pCtx->param[1].i64; + pDerivInfo->prevTs = -1; + pDerivInfo->tsWindow = pCtx->param[0].i64; + pDerivInfo->valueSet = false; + return false; +} + +static void deriv_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SDerivInfo* pDerivInfo = GET_ROWCELL_INTERBUF(pResInfo); + + void *data = GET_INPUT_DATA_LIST(pCtx); + + int32_t notNullElems = 0; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + int32_t i = (pCtx->order == TSDB_ORDER_ASC) ? 0 : pCtx->size - 1; + + TSKEY *pTimestamp = pCtx->ptsOutputBuf; + TSKEY *tsList = GET_TS_LIST(pCtx); + + double *pOutput = (double *)pCtx->pOutput; + + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_INT: { + int32_t *pData = (int32_t *)data; + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + SET_DOUBLE_VAL(pOutput, ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs)); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp = tsList[i]; + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + + break; + }; + + case TSDB_DATA_TYPE_BIGINT: { + int64_t *pData = (int64_t *)data; + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + *pOutput = ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp = 
tsList[i]; + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = (double) pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *pData = (double *)data; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + *pOutput = ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp = tsList[i]; + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + break; + } + + case TSDB_DATA_TYPE_FLOAT: { + float *pData = (float *)data; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + *pOutput = ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp = tsList[i]; + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + break; + } + + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *pData = (int16_t *)data; + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + *pOutput = ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp 
= tsList[i]; + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + break; + } + + case TSDB_DATA_TYPE_TINYINT: { + int8_t *pData = (int8_t *)data; + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (!pDerivInfo->valueSet) { // initial value is not set yet + pDerivInfo->valueSet = true; + } else { + *pOutput = ((pData[i] - pDerivInfo->prevValue) * pDerivInfo->tsWindow) / (tsList[i] - pDerivInfo->prevTs); + if (pDerivInfo->ignoreNegative && *pOutput < 0) { + } else { + *pTimestamp = tsList[i]; + + pOutput += 1; + pTimestamp += 1; + notNullElems++; + } + } + + pDerivInfo->prevValue = pData[i]; + pDerivInfo->prevTs = tsList[i]; + } + break; + } + default: + assert(0); +// qError("error input type"); + } + + GET_RES_INFO(pCtx)->numOfRes += notNullElems; +} + +#define DIFF_IMPL(ctx, d, type) \ + do { \ + if ((ctx)->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { \ + (ctx)->param[1].nType = (ctx)->inputType; \ + *(type *)&(ctx)->param[1].i64 = *(type *)(d); \ + } else { \ + *(type *)(ctx)->pOutput = *(type *)(d) - (*(type *)(&(ctx)->param[1].i64)); \ + *(type *)(&(ctx)->param[1].i64) = *(type *)(d); \ + *(int64_t *)(ctx)->ptsOutputBuf = GET_TS_DATA(ctx, index); \ + } \ + } while (0); + +// TODO difference in date column +static void diff_function(SQLFunctionCtx *pCtx) { + void *data = GET_INPUT_DATA_LIST(pCtx); + bool isFirstBlock = (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED); + + int32_t notNullElems = 0; + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + int32_t i = (pCtx->order == TSDB_ORDER_ASC) ? 
0 : pCtx->size - 1; + + TSKEY* pTimestamp = pCtx->ptsOutputBuf; + TSKEY* tsList = GET_TS_LIST(pCtx); + + switch (pCtx->inputType) { + case TSDB_DATA_TYPE_INT: { + int32_t *pData = (int32_t *)data; + int32_t *pOutput = (int32_t *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + *pOutput = (int32_t)(pData[i] - pCtx->param[1].i64); // direct previous may be null + *pTimestamp = (tsList != NULL)? tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].i64 = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + }; + case TSDB_DATA_TYPE_BIGINT: { + int64_t *pData = (int64_t *)data; + int64_t *pOutput = (int64_t *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + *pOutput = pData[i] - pCtx->param[1].i64; // direct previous may be null + *pTimestamp = (tsList != NULL)? tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].i64 = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *pData = (double *)data; + double *pOutput = (double *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + SET_DOUBLE_VAL(pOutput, pData[i] - pCtx->param[1].d); // direct previous may be null + *pTimestamp = (tsList != NULL)? 
tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].d = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *pData = (float *)data; + float *pOutput = (float *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + *pOutput = (float)(pData[i] - pCtx->param[1].d); // direct previous may be null + *pTimestamp = (tsList != NULL)? tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].d = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *pData = (int16_t *)data; + int16_t *pOutput = (int16_t *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + *pOutput = (int16_t)(pData[i] - pCtx->param[1].i64); // direct previous may be null + *pTimestamp = (tsList != NULL)? tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].i64 = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + } + + case TSDB_DATA_TYPE_TINYINT: { + int8_t *pData = (int8_t *)data; + int8_t *pOutput = (int8_t *)pCtx->pOutput; + + for (; i < pCtx->size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((char *)&pData[i], pCtx->inputType)) { + continue; + } + + if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet + *pOutput = (int8_t)(pData[i] - pCtx->param[1].i64); // direct previous may be null + *pTimestamp = (tsList != NULL)? 
tsList[i]:0; + pOutput += 1; + pTimestamp += 1; + } + + pCtx->param[1].i64 = pData[i]; + pCtx->param[1].nType = pCtx->inputType; + notNullElems++; + } + break; + } + default: + assert(0); +// qError("error input type"); + } + + // initial value is not set yet + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED || notNullElems <= 0) { + /* + * 1. current block and blocks before are full of null + * 2. current block may be null value + */ + assert(pCtx->hasNull); + } else { + int32_t forwardStep = (isFirstBlock) ? notNullElems - 1 : notNullElems; + + GET_RES_INFO(pCtx)->numOfRes += forwardStep; + } +} + +char *getArithColumnData(void *param, const char* name, int32_t colId) { + SArithmeticSupport *pSupport = (SArithmeticSupport *)param; + + int32_t index = -1; + for (int32_t i = 0; i < pSupport->numOfCols; ++i) { + if (colId == pSupport->colList[i].colId) { + index = i; + break; + } + } + + assert(index >= 0); + return pSupport->data[index] + pSupport->offset * pSupport->colList[index].bytes; +} + +static void arithmetic_function(SQLFunctionCtx *pCtx) { + GET_RES_INFO(pCtx)->numOfRes += pCtx->size; + SArithmeticSupport *sas = (SArithmeticSupport *)pCtx->param[1].pz; + +// arithmeticTreeTraverse(sas->pExprInfo->pExpr, pCtx->size, pCtx->pOutput, sas, pCtx->order, getArithColumnData); +} + +#define LIST_MINMAX_N(ctx, minOutput, maxOutput, elemCnt, data, type, tsdbType, numOfNotNullElem) \ + { \ + type *inputData = (type *)data; \ + for (int32_t i = 0; i < elemCnt; ++i) { \ + if ((ctx)->hasNull && isNull((char *)&inputData[i], tsdbType)) { \ + continue; \ + } \ + if (inputData[i] < minOutput) { \ + minOutput = (double)inputData[i]; \ + } \ + if (inputData[i] > maxOutput) { \ + maxOutput = (double)inputData[i]; \ + } \ + numOfNotNullElem++; \ + } \ + } + +///////////////////////////////////////////////////////////////////////////////// +static bool spread_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + 
return false; + } + + SSpreadInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + // this is the server-side setup function in client-side, the secondary merge do not need this procedure + if (pCtx->currentStage == MERGE_STAGE) { + pCtx->param[0].d = DBL_MAX; + pCtx->param[3].d = -DBL_MAX; + } else { + pInfo->min = DBL_MAX; + pInfo->max = -DBL_MAX; + } + + return true; +} + +static void spread_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SSpreadInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + int32_t numOfElems = 0; + + // todo : opt with pre-calculated result + // column missing cause the hasNull to be true + if (pCtx->isSmaSet) { + numOfElems = pCtx->size - pCtx->sma.numOfNull; + + // all data are null in current data block, ignore current data block + if (numOfElems == 0) { + goto _spread_over; + } + + if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType) || IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType) || + (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)) { + if (pInfo->min > pCtx->sma.min) { + pInfo->min = (double)pCtx->sma.min; + } + + if (pInfo->max < pCtx->sma.max) { + pInfo->max = (double)pCtx->sma.max; + } + } else if (IS_FLOAT_TYPE(pCtx->inputType)) { + if (pInfo->min > GET_DOUBLE_VAL((const char *)&(pCtx->sma.min))) { + pInfo->min = GET_DOUBLE_VAL((const char *)&(pCtx->sma.min)); + } + + if (pInfo->max < GET_DOUBLE_VAL((const char *)&(pCtx->sma.max))) { + pInfo->max = GET_DOUBLE_VAL((const char *)&(pCtx->sma.max)); + } + } + + goto _spread_over; + } + + void *pData = GET_INPUT_DATA_LIST(pCtx); + numOfElems = 0; + + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, int8_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, int16_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, 
pCtx->size, pData, int32_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT || pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, int64_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, double, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, float, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UTINYINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, uint8_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_USMALLINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, uint16_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, uint32_t, pCtx->inputType, numOfElems); + } else if (pCtx->inputType == TSDB_DATA_TYPE_UBIGINT) { + LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, uint64_t, pCtx->inputType, numOfElems); + } + + if (!pCtx->hasNull) { + assert(pCtx->size == numOfElems); + } + + _spread_over: + SET_VAL(pCtx, numOfElems, 1); + + if (numOfElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + pInfo->hasResult = DATA_SET_FLAG; + } + + // keep the data into the final output buffer for super table query since this execution may be the last one + if (pCtx->stableQuery) { + memcpy(pCtx->pOutput, GET_ROWCELL_INTERBUF(pResInfo), sizeof(SSpreadInfo)); + } +} + +/* + * here we set the result value back to the intermediate buffer, to apply the finalize the function + * the final result is generated in spread_function_finalizer + */ +void spread_func_merge(SQLFunctionCtx *pCtx) { + SSpreadInfo *pData = (SSpreadInfo *)GET_INPUT_DATA_LIST(pCtx); + if (pData->hasResult != 
DATA_SET_FLAG) { + return; + } + + if (pCtx->param[0].d > pData->min) { + pCtx->param[0].d = pData->min; + } + + if (pCtx->param[3].d < pData->max) { + pCtx->param[3].d = pData->max; + } + + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; +} + +void spread_function_finalizer(SQLFunctionCtx *pCtx) { + /* + * here we do not check the input data types, because in case of metric query, + * the type of intermediate data is binary + */ + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + if (pCtx->currentStage == MERGE_STAGE) { + assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); + + if (pResInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + SET_DOUBLE_VAL((double *)pCtx->pOutput, pCtx->param[3].d - pCtx->param[0].d); + } else { + assert(IS_NUMERIC_TYPE(pCtx->inputType) || (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)); + + SSpreadInfo *pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + if (pInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + return; + } + + SET_DOUBLE_VAL((double *)pCtx->pOutput, pInfo->max - pInfo->min); + } + + GET_RES_INFO(pCtx)->numOfRes = 1; // todo add test case + doFinalizer(pCtx); +} + + +/** + * param[1]: start time + * param[2]: end time + * @param pCtx + */ +static bool twa_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; + } + + STwaInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + pInfo->p.key = INT64_MIN; + pInfo->win = TSWINDOW_INITIALIZER; + return true; +} + +static double twa_get_area(SPoint1 s, SPoint1 e) { + if ((s.val >= 0 && e.val >= 0)|| (s.val <=0 && e.val <= 0)) { + return (s.val + e.val) * (e.key - s.key) / 2; + } + + double x = (s.key * e.val - e.key * s.val)/(e.val - s.val); + double val = (s.val * (x - s.key) + e.val * (e.key - x)) / 2; + return val; +} + +static int32_t twa_function_impl(SQLFunctionCtx* pCtx, int32_t index, int32_t 
size) { + int32_t notNullElems = 0; + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + STwaInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + TSKEY *tsList = GET_TS_LIST(pCtx); + + int32_t i = index; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + SPoint1* last = &pInfo->p; + + if (pCtx->start.key != INT64_MIN) { + assert((pCtx->start.key < tsList[i] && pCtx->order == TSDB_ORDER_ASC) || + (pCtx->start.key > tsList[i] && pCtx->order == TSDB_ORDER_DESC)); + + assert(last->key == INT64_MIN); + + last->key = tsList[i]; + GET_TYPED_DATA(last->val, double, pCtx->inputType, GET_INPUT_DATA(pCtx, index)); + + pInfo->dOutput += twa_get_area(pCtx->start, *last); + + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = pCtx->start.key; + notNullElems++; + i += step; + } else if (pInfo->p.key == INT64_MIN) { + last->key = tsList[i]; + GET_TYPED_DATA(last->val, double, pCtx->inputType, GET_INPUT_DATA(pCtx, index)); + + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = last->key; + notNullElems++; + i += step; + } + + // calculate the value of + switch(pCtx->inputType) { + case TSDB_DATA_TYPE_TINYINT: { + int8_t *val = (int8_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *val = (int16_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case 
TSDB_DATA_TYPE_INT: { + int32_t *val = (int32_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t *val = (int64_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = (double) val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = (double)val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *val = (float*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = (double)val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *val = (double*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_UTINYINT: { + uint8_t *val = (uint8_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + 
+#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_USMALLINT: { + uint16_t *val = (uint16_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_UINT: { + uint32_t *val = (uint32_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + case TSDB_DATA_TYPE_UBIGINT: { + uint64_t *val = (uint64_t*) GET_INPUT_DATA(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + +#ifndef _TD_NINGSI_60 + SPoint1 st = {.key = tsList[i], .val = (double) val[i]}; +#else + SPoint1 st; + st.key = tsList[i]; + st.val = (double) val[i]; +#endif + pInfo->dOutput += twa_get_area(pInfo->p, st); + pInfo->p = st; + } + break; + } + default: assert(0); + } + + // the last interpolated time window value + if (pCtx->end.key != INT64_MIN) { + pInfo->dOutput += twa_get_area(pInfo->p, pCtx->end); + pInfo->p = pCtx->end; + } + + pInfo->win.ekey = pInfo->p.key; + return notNullElems; +} + +static void twa_function(SQLFunctionCtx *pCtx) { + void *data = GET_INPUT_DATA_LIST(pCtx); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STwaInfo 
*pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + // skip null value + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + int32_t i = (pCtx->order == TSDB_ORDER_ASC)? 0:(pCtx->size - 1); + while (pCtx->hasNull && i < pCtx->size && i >= 0 && isNull((char *)data + pCtx->inputBytes * i, pCtx->inputType)) { + i += step; + } + + int32_t notNullElems = 0; + if (i >= 0 && i < pCtx->size) { + notNullElems = twa_function_impl(pCtx, i, pCtx->size); + } + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } + + if (pCtx->stableQuery) { + memcpy(pCtx->pOutput, pInfo, sizeof(STwaInfo)); + } +} + +/* + * To copy the input to interResBuf to avoid the input buffer space be over writen + * by next input data. The TWA function only applies to each table, so no merge procedure + * is required, we simply copy to the resut ot interResBuffer. + */ +void twa_function_copy(SQLFunctionCtx *pCtx) { + assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + memcpy(GET_ROWCELL_INTERBUF(pResInfo), pCtx->pInput, (size_t)pCtx->inputBytes); + pResInfo->hasResult = ((STwaInfo *)pCtx->pInput)->hasResult; +} + +void twa_function_finalizer(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + STwaInfo *pInfo = (STwaInfo *)GET_ROWCELL_INTERBUF(pResInfo); + if (pInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); + return; + } + + assert(pInfo->win.ekey == pInfo->p.key && pInfo->hasResult == pResInfo->hasResult); + if (pInfo->win.ekey == pInfo->win.skey) { + SET_DOUBLE_VAL((double *)pCtx->pOutput, pInfo->p.val); + } else { + SET_DOUBLE_VAL((double *)pCtx->pOutput , pInfo->dOutput / (pInfo->win.ekey - pInfo->win.skey)); + } + + GET_RES_INFO(pCtx)->numOfRes = 1; + doFinalizer(pCtx); +} + +/** + * + * @param pCtx + */ + +static void interp_function_impl(SQLFunctionCtx *pCtx) { + int32_t type = (int32_t) pCtx->param[2].i64; + 
if (type == TSDB_FILL_NONE) { + return; + } + + bool ascQuery = (pCtx->order == TSDB_ORDER_ASC); + + if (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP) { + *(TSKEY *)pCtx->pOutput = pCtx->startTs; + } else if (type == TSDB_FILL_NULL) { + setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes); + } else if (type == TSDB_FILL_SET_VALUE) { + taosVariantDump(&pCtx->param[1], pCtx->pOutput, pCtx->inputType, true); + } else { + if (pCtx->start.key != INT64_MIN && ((ascQuery && pCtx->start.key <= pCtx->startTs && pCtx->end.key >= pCtx->startTs) || ((!ascQuery) && pCtx->start.key >= pCtx->startTs && pCtx->end.key <= pCtx->startTs))) { + if (type == TSDB_FILL_PREV) { + if (IS_NUMERIC_TYPE(pCtx->inputType) || pCtx->inputType == TSDB_DATA_TYPE_BOOL) { + SET_TYPED_DATA(pCtx->pOutput, pCtx->inputType, pCtx->start.val); + } else { + assignVal(pCtx->pOutput, pCtx->start.ptr, pCtx->outputBytes, pCtx->inputType); + } + } else if (type == TSDB_FILL_NEXT) { + if (IS_NUMERIC_TYPE(pCtx->inputType) || pCtx->inputType == TSDB_DATA_TYPE_BOOL) { + SET_TYPED_DATA(pCtx->pOutput, pCtx->inputType, pCtx->end.val); + } else { + assignVal(pCtx->pOutput, pCtx->end.ptr, pCtx->outputBytes, pCtx->inputType); + } + } else if (type == TSDB_FILL_LINEAR) { + SPoint point1 = {.key = pCtx->start.key, .val = &pCtx->start.val}; + SPoint point2 = {.key = pCtx->end.key, .val = &pCtx->end.val}; + SPoint point = {.key = pCtx->startTs, .val = pCtx->pOutput}; + + int32_t srcType = pCtx->inputType; + if (IS_NUMERIC_TYPE(srcType)) { // TODO should find the not null data? 
+ if (isNull((char *)&pCtx->start.val, srcType) || isNull((char *)&pCtx->end.val, srcType)) { + setNull(pCtx->pOutput, srcType, pCtx->inputBytes); + } else { + taosGetLinearInterpolationVal(&point, pCtx->outputType, &point1, &point2, TSDB_DATA_TYPE_DOUBLE); + } + } else { + setNull(pCtx->pOutput, srcType, pCtx->inputBytes); + } + } + } else { + // no data generated yet + if (pCtx->size < 1) { + return; + } + + // check the timestamp in input buffer + TSKEY skey = GET_TS_DATA(pCtx, 0); + + if (type == TSDB_FILL_PREV) { + if ((ascQuery && skey > pCtx->startTs) || ((!ascQuery) && skey < pCtx->startTs)) { + return; + } + + if (pCtx->size > 1) { + TSKEY ekey = GET_TS_DATA(pCtx, 1); + if ((ascQuery && ekey > skey && ekey <= pCtx->startTs) || + ((!ascQuery) && ekey < skey && ekey >= pCtx->startTs)){ + skey = ekey; + } + } + assignVal(pCtx->pOutput, pCtx->pInput, pCtx->outputBytes, pCtx->inputType); + } else if (type == TSDB_FILL_NEXT) { + TSKEY ekey = skey; + char* val = NULL; + + if ((ascQuery && ekey < pCtx->startTs) || ((!ascQuery) && ekey > pCtx->startTs)) { + if (pCtx->size > 1) { + ekey = GET_TS_DATA(pCtx, 1); + if ((ascQuery && ekey < pCtx->startTs) || ((!ascQuery) && ekey > pCtx->startTs)) { + return; + } + + val = ((char*)pCtx->pInput) + pCtx->inputBytes; + } else { + return; + } + } else { + val = (char*)pCtx->pInput; + } + + assignVal(pCtx->pOutput, val, pCtx->outputBytes, pCtx->inputType); + } else if (type == TSDB_FILL_LINEAR) { + if (pCtx->size <= 1) { + return; + } + + TSKEY ekey = GET_TS_DATA(pCtx, 1); + + // no data generated yet + if ((ascQuery && !(skey <= pCtx->startTs && ekey >= pCtx->startTs)) + || ((!ascQuery) && !(skey >= pCtx->startTs && ekey <= pCtx->startTs))) { + return; + } + + char *start = GET_INPUT_DATA(pCtx, 0); + char *end = GET_INPUT_DATA(pCtx, 1); + + SPoint point1 = {.key = skey, .val = start}; + SPoint point2 = {.key = ekey, .val = end}; + SPoint point = {.key = pCtx->startTs, .val = pCtx->pOutput}; + + int32_t srcType = 
pCtx->inputType; + if (IS_NUMERIC_TYPE(srcType)) { // TODO should find the not null data? + if (isNull(start, srcType) || isNull(end, srcType)) { + setNull(pCtx->pOutput, srcType, pCtx->inputBytes); + } else { + taosGetLinearInterpolationVal(&point, pCtx->outputType, &point1, &point2, srcType); + } + } else { + setNull(pCtx->pOutput, srcType, pCtx->inputBytes); + } + } + } + } + + SET_VAL(pCtx, 1, 1); +} + +static void interp_function(SQLFunctionCtx *pCtx) { + // at this point, the value is existed, return directly + if (pCtx->size > 0) { + bool ascQuery = (pCtx->order == TSDB_ORDER_ASC); + TSKEY key; + char *pData; + int32_t typedData = 0; + + if (ascQuery) { + key = GET_TS_DATA(pCtx, 0); + pData = GET_INPUT_DATA(pCtx, 0); + } else { + key = pCtx->start.key; + if (key == INT64_MIN) { + key = GET_TS_DATA(pCtx, 0); + pData = GET_INPUT_DATA(pCtx, 0); + } else { + if (!(IS_NUMERIC_TYPE(pCtx->inputType) || pCtx->inputType == TSDB_DATA_TYPE_BOOL)) { + pData = pCtx->start.ptr; + } else { + typedData = 1; + pData = (char *)&pCtx->start.val; + } + } + } + + //if (key == pCtx->startTs && (ascQuery || !(IS_NUMERIC_TYPE(pCtx->inputType) || pCtx->inputType == TSDB_DATA_TYPE_BOOL))) { + if (key == pCtx->startTs) { + if (typedData) { + SET_TYPED_DATA(pCtx->pOutput, pCtx->inputType, *(double *)pData); + } else { + assignVal(pCtx->pOutput, pData, pCtx->inputBytes, pCtx->inputType); + } + + SET_VAL(pCtx, 1, 1); + } else { + interp_function_impl(pCtx); + } + } else { //no qualified data rows and interpolation is required + interp_function_impl(pCtx); + } +} + +static bool ts_comp_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; // not initialized since it has been initialized + } + + STSCompInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + pInfo->pTSBuf = tsBufCreate(false, pCtx->order); + pInfo->pTSBuf->tsOrder = pCtx->order; + return true; +} + +static void ts_comp_function(SQLFunctionCtx *pCtx) { + 
SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STSBuf * pTSbuf = ((STSCompInfo *)(GET_ROWCELL_INTERBUF(pResInfo)))->pTSBuf; + + const char *input = GET_INPUT_DATA_LIST(pCtx); + + // primary ts must be existed, so no need to check its existance + if (pCtx->order == TSDB_ORDER_ASC) { + tsBufAppend(pTSbuf, (int32_t)pCtx->param[0].i64, &pCtx->tag, input, pCtx->size * TSDB_KEYSIZE); + } else { + for (int32_t i = pCtx->size - 1; i >= 0; --i) { + char *d = GET_INPUT_DATA(pCtx, i); + tsBufAppend(pTSbuf, (int32_t)pCtx->param[0].i64, &pCtx->tag, d, (int32_t)TSDB_KEYSIZE); + } + } + + SET_VAL(pCtx, pCtx->size, 1); + pResInfo->hasResult = DATA_SET_FLAG; +} + +static void ts_comp_finalize(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + STSCompInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + STSBuf * pTSbuf = pInfo->pTSBuf; + + tsBufFlush(pTSbuf); +// qDebug("total timestamp :%"PRId64, pTSbuf->numOfTotal); + + // TODO refactor transfer ownership of current file + *(FILE **)pCtx->pOutput = pTSbuf->f; + + pResInfo->complete = true; + + // get the file size + struct stat fStat; + if ((fstat(fileno(pTSbuf->f), &fStat) == 0)) { + pResInfo->numOfRes = fStat.st_size; + } + + pTSbuf->remainOpen = true; + tsBufDestroy(pTSbuf); + + doFinalizer(pCtx); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +// rate functions +static double do_calc_rate(const SRateInfo* pRateInfo, double tickPerSec) { + if ((INT64_MIN == pRateInfo->lastKey) || (INT64_MIN == pRateInfo->firstKey) || + (pRateInfo->firstKey >= pRateInfo->lastKey)) { + return 0.0; + } + + double diff = 0; + if (pRateInfo->isIRate) { + // If the previous value of the last is greater than the last value, only keep the last point instead of the delta + // value between two values. 
+ diff = pRateInfo->lastValue; + if (diff >= pRateInfo->firstValue) { + diff -= pRateInfo->firstValue; + } + } else { + diff = pRateInfo->correctionValue + pRateInfo->lastValue - pRateInfo->firstValue; + if (diff <= 0) { + return 0; + } + } + + int64_t duration = pRateInfo->lastKey - pRateInfo->firstKey; + if (duration == 0) { + return 0; + } + + return (duration > 0)? ((double)diff) / (duration/tickPerSec):0.0; +} + +static bool rate_function_setup(SQLFunctionCtx *pCtx, SResultRowCellInfo* pResInfo) { + if (!function_setup(pCtx, pResInfo)) { + return false; + } + + SRateInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + pInfo->correctionValue = 0; + pInfo->firstKey = INT64_MIN; + pInfo->lastKey = INT64_MIN; + pInfo->firstValue = (double) INT64_MIN; + pInfo->lastValue = (double) INT64_MIN; + + pInfo->hasResult = 0; + pInfo->isIRate = (pCtx->functionId == FUNCTION_IRATE); + return true; +} + +static void rate_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + int32_t notNullElems = 0; + SRateInfo *pRateInfo = (SRateInfo *)GET_ROWCELL_INTERBUF(pResInfo); + TSKEY *primaryKey = GET_TS_LIST(pCtx); + +// qDebug("%p rate_function() size:%d, hasNull:%d", pCtx, pCtx->size, pCtx->hasNull); + + for (int32_t i = 0; i < pCtx->size; ++i) { + char *pData = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { +// qDebug("%p rate_function() index of null data:%d", pCtx, i); + continue; + } + + notNullElems++; + + double v = 0; + GET_TYPED_DATA(v, double, pCtx->inputType, pData); + + if ((INT64_MIN == pRateInfo->firstValue) || (INT64_MIN == pRateInfo->firstKey)) { + pRateInfo->firstValue = v; + pRateInfo->firstKey = primaryKey[i]; + } + + if (INT64_MIN == pRateInfo->lastValue) { + pRateInfo->lastValue = v; + } else if (v < pRateInfo->lastValue) { + pRateInfo->correctionValue += pRateInfo->lastValue; + } + + pRateInfo->lastValue = v; + pRateInfo->lastKey = primaryKey[i]; + } + + if (!pCtx->hasNull) { + assert(pCtx->size 
== notNullElems); + } + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + pRateInfo->hasResult = DATA_SET_FLAG; + pResInfo->hasResult = DATA_SET_FLAG; + } + + // keep the data into the final output buffer for super table query since this execution may be the last one + if (pCtx->stableQuery) { + memcpy(pCtx->pOutput, GET_ROWCELL_INTERBUF(pResInfo), sizeof(SRateInfo)); + } +} + +static void rate_func_copy(SQLFunctionCtx *pCtx) { + assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + memcpy(GET_ROWCELL_INTERBUF(pResInfo), pCtx->pInput, (size_t)pCtx->inputBytes); + pResInfo->hasResult = ((SRateInfo*)pCtx->pInput)->hasResult; +} + +static void rate_finalizer(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + SRateInfo *pRateInfo = (SRateInfo *)GET_ROWCELL_INTERBUF(pResInfo); + + if (pRateInfo->hasResult != DATA_SET_FLAG) { + setNull(pCtx->pOutput, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); + return; + } + + SET_DOUBLE_VAL((double*) pCtx->pOutput, do_calc_rate(pRateInfo, (double) TSDB_TICK_PER_SECOND(pCtx->param[0].i64))); + + // cannot set the numOfIteratedElems again since it is set during previous iteration + pResInfo->numOfRes = 1; + pResInfo->hasResult = DATA_SET_FLAG; + + doFinalizer(pCtx); +} + +static void irate_function(SQLFunctionCtx *pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + int32_t notNullElems = 0; + SRateInfo *pRateInfo = (SRateInfo *)GET_ROWCELL_INTERBUF(pResInfo); + TSKEY *primaryKey = GET_TS_LIST(pCtx); + + for (int32_t i = pCtx->size - 1; i >= 0; --i) { + char *pData = GET_INPUT_DATA(pCtx, i); + if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { + continue; + } + + notNullElems++; + + double v = 0; + GET_TYPED_DATA(v, double, pCtx->inputType, pData); + + if ((INT64_MIN == pRateInfo->lastKey) || primaryKey[i] > pRateInfo->lastKey) { + pRateInfo->lastValue = v; + pRateInfo->lastKey = primaryKey[i]; + continue; + } + + if ((INT64_MIN == 
pRateInfo->firstKey) || primaryKey[i] > pRateInfo->firstKey) { + pRateInfo->firstValue = v; + pRateInfo->firstKey = primaryKey[i]; + break; + } + } + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + pRateInfo->hasResult = DATA_SET_FLAG; + pResInfo->hasResult = DATA_SET_FLAG; + } + + // keep the data into the final output buffer for super table query since this execution may be the last one + if (pCtx->stableQuery) { + memcpy(pCtx->pOutput, GET_ROWCELL_INTERBUF(pResInfo), sizeof(SRateInfo)); + } +} + +static void blockDistInfoFromBinary(const char* data, int32_t len, STableBlockDist* pDist) { + SBufferReader br = tbufInitReader(data, len, false); + + pDist->numOfTables = tbufReadUint32(&br); + pDist->numOfFiles = tbufReadUint16(&br); + pDist->totalSize = tbufReadUint64(&br); + pDist->totalRows = tbufReadUint64(&br); + pDist->maxRows = tbufReadInt32(&br); + pDist->minRows = tbufReadInt32(&br); + pDist->numOfRowsInMemTable = tbufReadUint32(&br); + pDist->numOfSmallBlocks = tbufReadUint32(&br); + int64_t numSteps = tbufReadUint64(&br); + + bool comp = tbufReadUint8(&br); + uint32_t compLen = tbufReadUint32(&br); + + size_t originalLen = (size_t) (numSteps *sizeof(SFileBlockInfo)); + + char* outputBuf = NULL; + if (comp) { + outputBuf = malloc(originalLen); + + size_t actualLen = compLen; + const char* compStr = tbufReadBinary(&br, &actualLen); + + int32_t orignalLen = tsDecompressString(compStr, compLen, 1, outputBuf, + (int32_t)originalLen , ONE_STAGE_COMP, NULL, 0); + assert(orignalLen == numSteps *sizeof(SFileBlockInfo)); + } else { + outputBuf = (char*) tbufReadBinary(&br, &originalLen); + } + + pDist->dataBlockInfos = taosArrayFromList(outputBuf, (uint32_t)numSteps, sizeof(SFileBlockInfo)); + if (comp) { + tfree(outputBuf); + } +} + +static void blockInfo_func(SQLFunctionCtx* pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STableBlockDist* pDist = (STableBlockDist*) GET_ROWCELL_INTERBUF(pResInfo); + + int32_t len = *(int32_t*) 
pCtx->pInput; + blockDistInfoFromBinary((char*)pCtx->pInput + sizeof(int32_t), len, pDist); + pDist->rowSize = (uint16_t)pCtx->param[0].i64; + + memcpy(pCtx->pOutput, pCtx->pInput, sizeof(int32_t) + len); + + pResInfo->numOfRes = 1; + pResInfo->hasResult = DATA_SET_FLAG; +} + +static void mergeTableBlockDist(SResultRowCellInfo* pResInfo, const STableBlockDist* pSrc) { + STableBlockDist* pDist = (STableBlockDist*) GET_ROWCELL_INTERBUF(pResInfo); + assert(pDist != NULL && pSrc != NULL); + + pDist->numOfTables += pSrc->numOfTables; + pDist->numOfRowsInMemTable += pSrc->numOfRowsInMemTable; + pDist->numOfSmallBlocks += pSrc->numOfSmallBlocks; + pDist->numOfFiles += pSrc->numOfFiles; + pDist->totalSize += pSrc->totalSize; + pDist->totalRows += pSrc->totalRows; + + if (pResInfo->hasResult == DATA_SET_FLAG) { + pDist->maxRows = MAX(pDist->maxRows, pSrc->maxRows); + pDist->minRows = MIN(pDist->minRows, pSrc->minRows); + } else { + pDist->maxRows = pSrc->maxRows; + pDist->minRows = pSrc->minRows; + + int32_t maxSteps = TSDB_MAX_MAX_ROW_FBLOCK/TSDB_BLOCK_DIST_STEP_ROWS; + if (TSDB_MAX_MAX_ROW_FBLOCK % TSDB_BLOCK_DIST_STEP_ROWS != 0) { + ++maxSteps; + } + pDist->dataBlockInfos = taosArrayInit(maxSteps, sizeof(SFileBlockInfo)); + taosArraySetSize(pDist->dataBlockInfos, maxSteps); + } + + size_t steps = taosArrayGetSize(pSrc->dataBlockInfos); + for (int32_t i = 0; i < steps; ++i) { + int32_t srcNumBlocks = ((SFileBlockInfo*)taosArrayGet(pSrc->dataBlockInfos, i))->numBlocksOfStep; + SFileBlockInfo* blockInfo = (SFileBlockInfo*)taosArrayGet(pDist->dataBlockInfos, i); + blockInfo->numBlocksOfStep += srcNumBlocks; + } +} + +void block_func_merge(SQLFunctionCtx* pCtx) { + STableBlockDist info = {0}; + int32_t len = *(int32_t*) pCtx->pInput; + blockDistInfoFromBinary(((char*)pCtx->pInput) + sizeof(int32_t), len, &info); + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + mergeTableBlockDist(pResInfo, &info); + taosArrayDestroy(info.dataBlockInfos); + + pResInfo->numOfRes = 1; + 
pResInfo->hasResult = DATA_SET_FLAG; +} + +void getPercentiles(STableBlockDist *pTableBlockDist, int64_t totalBlocks, int32_t numOfPercents, + double* percents, int32_t* percentiles) { + if (totalBlocks == 0) { + for (int32_t i = 0; i < numOfPercents; ++i) { + percentiles[i] = 0; + } + return; + } + + SArray *blocksInfos = pTableBlockDist->dataBlockInfos; + size_t numSteps = taosArrayGetSize(blocksInfos); + size_t cumulativeBlocks = 0; + + int percentIndex = 0; + for (int32_t indexStep = 0; indexStep < numSteps; ++indexStep) { + int32_t numStepBlocks = ((SFileBlockInfo *)taosArrayGet(blocksInfos, indexStep))->numBlocksOfStep; + if (numStepBlocks == 0) continue; + cumulativeBlocks += numStepBlocks; + + while (percentIndex < numOfPercents) { + double blockRank = totalBlocks * percents[percentIndex]; + if (blockRank <= cumulativeBlocks) { + percentiles[percentIndex] = indexStep; + ++percentIndex; + } else { + break; + } + } + } + + for (int32_t i = 0; i < numOfPercents; ++i) { + percentiles[i] = (percentiles[i]+1) * TSDB_BLOCK_DIST_STEP_ROWS - TSDB_BLOCK_DIST_STEP_ROWS/2; + } +} + +void generateBlockDistResult(STableBlockDist *pTableBlockDist, char* result) { + if (pTableBlockDist == NULL) { + return; + } + + SArray* blockInfos = pTableBlockDist->dataBlockInfos; + uint64_t totalRows = pTableBlockDist->totalRows; + size_t numSteps = taosArrayGetSize(blockInfos); + int64_t totalBlocks = 0; + int64_t min = -1, max = -1, avg = 0; + + for (int32_t i = 0; i < numSteps; i++) { + SFileBlockInfo *blockInfo = taosArrayGet(blockInfos, i); + int64_t blocks = blockInfo->numBlocksOfStep; + totalBlocks += blocks; + } + + avg = totalBlocks > 0 ? (int64_t)(totalRows/totalBlocks) : 0; + min = totalBlocks > 0 ? pTableBlockDist->minRows : 0; + max = totalBlocks > 0 ? 
pTableBlockDist->maxRows : 0; + + double stdDev = 0; + if (totalBlocks > 0) { + double variance = 0; + for (int32_t i = 0; i < numSteps; i++) { + SFileBlockInfo *blockInfo = taosArrayGet(blockInfos, i); + int64_t blocks = blockInfo->numBlocksOfStep; + int32_t rows = (i * TSDB_BLOCK_DIST_STEP_ROWS + TSDB_BLOCK_DIST_STEP_ROWS / 2); + variance += blocks * (rows - avg) * (rows - avg); + } + variance = variance / totalBlocks; + stdDev = sqrt(variance); + } + + double percents[] = {0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95, 0.99}; + int32_t percentiles[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + assert(sizeof(percents)/sizeof(double) == sizeof(percentiles)/sizeof(int32_t)); + getPercentiles(pTableBlockDist, totalBlocks, sizeof(percents)/sizeof(double), percents, percentiles); + + uint64_t totalLen = pTableBlockDist->totalSize; + int32_t rowSize = pTableBlockDist->rowSize; + int32_t smallBlocks = pTableBlockDist->numOfSmallBlocks; + double compRatio = (totalRows>0) ? 
((double)(totalLen)/(rowSize*totalRows)) : 1; + int sz = sprintf(result + VARSTR_HEADER_SIZE, + "summary: \n\t " + "5th=[%d], 10th=[%d], 20th=[%d], 30th=[%d], 40th=[%d], 50th=[%d]\n\t " + "60th=[%d], 70th=[%d], 80th=[%d], 90th=[%d], 95th=[%d], 99th=[%d]\n\t " + "Min=[%"PRId64"(Rows)] Max=[%"PRId64"(Rows)] Avg=[%"PRId64"(Rows)] Stddev=[%.2f] \n\t " + "Rows=[%"PRIu64"], Blocks=[%"PRId64"], SmallBlocks=[%d], Size=[%.3f(Kb)] Comp=[%.2f]\n\t " + "RowsInMem=[%d] \n\t", + percentiles[0], percentiles[1], percentiles[2], percentiles[3], percentiles[4], percentiles[5], + percentiles[6], percentiles[7], percentiles[8], percentiles[9], percentiles[10], percentiles[11], + min, max, avg, stdDev, + totalRows, totalBlocks, smallBlocks, totalLen/1024.0, compRatio, + pTableBlockDist->numOfRowsInMemTable); + varDataSetLen(result, sz); + UNUSED(sz); +} + +void blockinfo_func_finalizer(SQLFunctionCtx* pCtx) { + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STableBlockDist* pDist = (STableBlockDist*) GET_ROWCELL_INTERBUF(pResInfo); + + pDist->rowSize = (uint16_t)pCtx->param[0].i64; + generateBlockDistResult(pDist, pCtx->pOutput); + + if (pDist->dataBlockInfos != NULL) { + taosArrayDestroy(pDist->dataBlockInfos); + pDist->dataBlockInfos = NULL; + } + + // cannot set the numOfIteratedElems again since it is set during previous iteration + pResInfo->numOfRes = 1; + pResInfo->hasResult = DATA_SET_FLAG; + + doFinalizer(pCtx); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +/* + * function compatible list. + * tag and ts are not involved in the compatibility check + * + * 1. functions that are not simultaneously present with any other functions. e.g., diff/ts_z/top/bottom + * 2. functions that are only allowed to be present only with same functions. e.g., last_row, interp + * 3. functions that are allowed to be present with other functions. + * e.g., count/sum/avg/min/max/stddev/percentile/apercentile/first/last... 
+ * + */ +int32_t functionCompatList[] = { + // count, sum, avg, min, max, stddev, percentile, apercentile, first, last + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // last_row,top, bottom, spread, twa, leastsqr, ts, ts_dummy, tag_dummy, ts_comp + 4, -1, -1, 1, 1, 1, 1, 1, 1, -1, + // tag, colprj, tagprj, arithmetic, diff, first_dist, last_dist, stddev_dst, interp rate irate + 1, 1, 1, 1, -1, 1, 1, 1, 5, 1, 1, + // tid_tag, derivative, blk_info + 6, 8, 7, +}; + +SAggFunctionInfo aggFunc[34] = {{ + // 0, count function does not invoke the finalize function + "count", + FUNCTION_AGG, + FUNCTION_COUNT, + FUNCTION_COUNT, + BASIC_FUNC_SO, + function_setup, + count_function, + doFinalizer, + count_func_merge, + countRequired, + }, + { + // 1 + "sum", + FUNCTION_AGG, + FUNCTION_SUM, + FUNCTION_SUM, + BASIC_FUNC_SO, + function_setup, + sum_function, + function_finalizer, + sum_func_merge, + statisRequired, + }, + { + // 2 + "avg", + FUNCTION_AGG, + FUNCTION_AVG, + FUNCTION_AVG, + BASIC_FUNC_SO, + function_setup, + avg_function, + avg_finalizer, + avg_func_merge, + statisRequired, + }, + { + // 3 + "min", + FUNCTION_AGG, + FUNCTION_MIN, + FUNCTION_MIN, + BASIC_FUNC_SO | FUNCSTATE_SELECTIVITY, + min_func_setup, + min_function, + function_finalizer, + min_func_merge, + statisRequired, + }, + { + // 4 + "max", + FUNCTION_AGG, + FUNCTION_MAX, + FUNCTION_MAX, + BASIC_FUNC_SO | FUNCSTATE_SELECTIVITY, + max_func_setup, + max_function, + function_finalizer, + max_func_merge, + statisRequired, + }, + { + // 5 + "stddev", + FUNCTION_AGG, + FUNCTION_STDDEV, + FUNCTION_STDDEV_DST, + FUNCSTATE_SO | FUNCSTATE_STREAM, + function_setup, + stddev_function, + stddev_finalizer, + noop1, + dataBlockRequired, + }, + { + // 6 + "percentile", + FUNCTION_AGG, + FUNCTION_PERCT, + FUNCTION_INVALID_ID, + FUNCSTATE_SO | FUNCSTATE_STREAM, + percentile_function_setup, + percentile_function, + percentile_finalizer, + noop1, + dataBlockRequired, + }, + { + // 7 + "apercentile", + FUNCTION_AGG, + FUNCTION_APERCT, + 
FUNCTION_APERCT, + FUNCSTATE_SO | FUNCSTATE_STREAM | FUNCSTATE_STABLE, + apercentile_function_setup, + apercentile_function, + apercentile_finalizer, + apercentile_func_merge, + dataBlockRequired, + }, + { + // 8 + "first", + FUNCTION_AGG, + FUNCTION_FIRST, + FUNCTION_FIRST_DST, + BASIC_FUNC_SO | FUNCSTATE_SELECTIVITY, + function_setup, + first_function, + function_finalizer, + noop1, + firstFuncRequired, + }, + { + // 9 + "last", + FUNCTION_AGG, + FUNCTION_LAST, + FUNCTION_LAST_DST, + BASIC_FUNC_SO | FUNCSTATE_SELECTIVITY, + function_setup, + last_function, + function_finalizer, + noop1, + lastFuncRequired, + }, + { + // 10 + "last_row", + FUNCTION_AGG, + FUNCTION_LAST_ROW, + FUNCTION_LAST_ROW, + FUNCSTATE_SO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + first_last_function_setup, + last_row_function, + last_row_finalizer, + last_dist_func_merge, + dataBlockRequired, + }, + { + // 11 + "top", + FUNCTION_AGG, + FUNCTION_TOP, + FUNCTION_TOP, + FUNCSTATE_MO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + top_bottom_function_setup, + top_function, + top_bottom_func_finalizer, + top_func_merge, + dataBlockRequired, + }, + { + // 12 + "bottom", + FUNCTION_AGG, + FUNCTION_BOTTOM, + FUNCTION_BOTTOM, + FUNCSTATE_MO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + top_bottom_function_setup, + bottom_function, + top_bottom_func_finalizer, + bottom_func_merge, + dataBlockRequired, + }, + { + // 13 + "spread", + FUNCTION_AGG, + FUNCTION_SPREAD, + FUNCTION_SPREAD, + BASIC_FUNC_SO, + spread_function_setup, + spread_function, + spread_function_finalizer, + spread_func_merge, + countRequired, + }, + { + // 14 + "twa", + FUNCTION_AGG, + FUNCTION_TWA, + FUNCTION_TWA, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS, + twa_function_setup, + twa_function, + twa_function_finalizer, + twa_function_copy, + dataBlockRequired, + }, + { + // 15 + "leastsquares", + FUNCTION_AGG, + FUNCTION_LEASTSQR, + FUNCTION_INVALID_ID, + FUNCSTATE_SO | 
FUNCSTATE_STREAM, + leastsquares_function_setup, + leastsquares_function, + leastsquares_finalizer, + noop1, + dataBlockRequired, + }, + { + // 16 + "ts", + FUNCTION_AGG, + FUNCTION_TS, + FUNCTION_TS, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS, + function_setup, + date_col_output_function, + doFinalizer, + copy_function, + noDataRequired, + }, + { + // 17 + "ts", + FUNCTION_AGG, + FUNCTION_TS_DUMMY, + FUNCTION_TS_DUMMY, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS, + function_setup, + noop1, + doFinalizer, + copy_function, + dataBlockRequired, + }, + { + // 18 + "tag_dummy", + FUNCTION_AGG, + FUNCTION_TAG_DUMMY, + FUNCTION_TAG_DUMMY, + BASIC_FUNC_SO, + function_setup, + tag_function, + doFinalizer, + copy_function, + noDataRequired, + }, + { + // 19 + "ts", + FUNCTION_AGG, + FUNCTION_TS_COMP, + FUNCTION_TS_COMP, + FUNCSTATE_MO | FUNCSTATE_NEED_TS, + ts_comp_function_setup, + ts_comp_function, + ts_comp_finalize, + copy_function, + dataBlockRequired, + }, + { + // 20 + "tag", + FUNCTION_AGG, + FUNCTION_TAG, + FUNCTION_TAG, + BASIC_FUNC_SO, + function_setup, + tag_function, + doFinalizer, + copy_function, + noDataRequired, + }, + {//TODO this is a scala function + // 21, column project sql function + "colprj", + FUNCTION_AGG, + FUNCTION_PRJ, + FUNCTION_PRJ, + BASIC_FUNC_MO | FUNCSTATE_NEED_TS, + function_setup, + col_project_function, + doFinalizer, + copy_function, + dataBlockRequired, + }, + { + // 22, multi-output, tag function has only one result + "tagprj", + FUNCTION_AGG, + FUNCTION_TAGPRJ, + FUNCTION_TAGPRJ, + BASIC_FUNC_MO, + function_setup, + tag_project_function, + doFinalizer, + copy_function, + noDataRequired, + }, + { + // 23 + "arithmetic", + FUNCTION_AGG, + FUNCTION_ARITHM, + FUNCTION_ARITHM, + FUNCSTATE_MO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS, + function_setup, + arithmetic_function, + doFinalizer, + copy_function, + dataBlockRequired, + }, + { + // 24 + "diff", + FUNCTION_AGG, + FUNCTION_DIFF, + FUNCTION_INVALID_ID, + FUNCSTATE_MO | FUNCSTATE_STABLE | 
FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + diff_function_setup, + diff_function, + doFinalizer, + noop1, + dataBlockRequired, + }, + // distributed version used in two-stage aggregation processes + { + // 25 + "first_dist", + FUNCTION_AGG, + FUNCTION_FIRST_DST, + FUNCTION_FIRST_DST, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + first_last_function_setup, + first_dist_function, + function_finalizer, + first_dist_func_merge, + firstDistFuncRequired, + }, + { + // 26 + "last_dist", + FUNCTION_AGG, + FUNCTION_LAST_DST, + FUNCTION_LAST_DST, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + first_last_function_setup, + last_dist_function, + function_finalizer, + last_dist_func_merge, + lastDistFuncRequired, + }, + { + // 27 + "stddev", // return table id and the corresponding tags for join match and subscribe + FUNCTION_AGG, + FUNCTION_STDDEV_DST, + FUNCTION_AVG, + FUNCSTATE_SO | FUNCSTATE_STABLE, + function_setup, + stddev_dst_function, + stddev_dst_finalizer, + stddev_dst_merge, + dataBlockRequired, + }, + { + // 28 + "interp", + FUNCTION_AGG, + FUNCTION_INTERP, + FUNCTION_INTERP, + FUNCSTATE_SO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS , + function_setup, + interp_function, + doFinalizer, + copy_function, + dataBlockRequired, + }, + { + // 29 + "rate", + FUNCTION_AGG, + FUNCTION_RATE, + FUNCTION_RATE, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS, + rate_function_setup, + rate_function, + rate_finalizer, + rate_func_copy, + dataBlockRequired, + }, + { + // 30 + "irate", + FUNCTION_AGG, + FUNCTION_IRATE, + FUNCTION_IRATE, + BASIC_FUNC_SO | FUNCSTATE_NEED_TS, + rate_function_setup, + irate_function, + rate_finalizer, + rate_func_copy, + dataBlockRequired, + }, + { + // 31 + "tbid", // return table id and the corresponding tags for join match and subscribe + FUNCTION_AGG, + FUNCTION_TID_TAG, + FUNCTION_TID_TAG, + FUNCSTATE_MO | FUNCSTATE_STABLE, + function_setup, + noop1, + noop1, + noop1, + dataBlockRequired, + }, + { //32 + "derivative", // return 
table id and the corresponding tags for join match and subscribe + FUNCTION_AGG, + FUNCTION_DERIVATIVE, + FUNCTION_INVALID_ID, + FUNCSTATE_MO | FUNCSTATE_STABLE | FUNCSTATE_NEED_TS | FUNCSTATE_SELECTIVITY, + deriv_function_setup, + deriv_function, + doFinalizer, + noop1, + dataBlockRequired, + }, + { + // 33 + "_block_dist", // return table id and the corresponding tags for join match and subscribe + FUNCTION_AGG, + FUNCTION_BLKINFO, + FUNCTION_BLKINFO, + FUNCSTATE_SO | FUNCSTATE_STABLE, + function_setup, + blockInfo_func, + blockinfo_func_finalizer, + block_func_merge, + dataBlockRequired, + }}; diff --git a/source/libs/executor/src/texpr.c b/source/libs/function/src/texpr.c similarity index 88% rename from source/libs/executor/src/texpr.c rename to source/libs/function/src/texpr.c index 7e2797743d..32fe3d2912 100644 --- a/source/libs/executor/src/texpr.c +++ b/source/libs/function/src/texpr.c @@ -15,7 +15,6 @@ #include "os.h" -#include "texpr.h" #include "exception.h" #include "taosdef.h" #include "taosmsg.h" @@ -26,21 +25,21 @@ #include "thash.h" #include "tskiplist.h" #include "texpr.h" -#include "tarithoperator.h" +//#include "tarithoperator.h" #include "tvariant.h" -static uint8_t UNUSED_FUNC isQueryOnPrimaryKey(const char *primaryColumnName, const tExprNode *pLeft, const tExprNode *pRight) { - if (pLeft->nodeType == TSQL_NODE_COL) { - // if left node is the primary column,return true - return (strcmp(primaryColumnName, pLeft->pSchema->name) == 0) ? 1 : 0; - } else { - // if any children have query on primary key, their parents are also keep this value - return ((pLeft->nodeType == TSQL_NODE_EXPR && pLeft->_node.hasPK == 1) || - (pRight->nodeType == TSQL_NODE_EXPR && pRight->_node.hasPK == 1)) == true - ? 
1 - : 0; - } -} +//static uint8_t UNUSED_FUNC isQueryOnPrimaryKey(const char *primaryColumnName, const tExprNode *pLeft, const tExprNode *pRight) { +// if (pLeft->nodeType == TEXPR_COL_NODE) { +// // if left node is the primary column,return true +// return (strcmp(primaryColumnName, pLeft->pSchema->name) == 0) ? 1 : 0; +// } else { +// // if any children have query on primary key, their parents are also keep this value +// return ((pLeft->nodeType == TEXPR_BINARYEXPR_NODE && pLeft->_node.hasPK == 1) || +// (pRight->nodeType == TEXPR_BINARYEXPR_NODE && pRight->_node.hasPK == 1)) == true +// ? 1 +// : 0; +// } +//} static void reverseCopy(char* dest, const char* src, int16_t type, int32_t numOfRows) { switch(type) { @@ -114,11 +113,11 @@ void tExprTreeDestroy(tExprNode *pNode, void (*fp)(void *)) { return; } - if (pNode->nodeType == TSQL_NODE_EXPR) { + if (pNode->nodeType == TEXPR_BINARYEXPR_NODE || pNode->nodeType == TEXPR_UNARYEXPR_NODE) { doExprTreeDestroy(&pNode, fp); - } else if (pNode->nodeType == TSQL_NODE_VALUE) { + } else if (pNode->nodeType == TEXPR_VALUE_NODE) { taosVariantDestroy(pNode->pVal); - } else if (pNode->nodeType == TSQL_NODE_COL) { + } else if (pNode->nodeType == TEXPR_COL_NODE) { tfree(pNode->pSchema); } @@ -130,17 +129,17 @@ static void doExprTreeDestroy(tExprNode **pExpr, void (*fp)(void *)) { return; } - if ((*pExpr)->nodeType == TSQL_NODE_EXPR) { + if ((*pExpr)->nodeType == TEXPR_BINARYEXPR_NODE) { doExprTreeDestroy(&(*pExpr)->_node.pLeft, fp); doExprTreeDestroy(&(*pExpr)->_node.pRight, fp); if (fp != NULL) { fp((*pExpr)->_node.info); } - } else if ((*pExpr)->nodeType == TSQL_NODE_VALUE) { + } else if ((*pExpr)->nodeType == TEXPR_VALUE_NODE) { taosVariantDestroy((*pExpr)->pVal); free((*pExpr)->pVal); - } else if ((*pExpr)->nodeType == TSQL_NODE_COL) { + } else if ((*pExpr)->nodeType == TEXPR_COL_NODE) { free((*pExpr)->pSchema); } @@ -153,7 +152,7 @@ bool exprTreeApplyFilter(tExprNode *pExpr, const void *pItem, SExprTraverseSupp tExprNode 
*pRight = pExpr->_node.pRight; //non-leaf nodes, recursively traverse the expression tree in the post-root order - if (pLeft->nodeType == TSQL_NODE_EXPR && pRight->nodeType == TSQL_NODE_EXPR) { + if (pLeft->nodeType == TEXPR_BINARYEXPR_NODE && pRight->nodeType == TEXPR_BINARYEXPR_NODE) { if (pExpr->_node.optr == TSDB_RELATION_OR) { // or if (exprTreeApplyFilter(pLeft, pItem, param)) { return true; @@ -180,26 +179,26 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, if (pExprs == NULL) { return; } - +#if 0 tExprNode *pLeft = pExprs->_node.pLeft; tExprNode *pRight = pExprs->_node.pRight; /* the left output has result from the left child syntax tree */ char *pLeftOutput = (char*)malloc(sizeof(int64_t) * numOfRows); - if (pLeft->nodeType == TSQL_NODE_EXPR) { + if (pLeft->nodeType == TEXPR_BINARYEXPR_NODE) { arithmeticTreeTraverse(pLeft, numOfRows, pLeftOutput, param, order, getSourceDataBlock); } - /* the right output has result from the right child syntax tree */ + // the right output has result from the right child syntax tree char *pRightOutput = malloc(sizeof(int64_t) * numOfRows); char *pdata = malloc(sizeof(int64_t) * numOfRows); - if (pRight->nodeType == TSQL_NODE_EXPR) { + if (pRight->nodeType == TEXPR_BINARYEXPR_NODE) { arithmeticTreeTraverse(pRight, numOfRows, pRightOutput, param, order, getSourceDataBlock); } - if (pLeft->nodeType == TSQL_NODE_EXPR) { - if (pRight->nodeType == TSQL_NODE_EXPR) { + if (pLeft->nodeType == TEXPR_BINARYEXPR_NODE) { + if (pRight->nodeType == TEXPR_BINARYEXPR_NODE) { /* * exprLeft + exprRight * the type of returned value of one expression is always double float precious @@ -207,7 +206,7 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); OperatorFn(pLeftOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, pRightOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, pOutput, TSDB_ORDER_ASC); - } else if 
(pRight->nodeType == TSQL_NODE_COL) { // exprLeft + columnRight + } else if (pRight->nodeType == TEXPR_COL_NODE) { // exprLeft + columnRight _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); // set input buffer @@ -219,14 +218,14 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, OperatorFn(pLeftOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, pInputData, numOfRows, pRight->pSchema->type, pOutput, TSDB_ORDER_ASC); } - } else if (pRight->nodeType == TSQL_NODE_VALUE) { // exprLeft + 12 + } else if (pRight->nodeType == TEXPR_VALUE_NODE) { // exprLeft + 12 _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); OperatorFn(pLeftOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, &pRight->pVal->i64, 1, pRight->pVal->nType, pOutput, TSDB_ORDER_ASC); } - } else if (pLeft->nodeType == TSQL_NODE_COL) { + } else if (pLeft->nodeType == TEXPR_COL_NODE) { // column data specified on left-hand-side char *pLeftInputData = getSourceDataBlock(param, pLeft->pSchema->name, pLeft->pSchema->colId); - if (pRight->nodeType == TSQL_NODE_EXPR) { // columnLeft + expr2 + if (pRight->nodeType == TEXPR_BINARYEXPR_NODE) { // columnLeft + expr2 _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); if (order == TSDB_ORDER_DESC) { @@ -236,14 +235,14 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, OperatorFn(pLeftInputData, numOfRows, pLeft->pSchema->type, pRightOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, pOutput, TSDB_ORDER_ASC); } - } else if (pRight->nodeType == TSQL_NODE_COL) { // columnLeft + columnRight + } else if (pRight->nodeType == TEXPR_COL_NODE) { // columnLeft + columnRight // column data specified on right-hand-side char *pRightInputData = getSourceDataBlock(param, pRight->pSchema->name, pRight->pSchema->colId); _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); // both columns are descending order, do not reverse 
the source data OperatorFn(pLeftInputData, numOfRows, pLeft->pSchema->type, pRightInputData, numOfRows, pRight->pSchema->type, pOutput, order); - } else if (pRight->nodeType == TSQL_NODE_VALUE) { // columnLeft + 12 + } else if (pRight->nodeType == TEXPR_VALUE_NODE) { // columnLeft + 12 _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); if (order == TSDB_ORDER_DESC) { @@ -255,11 +254,11 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, } } else { // column data specified on left-hand-side - if (pRight->nodeType == TSQL_NODE_EXPR) { // 12 + expr2 + if (pRight->nodeType == TEXPR_BINARYEXPR_NODE) { // 12 + expr2 _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); OperatorFn(&pLeft->pVal->i64, 1, pLeft->pVal->nType, pRightOutput, numOfRows, TSDB_DATA_TYPE_DOUBLE, pOutput, TSDB_ORDER_ASC); - } else if (pRight->nodeType == TSQL_NODE_COL) { // 12 + columnRight + } else if (pRight->nodeType == TEXPR_COL_NODE) { // 12 + columnRight // column data specified on right-hand-side char *pRightInputData = getSourceDataBlock(param, pRight->pSchema->name, pRight->pSchema->colId); _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); @@ -271,7 +270,7 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, OperatorFn(&pLeft->pVal->i64, 1, pLeft->pVal->nType, pRightInputData, numOfRows, pRight->pSchema->type, pOutput, TSDB_ORDER_ASC); } - } else if (pRight->nodeType == TSQL_NODE_VALUE) { // 12 + 12 + } else if (pRight->nodeType == TEXPR_VALUE_NODE) { // 12 + 12 _arithmetic_operator_fn_t OperatorFn = getArithmeticOperatorFn(pExprs->_node.optr); OperatorFn(&pLeft->pVal->i64, 1, pLeft->pVal->nType, &pRight->pVal->i64, 1, pRight->pVal->nType, pOutput, TSDB_ORDER_ASC); } @@ -280,12 +279,14 @@ void arithmeticTreeTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, tfree(pdata); tfree(pLeftOutput); tfree(pRightOutput); +#endif + } 
static void exprTreeToBinaryImpl(SBufferWriter* bw, tExprNode* expr) { tbufWriteUint8(bw, expr->nodeType); - if (expr->nodeType == TSQL_NODE_VALUE) { + if (expr->nodeType == TEXPR_VALUE_NODE) { SVariant* pVal = expr->pVal; tbufWriteUint32(bw, pVal->nType); @@ -296,16 +297,15 @@ static void exprTreeToBinaryImpl(SBufferWriter* bw, tExprNode* expr) { tbufWriteInt64(bw, pVal->i64); } - } else if (expr->nodeType == TSQL_NODE_COL) { + } else if (expr->nodeType == TEXPR_COL_NODE) { SSchema* pSchema = expr->pSchema; tbufWriteInt16(bw, pSchema->colId); tbufWriteInt16(bw, pSchema->bytes); tbufWriteUint8(bw, pSchema->type); tbufWriteString(bw, pSchema->name); - } else if (expr->nodeType == TSQL_NODE_EXPR) { + } else if (expr->nodeType == TEXPR_BINARYEXPR_NODE) { tbufWriteUint8(bw, expr->_node.optr); - tbufWriteUint8(bw, expr->_node.hasPK); exprTreeToBinaryImpl(bw, expr->_node.pLeft); exprTreeToBinaryImpl(bw, expr->_node.pRight); } @@ -353,7 +353,7 @@ static tExprNode* exprTreeFromBinaryImpl(SBufferReader* br) { CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, pExpr, NULL); pExpr->nodeType = tbufReadUint8(br); - if (pExpr->nodeType == TSQL_NODE_VALUE) { + if (pExpr->nodeType == TEXPR_VALUE_NODE) { SVariant* pVal = exception_calloc(1, sizeof(SVariant)); pExpr->pVal = pVal; @@ -366,7 +366,7 @@ static tExprNode* exprTreeFromBinaryImpl(SBufferReader* br) { pVal->i64 = tbufReadInt64(br); } - } else if (pExpr->nodeType == TSQL_NODE_COL) { + } else if (pExpr->nodeType == TEXPR_COL_NODE) { SSchema* pSchema = exception_calloc(1, sizeof(SSchema)); pExpr->pSchema = pSchema; @@ -375,9 +375,8 @@ static tExprNode* exprTreeFromBinaryImpl(SBufferReader* br) { pSchema->type = tbufReadUint8(br); tbufReadToString(br, pSchema->name, TSDB_COL_NAME_LEN); - } else if (pExpr->nodeType == TSQL_NODE_EXPR) { + } else if (pExpr->nodeType == TEXPR_BINARYEXPR_NODE) { pExpr->_node.optr = tbufReadUint8(br); - pExpr->_node.hasPK = tbufReadUint8(br); pExpr->_node.pLeft = exprTreeFromBinaryImpl(br); 
pExpr->_node.pRight = exprTreeFromBinaryImpl(br); assert(pExpr->_node.pLeft != NULL && pExpr->_node.pRight != NULL); @@ -406,12 +405,12 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) { tExprNode* expr = exception_calloc(1, sizeof(tExprNode)); CLEANUP_PUSH_VOID_PTR_PTR(true, tExprTreeDestroy, expr, NULL); - expr->nodeType = TSQL_NODE_EXPR; + expr->nodeType = TEXPR_BINARYEXPR_NODE; tExprNode* left = exception_calloc(1, sizeof(tExprNode)); expr->_node.pLeft = left; - left->nodeType = TSQL_NODE_COL; + left->nodeType = TEXPR_COL_NODE; SSchema* pSchema = exception_calloc(1, sizeof(SSchema)); left->pSchema = pSchema; @@ -421,7 +420,7 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) { expr->_node.pRight = right; if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_LIKE, QUERY_COND_REL_PREFIX_LIKE_LEN) == 0) { - right->nodeType = TSQL_NODE_VALUE; + right->nodeType = TEXPR_VALUE_NODE; expr->_node.optr = TSDB_RELATION_LIKE; SVariant* pVal = exception_calloc(1, sizeof(SVariant)); right->pVal = pVal; @@ -432,7 +431,7 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) { pVal->nLen = (int32_t)len; } else if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_MATCH, QUERY_COND_REL_PREFIX_MATCH_LEN) == 0) { - right->nodeType = TSQL_NODE_VALUE; + right->nodeType = TEXPR_VALUE_NODE; expr->_node.optr = TSDB_RELATION_MATCH; SVariant* pVal = exception_calloc(1, sizeof(SVariant)); right->pVal = pVal; @@ -442,7 +441,7 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) { pVal->nType = TSDB_DATA_TYPE_BINARY; pVal->nLen = (int32_t)len; } else if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_NMATCH, QUERY_COND_REL_PREFIX_NMATCH_LEN) == 0) { - right->nodeType = TSQL_NODE_VALUE; + right->nodeType = TEXPR_VALUE_NODE; expr->_node.optr = TSDB_RELATION_NMATCH; SVariant* pVal = exception_calloc(1, sizeof(SVariant)); right->pVal = pVal; @@ -452,7 +451,7 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) { pVal->nType = TSDB_DATA_TYPE_BINARY; pVal->nLen = (int32_t)len; 
} else if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN) == 0) { - right->nodeType = TSQL_NODE_VALUE; + right->nodeType = TEXPR_VALUE_NODE; expr->_node.optr = TSDB_RELATION_IN; SVariant* pVal = exception_calloc(1, sizeof(SVariant)); right->pVal = pVal; @@ -699,25 +698,23 @@ err_ret: tfree(tmp); } - tExprNode* exprdup(tExprNode* pNode) { if (pNode == NULL) { return NULL; } tExprNode* pCloned = calloc(1, sizeof(tExprNode)); - if (pNode->nodeType == TSQL_NODE_EXPR) { + if (pNode->nodeType == TEXPR_BINARYEXPR_NODE) { tExprNode* pLeft = exprdup(pNode->_node.pLeft); tExprNode* pRight = exprdup(pNode->_node.pRight); pCloned->_node.pLeft = pLeft; pCloned->_node.pRight = pRight; pCloned->_node.optr = pNode->_node.optr; - pCloned->_node.hasPK = pNode->_node.hasPK; - } else if (pNode->nodeType == TSQL_NODE_VALUE) { + } else if (pNode->nodeType == TEXPR_VALUE_NODE) { pCloned->pVal = calloc(1, sizeof(SVariant)); taosVariantAssign(pCloned->pVal, pNode->pVal); - } else if (pNode->nodeType == TSQL_NODE_COL) { + } else if (pNode->nodeType == TEXPR_COL_NODE) { pCloned->pSchema = calloc(1, sizeof(SSchema)); *pCloned->pSchema = *pNode->pSchema; } diff --git a/source/libs/function/src/tfill.c b/source/libs/function/src/tfill.c new file mode 100644 index 0000000000..f26231a732 --- /dev/null +++ b/source/libs/function/src/tfill.c @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" + +#include "taosdef.h" +#include "taosmsg.h" +#include "ttypes.h" + +#include "tfill.h" +#include "thash.h" +#include "function.h" +#include "common.h" +#include "ttime.h" + +#define FILL_IS_ASC_FILL(_f) ((_f)->order == TSDB_ORDER_ASC) +#define DO_INTERPOLATION(_v1, _v2, _k1, _k2, _k) ((_v1) + ((_v2) - (_v1)) * (((double)(_k)) - ((double)(_k1))) / (((double)(_k2)) - ((double)(_k1)))) +#define GET_FORWARD_DIRECTION_FACTOR(_ord) (((_ord) == TSDB_ORDER_ASC)? 1:-1) + +static void setTagsValue(SFillInfo* pFillInfo, void** data, int32_t genRows) { + for(int32_t j = 0; j < pFillInfo->numOfCols; ++j) { + SFillColInfo* pCol = &pFillInfo->pFillCol[j]; + if (TSDB_COL_IS_NORMAL_COL(pCol->flag) || TSDB_COL_IS_UD_COL(pCol->flag)) { + continue; + } + + char* val1 = elePtrAt(data[j], pCol->col.bytes, genRows); + + assert(pCol->tagIndex >= 0 && pCol->tagIndex < pFillInfo->numOfTags); + SFillTagColInfo* pTag = &pFillInfo->pTags[pCol->tagIndex]; + + assert (pTag->col.colId == pCol->col.colId); + assignVal(val1, pTag->tagVal, pCol->col.bytes, pCol->col.type); + } +} + +static void setNullValueForRow(SFillInfo* pFillInfo, void** data, int32_t numOfCol, int32_t rowIndex) { + // the first are always the timestamp column, so start from the second column. 
+ for (int32_t i = 1; i < numOfCol; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + + char* output = elePtrAt(data[i], pCol->col.bytes, rowIndex); + setNull(output, pCol->col.type, pCol->col.bytes); + } +} + +static void doFillOneRowResult(SFillInfo* pFillInfo, void** data, char** srcData, int64_t ts, bool outOfBound) { + char* prev = pFillInfo->prevValues; + char* next = pFillInfo->nextValues; + + SPoint point1, point2, point; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order); + + // set the primary timestamp column value + int32_t index = pFillInfo->numOfCurrent; + char* val = elePtrAt(data[0], TSDB_KEYSIZE, index); + *(TSKEY*) val = pFillInfo->currentKey; + + // set the other values + if (pFillInfo->type == TSDB_FILL_PREV) { + char* p = FILL_IS_ASC_FILL(pFillInfo) ? prev : next; + + if (p != NULL) { + for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + if (TSDB_COL_IS_TAG(pCol->flag)) { + continue; + } + + char* output = elePtrAt(data[i], pCol->col.bytes, index); + assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type); + } + } else { // no prev value yet, set the value for NULL + setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index); + } + } else if (pFillInfo->type == TSDB_FILL_NEXT) { + char* p = FILL_IS_ASC_FILL(pFillInfo)? 
next : prev; + + if (p != NULL) { + for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + if (TSDB_COL_IS_TAG(pCol->flag)) { + continue; + } + + char* output = elePtrAt(data[i], pCol->col.bytes, index); + assignVal(output, p + pCol->col.offset, pCol->col.bytes, pCol->col.type); + } + } else { // no prev value yet, set the value for NULL + setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index); + } + } else if (pFillInfo->type == TSDB_FILL_LINEAR) { + // TODO : linear interpolation supports NULL value + if (prev != NULL && !outOfBound) { + for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + if (TSDB_COL_IS_TAG(pCol->flag)) { + continue; + } + + int16_t type = pCol->col.type; + int16_t bytes = pCol->col.bytes; + + char *val1 = elePtrAt(data[i], pCol->col.bytes, index); + if (type == TSDB_DATA_TYPE_BINARY|| type == TSDB_DATA_TYPE_NCHAR || type == TSDB_DATA_TYPE_BOOL) { + setNull(val1, pCol->col.type, bytes); + continue; + } + + point1 = (SPoint){.key = *(TSKEY*)(prev), .val = prev + pCol->col.offset}; + point2 = (SPoint){.key = ts, .val = srcData[i] + pFillInfo->index * bytes}; + point = (SPoint){.key = pFillInfo->currentKey, .val = val1}; + taosGetLinearInterpolationVal(&point, type, &point1, &point2, type); + } + } else { + setNullValueForRow(pFillInfo, data, pFillInfo->numOfCols, index); + } + } else { // fill the default value */ + for (int32_t i = 1; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) { + continue; + } + + char* val1 = elePtrAt(data[i], pCol->col.bytes, index); + assignVal(val1, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type); + } + } + + setTagsValue(pFillInfo, data, index); + pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, pFillInfo->interval.sliding * step, pFillInfo->interval.slidingUnit, pFillInfo->precision); 
+ pFillInfo->numOfCurrent++; +} + +static void initBeforeAfterDataBuf(SFillInfo* pFillInfo, char** next) { + if (*next != NULL) { + return; + } + + *next = calloc(1, pFillInfo->rowSize); + for (int i = 1; i < pFillInfo->numOfCols; i++) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + setNull(*next + pCol->col.offset, pCol->col.type, pCol->col.bytes); + } +} + +static void copyCurrentRowIntoBuf(SFillInfo* pFillInfo, char** srcData, char* buf) { + int32_t rowIndex = pFillInfo->index; + for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + memcpy(buf + pCol->col.offset, srcData[i] + rowIndex * pCol->col.bytes, pCol->col.bytes); + } +} + +static int32_t fillResultImpl(SFillInfo* pFillInfo, void** data, int32_t outputRows) { + pFillInfo->numOfCurrent = 0; + + char** srcData = pFillInfo->pData; + char** prev = &pFillInfo->prevValues; + char** next = &pFillInfo->nextValues; + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pFillInfo->order); + + if (FILL_IS_ASC_FILL(pFillInfo)) { + assert(pFillInfo->currentKey >= pFillInfo->start); + } else { + assert(pFillInfo->currentKey <= pFillInfo->start); + } + + while (pFillInfo->numOfCurrent < outputRows) { + int64_t ts = ((int64_t*)pFillInfo->pData[0])[pFillInfo->index]; + + // set the next value for interpolation + if ((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) || + (pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) { + initBeforeAfterDataBuf(pFillInfo, next); + copyCurrentRowIntoBuf(pFillInfo, srcData, *next); + } + + if (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) || (pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) && + pFillInfo->numOfCurrent < outputRows) { + + // fill the gap between two actual input rows + while (((pFillInfo->currentKey < ts && FILL_IS_ASC_FILL(pFillInfo)) || + (pFillInfo->currentKey > ts && !FILL_IS_ASC_FILL(pFillInfo))) && + pFillInfo->numOfCurrent < outputRows) { + doFillOneRowResult(pFillInfo, 
data, srcData, ts, false); + } + + // output buffer is full, abort + if (pFillInfo->numOfCurrent == outputRows) { + pFillInfo->numOfTotal += pFillInfo->numOfCurrent; + return outputRows; + } + } else { + assert(pFillInfo->currentKey == ts); + initBeforeAfterDataBuf(pFillInfo, prev); + if (pFillInfo->type == TSDB_FILL_NEXT && (pFillInfo->index + 1) < pFillInfo->numOfRows) { + initBeforeAfterDataBuf(pFillInfo, next); + ++pFillInfo->index; + copyCurrentRowIntoBuf(pFillInfo, srcData, *next); + --pFillInfo->index; + } + + // assign rows to dst buffer + for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + if (TSDB_COL_IS_TAG(pCol->flag)/* || IS_VAR_DATA_TYPE(pCol->col.type)*/) { + continue; + } + + char* output = elePtrAt(data[i], pCol->col.bytes, pFillInfo->numOfCurrent); + char* src = elePtrAt(srcData[i], pCol->col.bytes, pFillInfo->index); + + if (i == 0 || (pCol->functionId != FUNCTION_COUNT && !isNull(src, pCol->col.type)) || + (pCol->functionId == FUNCTION_COUNT && GET_INT64_VAL(src) != 0)) { + assignVal(output, src, pCol->col.bytes, pCol->col.type); + memcpy(*prev + pCol->col.offset, src, pCol->col.bytes); + } else { // i > 0 and data is null , do interpolation + if (pFillInfo->type == TSDB_FILL_PREV) { + assignVal(output, *prev + pCol->col.offset, pCol->col.bytes, pCol->col.type); + } else if (pFillInfo->type == TSDB_FILL_LINEAR) { + assignVal(output, src, pCol->col.bytes, pCol->col.type); + memcpy(*prev + pCol->col.offset, src, pCol->col.bytes); + } else if (pFillInfo->type == TSDB_FILL_NEXT) { + if (*next) { + assignVal(output, *next + pCol->col.offset, pCol->col.bytes, pCol->col.type); + } else { + setNull(output, pCol->col.type, pCol->col.bytes); + } + } else { + assignVal(output, (char*)&pCol->fillVal.i, pCol->col.bytes, pCol->col.type); + } + } + } + + // set the tag value for final result + setTagsValue(pFillInfo, data, pFillInfo->numOfCurrent); + + pFillInfo->currentKey = taosTimeAdd(pFillInfo->currentKey, 
pFillInfo->interval.sliding * step, + pFillInfo->interval.slidingUnit, pFillInfo->precision); + pFillInfo->index += 1; + pFillInfo->numOfCurrent += 1; + } + + if (pFillInfo->index >= pFillInfo->numOfRows || pFillInfo->numOfCurrent >= outputRows) { + /* the raw data block is exhausted, next value does not exists */ + if (pFillInfo->index >= pFillInfo->numOfRows) { + tfree(*next); + } + + pFillInfo->numOfTotal += pFillInfo->numOfCurrent; + return pFillInfo->numOfCurrent; + } + } + + return pFillInfo->numOfCurrent; +} + +static int64_t appendFilledResult(SFillInfo* pFillInfo, void** output, int64_t resultCapacity) { + /* + * These data are generated according to fill strategy, since the current timestamp is out of the time window of + * real result set. Note that we need to keep the direct previous result rows, to generated the filled data. + */ + pFillInfo->numOfCurrent = 0; + while (pFillInfo->numOfCurrent < resultCapacity) { + doFillOneRowResult(pFillInfo, output, pFillInfo->pData, pFillInfo->start, true); + } + + pFillInfo->numOfTotal += pFillInfo->numOfCurrent; + + assert(pFillInfo->numOfCurrent == resultCapacity); + return resultCapacity; +} + +// there are no duplicated tags in the SFillTagColInfo list +static int32_t setTagColumnInfo(SFillInfo* pFillInfo, int32_t numOfCols, int32_t capacity) { + int32_t rowsize = 0; + int32_t numOfTags = 0; + + int32_t k = 0; + for (int32_t i = 0; i < numOfCols; ++i) { + SFillColInfo* pColInfo = &pFillInfo->pFillCol[i]; + pFillInfo->pData[i] = NULL; + + if (TSDB_COL_IS_TAG(pColInfo->flag) || pColInfo->col.type == TSDB_DATA_TYPE_BINARY) { + numOfTags += 1; + + bool exists = false; + int32_t index = -1; + for (int32_t j = 0; j < k; ++j) { + if (pFillInfo->pTags[j].col.colId == pColInfo->col.colId) { + exists = true; + index = j; + break; + } + } + + if (!exists) { + SSchema* pSchema = &pFillInfo->pTags[k].col; + pSchema->colId = pColInfo->col.colId; + pSchema->type = pColInfo->col.type; + pSchema->bytes = pColInfo->col.bytes; + 
+ pFillInfo->pTags[k].tagVal = calloc(1, pColInfo->col.bytes); + pColInfo->tagIndex = k; + + k += 1; + } else { + pColInfo->tagIndex = index; + } + } + + rowsize += pColInfo->col.bytes; + } + + pFillInfo->numOfTags = numOfTags; + + assert(k <= pFillInfo->numOfTags); + return rowsize; +} + +static int32_t taosNumOfRemainRows(SFillInfo* pFillInfo) { + if (pFillInfo->numOfRows == 0 || (pFillInfo->numOfRows > 0 && pFillInfo->index >= pFillInfo->numOfRows)) { + return 0; + } + + return pFillInfo->numOfRows - pFillInfo->index; +} + +SFillInfo* taosCreateFillInfo(int32_t order, TSKEY skey, int32_t numOfTags, int32_t capacity, int32_t numOfCols, + int64_t slidingTime, int8_t slidingUnit, int8_t precision, int32_t fillType, + SFillColInfo* pCol, void* handle) { + if (fillType == TSDB_FILL_NONE) { + return NULL; + } + + SFillInfo* pFillInfo = calloc(1, sizeof(SFillInfo)); + taosResetFillInfo(pFillInfo, skey); + + pFillInfo->order = order; + pFillInfo->type = fillType; + pFillInfo->pFillCol = pCol; + pFillInfo->numOfTags = numOfTags; + pFillInfo->numOfCols = numOfCols; + pFillInfo->precision = precision; + pFillInfo->alloc = capacity; + pFillInfo->handle = handle; + + pFillInfo->interval.interval = slidingTime; + pFillInfo->interval.intervalUnit = slidingUnit; + pFillInfo->interval.sliding = slidingTime; + pFillInfo->interval.slidingUnit = slidingUnit; + + pFillInfo->pData = malloc(POINTER_BYTES * numOfCols); + +// if (numOfTags > 0) { + pFillInfo->pTags = calloc(numOfCols, sizeof(SFillTagColInfo)); + for (int32_t i = 0; i < numOfCols; ++i) { + pFillInfo->pTags[i].col.colId = -2; // TODO + } +// } + + pFillInfo->rowSize = setTagColumnInfo(pFillInfo, pFillInfo->numOfCols, pFillInfo->alloc); + assert(pFillInfo->rowSize > 0); + + return pFillInfo; +} + +void taosResetFillInfo(SFillInfo* pFillInfo, TSKEY startTimestamp) { + pFillInfo->start = startTimestamp; + pFillInfo->currentKey = startTimestamp; + pFillInfo->end = startTimestamp; + pFillInfo->index = -1; + 
pFillInfo->numOfRows = 0; + pFillInfo->numOfCurrent = 0; + pFillInfo->numOfTotal = 0; +} + +void* taosDestroyFillInfo(SFillInfo* pFillInfo) { + if (pFillInfo == NULL) { + return NULL; + } + + tfree(pFillInfo->prevValues); + tfree(pFillInfo->nextValues); + + for(int32_t i = 0; i < pFillInfo->numOfTags; ++i) { + tfree(pFillInfo->pTags[i].tagVal); + } + + tfree(pFillInfo->pTags); + + tfree(pFillInfo->pData); + tfree(pFillInfo->pFillCol); + + tfree(pFillInfo); + return NULL; +} + +void taosFillSetStartInfo(SFillInfo* pFillInfo, int32_t numOfRows, TSKEY endKey) { + if (pFillInfo->type == TSDB_FILL_NONE) { + return; + } + + pFillInfo->end = endKey; + if (!FILL_IS_ASC_FILL(pFillInfo)) { + pFillInfo->end = taosTimeTruncate(endKey, &pFillInfo->interval, pFillInfo->precision); + } + + pFillInfo->index = 0; + pFillInfo->numOfRows = numOfRows; +} + +void taosFillSetInputDataBlock(SFillInfo* pFillInfo, const SSDataBlock* pInput) { + for (int32_t i = 0; i < pFillInfo->numOfCols; ++i) { + SFillColInfo* pCol = &pFillInfo->pFillCol[i]; + + SColumnInfoData* pColData = taosArrayGet(pInput->pDataBlock, i); + pFillInfo->pData[i] = pColData->pData; + + if (TSDB_COL_IS_TAG(pCol->flag)) { // copy the tag value to tag value buffer + SFillTagColInfo* pTag = &pFillInfo->pTags[pCol->tagIndex]; + assert (pTag->col.colId == pCol->col.colId); + memcpy(pTag->tagVal, pColData->pData, pCol->col.bytes); // TODO not memcpy?? 
+ } + } +} + +bool taosFillHasMoreResults(SFillInfo* pFillInfo) { + int32_t remain = taosNumOfRemainRows(pFillInfo); + if (remain > 0) { + return true; + } + + if (pFillInfo->numOfTotal > 0 && (((pFillInfo->end > pFillInfo->start) && FILL_IS_ASC_FILL(pFillInfo)) || + (pFillInfo->end < pFillInfo->start && !FILL_IS_ASC_FILL(pFillInfo)))) { + return getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, 4096) > 0; + } + + return false; +} + +int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, TSKEY ekey, int32_t maxNumOfRows) { + int64_t* tsList = (int64_t*) pFillInfo->pData[0]; + + int32_t numOfRows = taosNumOfRemainRows(pFillInfo); + + TSKEY ekey1 = ekey; + if (!FILL_IS_ASC_FILL(pFillInfo)) { + pFillInfo->end = taosTimeTruncate(ekey, &pFillInfo->interval, pFillInfo->precision); + } + + int64_t numOfRes = -1; + if (numOfRows > 0) { // still fill gap within current data block, not generating data after the result set. + TSKEY lastKey = tsList[pFillInfo->numOfRows - 1]; + numOfRes = taosTimeCountInterval( + lastKey, + pFillInfo->currentKey, + pFillInfo->interval.sliding, + pFillInfo->interval.slidingUnit, + pFillInfo->precision); + numOfRes += 1; + assert(numOfRes >= numOfRows); + } else { // reach the end of data + if ((ekey1 < pFillInfo->currentKey && FILL_IS_ASC_FILL(pFillInfo)) || + (ekey1 > pFillInfo->currentKey && !FILL_IS_ASC_FILL(pFillInfo))) { + return 0; + } + numOfRes = taosTimeCountInterval( + ekey1, + pFillInfo->currentKey, + pFillInfo->interval.sliding, + pFillInfo->interval.slidingUnit, + pFillInfo->precision); + numOfRes += 1; + } + + return (numOfRes > maxNumOfRows) ? 
maxNumOfRows : numOfRes; +} + +int32_t taosGetLinearInterpolationVal(SPoint* point, int32_t outputType, SPoint* point1, SPoint* point2, int32_t inputType) { + double v1 = -1, v2 = -1; + GET_TYPED_DATA(v1, double, inputType, point1->val); + GET_TYPED_DATA(v2, double, inputType, point2->val); + + double r = DO_INTERPOLATION(v1, v2, point1->key, point2->key, point->key); + SET_TYPED_DATA(point->val, outputType, r); + + return TSDB_CODE_SUCCESS; +} + +int64_t taosFillResultDataBlock(SFillInfo* pFillInfo, void** output, int32_t capacity) { + int32_t remain = taosNumOfRemainRows(pFillInfo); + + int64_t numOfRes = getNumOfResultsAfterFillGap(pFillInfo, pFillInfo->end, capacity); + assert(numOfRes <= capacity); + + // no data existed for fill operation now, append result according to the fill strategy + if (remain == 0) { + appendFilledResult(pFillInfo, output, numOfRes); + } else { + fillResultImpl(pFillInfo, output, (int32_t) numOfRes); + assert(numOfRes == pFillInfo->numOfCurrent); + } + +// qDebug("fill:%p, generated fill result, src block:%d, index:%d, brange:%"PRId64"-%"PRId64", currentKey:%"PRId64", current:%d, total:%d, %p", +// pFillInfo, pFillInfo->numOfRows, pFillInfo->index, pFillInfo->start, pFillInfo->end, pFillInfo->currentKey, pFillInfo->numOfCurrent, +// pFillInfo->numOfTotal, pFillInfo->handle); + + return numOfRes; +} diff --git a/source/libs/function/src/tfunction.c b/source/libs/function/src/tfunction.c new file mode 100644 index 0000000000..7154c48d2a --- /dev/null +++ b/source/libs/function/src/tfunction.c @@ -0,0 +1,421 @@ +#include "os.h" +#include "tarray.h" +#include "function.h" +#include "thash.h" +#include "taggfunction.h" +#include "tscalarfunction.h" + +static SHashObj* functionHashTable = NULL; + +static void doInitFunctionHashTable() { + int numOfEntries = tListLen(aggFunc); + functionHashTable = taosHashInit(numOfEntries, MurmurHash3_32, false, false); + for (int32_t i = 0; i < numOfEntries; i++) { + int32_t len = 
(uint32_t)strlen(aggFunc[i].name); + + SAggFunctionInfo* ptr = &aggFunc[i]; + taosHashPut(functionHashTable, aggFunc[i].name, len, (void*)&ptr, POINTER_BYTES); + } + + numOfEntries = tListLen(scalarFunc); + for(int32_t i = 0; i < numOfEntries; ++i) { + int32_t len = (int32_t) strlen(scalarFunc[i].name); + SScalarFunctionInfo* ptr = &scalarFunc[i]; + taosHashPut(functionHashTable, scalarFunc[i].name, len, (void*)&ptr, POINTER_BYTES); + } +} + +static pthread_once_t functionHashTableInit = PTHREAD_ONCE_INIT; + +int32_t qIsBuiltinFunction(const char* name, int32_t len) { + pthread_once(&functionHashTableInit, doInitFunctionHashTable); + + SAggFunctionInfo** pInfo = taosHashGet(functionHashTable, name, len); + if (pInfo != NULL) { + return (*pInfo)->functionId; + } else { + return -1; + } +} + +bool qIsValidUdf(SArray* pUdfInfo, const char* name, int32_t len, int32_t* functionId) { + return true; +} + +const char* qGetFunctionName(int32_t functionId) { + +} + +bool isTagsQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int16_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + + // "select count(tbname)" query +// if (functId == FUNCTION_COUNT && pExpr->base.colInfo.colId == TSDB_TBNAME_COLUMN_INDEX) { +// continue; +// } + + if (f != FUNCTION_TAGPRJ && f != FUNCTION_TID_TAG) { + return false; + } + } + + return true; +} + +//bool tscMultiRoundQuery(SArray* pFunctionIdList, int32_t index) { +// if (!UTIL_TABLE_IS_SUPER_TABLE(pQueryInfo->pTableMetaInfo[index])) { +// return false; +// } +// +// size_t numOfExprs = (int32_t) getNumOfExprs(pQueryInfo); +// for(int32_t i = 0; i < numOfExprs; ++i) { +// SExprInfo* pExpr = getExprInfo(pQueryInfo, i); +// if (pExpr->base.functionId == FUNCTION_STDDEV_DST) { +// return true; +// } +// } +// +// return false; +//} + +bool isBlockInfoQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 
0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + + if (f == FUNCTION_BLKINFO) { + return true; + } + } + + return false; +} + +bool isProjectionQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TS_DUMMY) { + continue; + } + + if (f != FUNCTION_PRJ && f != FUNCTION_TAGPRJ && f != FUNCTION_TAG && + f != FUNCTION_TS && f != FUNCTION_ARITHM && f != FUNCTION_DIFF && + f != FUNCTION_DERIVATIVE) { + return false; + } + } + + return true; +} + +bool isDiffDerivQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TS_DUMMY) { + continue; + } + + if (f == FUNCTION_DIFF || f == FUNCTION_DERIVATIVE) { + return true; + } + } + + return false; +} + +bool isPointInterpQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TAG || f == FUNCTION_TS) { + continue; + } + + if (f != FUNCTION_INTERP) { + return false; + } + } + + return true; +} + +bool isArithmeticQueryOnAggResult(SArray* pFunctionIdList) { + if (isProjectionQuery(pFunctionIdList)) { + return false; + } + + assert(0); + +// size_t numOfOutput = getNumOfFields(pQueryInfo); +// for(int32_t i = 0; i < numOfOutput; ++i) { +// SExprInfo* pExprInfo = tscFieldInfoGetInternalField(&pQueryInfo->fieldsInfo, i)->pExpr; +// if (pExprInfo->pExpr != NULL) { +// return true; +// } +// } + + return false; +} + +bool isGroupbyColumn(SArray* pFunctionIdList) { +// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); +// int32_t numOfCols = getNumOfColumns(pTableMetaInfo->pTableMeta); +// +// SGroupbyExpr* pGroupbyExpr = 
&pQueryInfo->groupbyExpr; +// for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) { +// SColIndex* pIndex = taosArrayGet(pGroupbyExpr->columnInfo, k); +// if (!TSDB_COL_IS_TAG(pIndex->flag) && pIndex->colIndex < numOfCols) { // group by normal columns +// return true; +// } +// } + + return false; +} + +bool isTopBotQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TS) { + continue; + } + + if (f == FUNCTION_TOP || f == FUNCTION_BOTTOM) { + return true; + } + } + + return false; +} + +bool isTsCompQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + if (num != 1) { + return false; + } + + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, 0); + return f == FUNCTION_TS_COMP; +} + +bool isTWAQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TWA) { + return true; + } + } + + return false; +} + +bool isIrateQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_IRATE) { + return true; + } + } + + return false; +} + +bool isStabledev(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_STDDEV_DST) { + return true; + } + } + + return false; +} + +bool needReverseScan(SArray* pFunctionIdList) { + assert(0); + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TS || f == 
FUNCTION_TS_DUMMY || f == FUNCTION_TAG) { + continue; + } + +// if ((f == FUNCTION_FIRST || f == FUNCTION_FIRST_DST) && pQueryInfo->order.order == TSDB_ORDER_DESC) { +// return true; +// } + + if (f == FUNCTION_LAST || f == FUNCTION_LAST_DST) { + // the scan order to acquire the last result of the specified column +// int32_t order = (int32_t)pExpr->base.param[0].i64; +// if (order != pQueryInfo->order.order) { +// return true; +// } + } + } + + return false; +} + +bool isSimpleAggregateRv(SArray* pFunctionIdList) { + assert(0); + +// if (pQueryInfo->interval.interval > 0 || pQueryInfo->sessionWindow.gap > 0) { +// return false; +// } +// +// if (tscIsDiffDerivQuery(pQueryInfo)) { +// return false; +// } +// +// size_t numOfExprs = getNumOfExprs(pQueryInfo); +// for (int32_t i = 0; i < numOfExprs; ++i) { +// SExprInfo* pExpr = getExprInfo(pQueryInfo, i); +// if (pExpr == NULL) { +// continue; +// } +// +// int32_t functionId = pExpr->base.functionId; +// if (functionId < 0) { +// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1); +// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) { +// return true; +// } +// +// continue; +// } +// +// if (functionId == FUNCTION_TS || functionId == FUNCTION_TS_DUMMY) { +// continue; +// } +// +// if ((!IS_MULTIOUTPUT(aAggs[functionId].status)) || +// (functionId == FUNCTION_TOP || functionId == FUNCTION_BOTTOM || functionId == FUNCTION_TS_COMP)) { +// return true; +// } +// } + + return false; +} + +bool isBlockDistQuery(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, 0); + return (num == 1 && f == FUNCTION_BLKINFO); +} + +bool isTwoStageSTableQuery(SArray* pFunctionIdList, int32_t tableIndex) { +// if (pQueryInfo == NULL) { +// return false; +// } +// +// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); +// if (pTableMetaInfo == NULL) { +// return false; +// } +// +// if 
((pQueryInfo->type & TSDB_QUERY_TYPE_FREE_RESOURCE) == TSDB_QUERY_TYPE_FREE_RESOURCE) { +// return false; +// } +// +// // for ordered projection query, iterate all qualified vnodes sequentially +// if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, tableIndex)) { +// return false; +// } +// +// if (!TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_STABLE_SUBQUERY) && pQueryInfo->command == TSDB_SQL_SELECT) { +// return UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo); +// } + + return false; +} + +bool isProjectionQueryOnSTable(SArray* pFunctionIdList, int32_t tableIndex) { +// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); +// +// /* +// * In following cases, return false for non ordered project query on super table +// * 1. failed to get tableMeta from server; 2. not a super table; 3. limitation is 0; +// * 4. show queries, instead of a select query +// */ +// size_t numOfExprs = getNumOfExprs(pQueryInfo); +// if (pTableMetaInfo == NULL || !UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo) || +// pQueryInfo->command == TSDB_SQL_RETRIEVE_EMPTY_RESULT || numOfExprs == 0) { +// return false; +// } +// +// for (int32_t i = 0; i < numOfExprs; ++i) { +// int32_t functionId = getExprInfo(pQueryInfo, i)->base.functionId; +// +// if (functionId < 0) { +// SUdfInfo* pUdfInfo = taosArrayGet(pQueryInfo->pUdfInfo, -1 * functionId - 1); +// if (pUdfInfo->funcType == TSDB_UDF_TYPE_AGGREGATE) { +// return false; +// } +// +// continue; +// } +// +// if (functionId != FUNCTION_PRJ && +// functionId != FUNCTION_TAGPRJ && +// functionId != FUNCTION_TAG && +// functionId != FUNCTION_TS && +// functionId != FUNCTION_ARITHM && +// functionId != FUNCTION_TS_COMP && +// functionId != FUNCTION_DIFF && +// functionId != FUNCTION_DERIVATIVE && +// functionId != FUNCTION_TS_DUMMY && +// functionId != FUNCTION_TID_TAG) { +// return false; +// } +// } + + return true; +} + +bool hasTagValOutput(SArray* pFunctionIdList) { +// size_t numOfExprs = getNumOfExprs(pQueryInfo); 
+// SExprInfo* pExpr1 = getExprInfo(pQueryInfo, 0); +// +// if (numOfExprs == 1 && pExpr1->base.functionId == FUNCTION_TS_COMP) { +// return true; +// } +// +// for (int32_t i = 0; i < numOfExprs; ++i) { +// SExprInfo* pExpr = getExprInfo(pQueryInfo, i); +// if (pExpr == NULL) { +// continue; +// } +// +// // ts_comp column required the tag value for join filter +// if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) { +// return true; +// } +// } + + return false; +} + +bool timeWindowInterpoRequired(SArray* pFunctionIdList) { + int32_t num = (int32_t) taosArrayGetSize(pFunctionIdList); + for (int32_t i = 0; i < num; ++i) { + int32_t f = *(int16_t*) taosArrayGet(pFunctionIdList, i); + if (f == FUNCTION_TWA || f == FUNCTION_INTERP) { + return true; + } + } + + return false; +} + +//SQueryAttrInfo setQueryType(SArray* pFunctionIdList) { +// assert(pFunctionIdList != NULL); +// +// +//} \ No newline at end of file diff --git a/source/libs/function/src/thistogram.c b/source/libs/function/src/thistogram.c new file mode 100644 index 0000000000..23238ebab7 --- /dev/null +++ b/source/libs/function/src/thistogram.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#include "os.h" + +#include "thistogram.h" +#include "taosdef.h" +#include "taosmsg.h" +#include "tlosertree.h" + +/** + * + * implement the histogram and percentile_approx based on the paper: + * Yael Ben-Haim, Elad Tom-Tov. 
A Streaming Parallel Decision Tree Algorithm, + * The Journal of Machine Learning Research.Volume 11, 3/1/2010 pp.849-872 + * https://dl.acm.org/citation.cfm?id=1756034 + * + * @data 2018-12-14 + * @version 0.1 + * + */ +static int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val); + +SHistogramInfo* tHistogramCreate(int32_t numOfEntries) { + /* need one redundant slot */ + SHistogramInfo* pHisto = malloc(sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfEntries + 1)); + +#if !defined(USE_ARRAYLIST) + pHisto->pList = SSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); + SInsertSupporter* pss = malloc(sizeof(SInsertSupporter)); + pss->numOfEntries = pHisto->maxEntries; + pss->pSkipList = pHisto->pList; + + int32_t ret = tLoserTreeCreate1(&pHisto->pLoserTree, numOfEntries, pss, compare); + pss->pTree = pHisto->pLoserTree; +#endif + + return tHistogramCreateFrom(pHisto, numOfEntries); +} + +SHistogramInfo* tHistogramCreateFrom(void* pBuf, int32_t numOfBins) { + memset(pBuf, 0, sizeof(SHistogramInfo) + sizeof(SHistBin) * (numOfBins + 1)); + + SHistogramInfo* pHisto = (SHistogramInfo*)pBuf; + pHisto->elems = (SHistBin*)((char*)pBuf + sizeof(SHistogramInfo)); + for(int32_t i = 0; i < numOfBins; ++i) { + pHisto->elems[i].val = -DBL_MAX; + } + + pHisto->maxEntries = numOfBins; + + pHisto->min = DBL_MAX; + pHisto->max = -DBL_MAX; + + return pBuf; +} + +int32_t tHistogramAdd(SHistogramInfo** pHisto, double val) { + if (*pHisto == NULL) { + *pHisto = tHistogramCreate(MAX_HISTOGRAM_BIN); + } + +#if defined(USE_ARRAYLIST) + int32_t idx = histoBinarySearch((*pHisto)->elems, (*pHisto)->numOfEntries, val); + assert(idx >= 0 && idx <= (*pHisto)->maxEntries && (*pHisto)->elems != NULL); + + if ((*pHisto)->elems[idx].val == val && idx >= 0) { + (*pHisto)->elems[idx].num += 1; + + if ((*pHisto)->numOfEntries == 0) { + (*pHisto)->numOfEntries += 1; + } + } else { /* insert a new slot */ + if ((*pHisto)->numOfElems >= 1 && idx < 
(*pHisto)->numOfEntries) { + if (idx > 0) { + assert((*pHisto)->elems[idx - 1].val <= val); + } else { + assert((*pHisto)->elems[idx].val > val); + } + } else if ((*pHisto)->numOfElems > 0) { + assert((*pHisto)->elems[(*pHisto)->numOfEntries].val <= val); + } + + histogramCreateBin(*pHisto, idx, val); + } +#else + tSkipListKey key = tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &val, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize); + SHistBin* entry = calloc(1, sizeof(SHistBin)); + entry->val = val; + + tSkipListNode* pResNode = SSkipListPut((*pHisto)->pList, entry, &key, 0); + + SHistBin* pEntry1 = (SHistBin*)pResNode->pData; + pEntry1->index = -1; + + tSkipListNode* pLast = NULL; + + if (pEntry1->num == 0) { /* it is a new node */ + (*pHisto)->numOfEntries += 1; + pEntry1->num += 1; + + /* number of entries reaches the upper limitation */ + if (pResNode->pForward[0] != NULL) { + /* we need to update the last updated slot in loser tree*/ + pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val; + + if ((*pHisto)->ordered) { + int32_t lastIndex = (*pHisto)->maxIndex; + SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + + (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].pData = pResNode; + pEntry1->index = (*pHisto)->pLoserTree->pNode[lastIndex + pTree->numOfEntries].index; + + // update the loser tree + if ((*pHisto)->ordered) { + tLoserTreeAdjust(pTree, pEntry1->index + pTree->numOfEntries); + } + + tSkipListKey kx = + tSkipListCreateKey(TSDB_DATA_TYPE_DOUBLE, &(*pHisto)->max, tDataTypes[TSDB_DATA_TYPE_DOUBLE].nSize); + pLast = tSkipListGetOne((*pHisto)->pList, &kx); + } + } else { + /* this node located at the last position of the skiplist, we do not + * update the loser-tree */ + pEntry1->delta = DBL_MAX; + pLast = pResNode; + } + + if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) { + SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData; + pPrevEntry->delta = val - pPrevEntry->val; + + SLoserTreeInfo* pTree = 
(*pHisto)->pLoserTree; + if ((*pHisto)->ordered) { + tLoserTreeAdjust(pTree, pPrevEntry->index + pTree->numOfEntries); + tLoserTreeDisplay(pTree); + } + } + + if ((*pHisto)->numOfEntries >= (*pHisto)->maxEntries + 1) { + // set the right value for loser-tree + assert((*pHisto)->pLoserTree != NULL); + if (!(*pHisto)->ordered) { + SSkipListPrint((*pHisto)->pList, 1); + + SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + tSkipListNode* pHead = (*pHisto)->pList->pHead.pForward[0]; + + tSkipListNode* p1 = pHead; + + printf("\n"); + while (p1 != NULL) { + printf("%f\t", ((SHistBin*)(p1->pData))->delta); + p1 = p1->pForward[0]; + } + printf("\n"); + + /* last one in skiplist is ignored */ + for (int32_t i = pTree->numOfEntries; i < pTree->totalEntries; ++i) { + pTree->pNode[i].pData = pHead; + pTree->pNode[i].index = i - pTree->numOfEntries; + SHistBin* pBin = (SHistBin*)pHead->pData; + pBin->index = pTree->pNode[i].index; + + pHead = pHead->pForward[0]; + } + + pLast = pHead; + + for (int32_t i = 0; i < pTree->numOfEntries; ++i) { + pTree->pNode[i].index = -1; + } + + tLoserTreeDisplay(pTree); + + for (int32_t i = pTree->totalEntries - 1; i >= pTree->numOfEntries; i--) { + tLoserTreeAdjust(pTree, i); + } + + tLoserTreeDisplay(pTree); + (*pHisto)->ordered = true; + } + + printf("delta is:%lf\n", pEntry1->delta); + + SSkipListPrint((*pHisto)->pList, 1); + + /* the chosen node */ + tSkipListNode* pNode = (*pHisto)->pLoserTree->pNode[0].pData; + SHistBin* pEntry = (SHistBin*)pNode->pData; + + tSkipListNode* pNext = pNode->pForward[0]; + SHistBin* pNextEntry = (SHistBin*)pNext->pData; + assert(pNextEntry->val - pEntry->val == pEntry->delta); + + double newVal = (pEntry->val * pEntry->num + pNextEntry->val * pNextEntry->num) / (pEntry->num + pNextEntry->num); + pEntry->val = newVal; + pNode->key.dKey = newVal; + pEntry->num = pEntry->num + pNextEntry->num; + + // update delta value in current node + pEntry->delta = (pNextEntry->delta + pNextEntry->val) - pEntry->val; + + // 
reset delta value in the previous node + SHistBin* pPrevEntry = (SHistBin*)pNode->pBackward[0]->pData; + if (pPrevEntry) { + pPrevEntry->delta = pEntry->val - pPrevEntry->val; + } + + SLoserTreeInfo* pTree = (*pHisto)->pLoserTree; + if (pNextEntry->index != -1) { + (*pHisto)->maxIndex = pNextEntry->index; + + // set the last element in skiplist, of which delta is FLT_MAX; + pTree->pNode[pNextEntry->index + pTree->numOfEntries].pData = pLast; + ((SHistBin*)pLast->pData)->index = pNextEntry->index; + int32_t f = pTree->pNode[pNextEntry->index + pTree->numOfEntries].index; + printf("disappear index is:%d\n", f); + } + + tLoserTreeAdjust(pTree, pEntry->index + pTree->numOfEntries); + // remove the next node in skiplist + tSkipListRemoveNode((*pHisto)->pList, pNext); + SSkipListPrint((*pHisto)->pList, 1); + + tLoserTreeDisplay((*pHisto)->pLoserTree); + } else { // add to heap + if (pResNode->pForward[0] != NULL) { + pEntry1->delta = ((SHistBin*)pResNode->pForward[0]->pData)->val - val; + } else { + pEntry1->delta = DBL_MAX; + } + + if (pResNode->pBackward[0] != &(*pHisto)->pList->pHead) { + SHistBin* pPrevEntry = (SHistBin*)pResNode->pBackward[0]->pData; + pEntry1->delta = val - pPrevEntry->val; + } + + printf("delta is:%9lf\n", pEntry1->delta); + } + + } else { + SHistBin* pEntry = (SHistBin*)pResNode->pData; + assert(pEntry->val == val); + pEntry->num += 1; + } + +#endif + if (val > (*pHisto)->max) { + (*pHisto)->max = val; + } + + if (val < (*pHisto)->min) { + (*pHisto)->min = val; + } + + (*pHisto)->numOfElems += 1; + return 0; +} + +int32_t histoBinarySearch(SHistBin* pEntry, int32_t len, double val) { + int32_t end = len - 1; + int32_t start = 0; + + while (start <= end) { + int32_t mid = (end - start) / 2 + start; + if (pEntry[mid].val == val) { + return mid; + } + + if (pEntry[mid].val < val) { + start = mid + 1; + } else { + end = mid - 1; + } + } + + int32_t ret = start > end ? 
start : end; + if (ret < 0) { + return 0; + } else { + return ret; + } +} + +static void histogramMergeImpl(SHistBin* pHistBin, int32_t* size) { +#if defined(USE_ARRAYLIST) + int32_t oldSize = *size; + + double delta = DBL_MAX; + int32_t index = -1; + for (int32_t i = 1; i < oldSize; ++i) { + double d = pHistBin[i].val - pHistBin[i - 1].val; + if (d < delta) { + delta = d; + index = i - 1; + } + } + + SHistBin* s1 = &pHistBin[index]; + SHistBin* s2 = &pHistBin[index + 1]; + + double newVal = (s1->val * s1->num + s2->val * s2->num) / (s1->num + s2->num); + s1->val = newVal; + s1->num = s1->num + s2->num; + + memmove(&pHistBin[index + 1], &pHistBin[index + 2], (oldSize - index - 2) * sizeof(SHistBin)); + (*size) -= 1; +#endif +} + +/* optimize this procedure */ +int32_t histogramCreateBin(SHistogramInfo* pHisto, int32_t index, double val) { +#if defined(USE_ARRAYLIST) + int32_t remain = pHisto->numOfEntries - index; + if (remain > 0) { + memmove(&pHisto->elems[index + 1], &pHisto->elems[index], sizeof(SHistBin) * remain); + } + + assert(index >= 0 && index <= pHisto->maxEntries); + + pHisto->elems[index].num = 1; + pHisto->elems[index].val = val; + pHisto->numOfEntries += 1; + + /* we need to merge the slot */ + if (pHisto->numOfEntries == pHisto->maxEntries + 1) { + histogramMergeImpl(pHisto->elems, &pHisto->numOfEntries); + + pHisto->elems[pHisto->maxEntries].val = 0; + pHisto->elems[pHisto->maxEntries].num = 0; + } +#endif + assert(pHisto->numOfEntries <= pHisto->maxEntries); + return 0; +} + +void tHistogramDestroy(SHistogramInfo** pHisto) { + if (*pHisto == NULL) { + return; + } + + free(*pHisto); + *pHisto = NULL; +} + +void tHistogramPrint(SHistogramInfo* pHisto) { + printf("total entries: %d, elements: %"PRId64 "\n", pHisto->numOfEntries, pHisto->numOfElems); +#if defined(USE_ARRAYLIST) + for (int32_t i = 0; i < pHisto->numOfEntries; ++i) { + printf("%d: (%f, %" PRId64 ")\n", i + 1, pHisto->elems[i].val, pHisto->elems[i].num); + } +#else + tSkipListNode* 
pNode = pHisto->pList->pHead.pForward[0]; + + for (int32_t i = 0; i < pHisto->numOfEntries; ++i) { + SHistBin* pEntry = (SHistBin*)pNode->pData; + printf("%d: (%f, %" PRId64 ")\n", i + 1, pEntry->val, pEntry->num); + pNode = pNode->pForward[0]; + } +#endif +} + +/** + * Estimated number of points in the interval (−inf,b]. + * @param pHisto + * @param v + */ +int64_t tHistogramSum(SHistogramInfo* pHisto, double v) { +#if defined(USE_ARRAYLIST) + int32_t slotIdx = histoBinarySearch(pHisto->elems, pHisto->numOfEntries, v); + if (pHisto->elems[slotIdx].val != v) { + slotIdx -= 1; + + if (slotIdx < 0) { + slotIdx = 0; + assert(v <= pHisto->elems[slotIdx].val); + } else { + assert(v >= pHisto->elems[slotIdx].val); + + if (slotIdx + 1 < pHisto->numOfEntries) { + assert(v < pHisto->elems[slotIdx + 1].val); + } + } + } + + double m1 = (double)pHisto->elems[slotIdx].num; + double v1 = pHisto->elems[slotIdx].val; + + double m2 = (double)pHisto->elems[slotIdx + 1].num; + double v2 = pHisto->elems[slotIdx + 1].val; + + double estNum = m1 + (m2 - m1) * (v - v1) / (v2 - v1); + double s1 = (m1 + estNum) * (v - v1) / (2 * (v2 - v1)); + + for (int32_t i = 0; i < slotIdx; ++i) { + s1 += pHisto->elems[i].num; + } + + s1 = s1 + m1 / 2; + + return (int64_t)s1; +#endif +} + +double* tHistogramUniform(SHistogramInfo* pHisto, double* ratio, int32_t num) { +#if defined(USE_ARRAYLIST) + double* pVal = malloc(num * sizeof(double)); + + for (int32_t i = 0; i < num; ++i) { + double numOfElem = (ratio[i] / 100) * pHisto->numOfElems; + + if (numOfElem == 0) { + pVal[i] = pHisto->min; + continue; + } else if (numOfElem <= pHisto->elems[0].num) { + pVal[i] = pHisto->elems[0].val; + continue; + } else if (numOfElem == pHisto->numOfElems) { + pVal[i] = pHisto->max; + continue; + } + + int32_t j = 0; + int64_t total = 0; + + while (j < pHisto->numOfEntries) { + total += pHisto->elems[j].num; + if (total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem) { + break; + } + + j += 1; + } + + 
assert(total <= numOfElem && total + pHisto->elems[j + 1].num > numOfElem); + + double delta = numOfElem - total; + if (fabs(delta) < FLT_EPSILON) { + pVal[i] = pHisto->elems[j].val; + } + + double start = (double)pHisto->elems[j].num; + double range = pHisto->elems[j + 1].num - start; + + if (range == 0) { + pVal[i] = (pHisto->elems[j + 1].val - pHisto->elems[j].val) * delta / start + pHisto->elems[j].val; + } else { + double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range); + pVal[i] = pHisto->elems[j].val + (pHisto->elems[j + 1].val - pHisto->elems[j].val) * factor; + } + } +#else + double* pVal = malloc(num * sizeof(double)); + + for (int32_t i = 0; i < num; ++i) { + double numOfElem = ratio[i] * pHisto->numOfElems; + + tSkipListNode* pFirst = pHisto->pList->pHead.pForward[0]; + SHistBin* pEntry = (SHistBin*)pFirst->pData; + if (numOfElem == 0) { + pVal[i] = pHisto->min; + printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]); + continue; + } else if (numOfElem <= pEntry->num) { + pVal[i] = pEntry->val; + printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]); + continue; + } else if (numOfElem == pHisto->numOfElems) { + pVal[i] = pHisto->max; + printf("i/numofSlot: %f, v:%f, %f\n", ratio[i], numOfElem, pVal[i]); + continue; + } + + int32_t j = 0; + int64_t total = 0; + SHistBin* pPrev = pEntry; + + while (j < pHisto->numOfEntries) { + if (total <= numOfElem && total + pEntry->num > numOfElem) { + break; + } + + total += pEntry->num; + pPrev = pEntry; + + pFirst = pFirst->pForward[0]; + pEntry = (SHistBin*)pFirst->pData; + + j += 1; + } + + assert(total <= numOfElem && total + pEntry->num > numOfElem); + + double delta = numOfElem - total; + if (fabs(delta) < FLT_EPSILON) { + // printf("i/numofSlot: %f, v:%f, %f\n", + // (double)i/numOfSlots, numOfElem, pHisto->elems[j].val); + pVal[i] = pPrev->val; + } + + double start = pPrev->num; + double range = pEntry->num - start; + + if (range == 0) { + 
pVal[i] = (pEntry->val - pPrev->val) * delta / start + pPrev->val; + } else { + double factor = (-2 * start + sqrt(4 * start * start - 4 * range * (-2 * delta))) / (2 * range); + pVal[i] = pPrev->val + (pEntry->val - pPrev->val) * factor; + } + // printf("i/numofSlot: %f, v:%f, %f\n", (double)i/numOfSlots, + // numOfElem, val); + } +#endif + return pVal; +} + +SHistogramInfo* tHistogramMerge(SHistogramInfo* pHisto1, SHistogramInfo* pHisto2, int32_t numOfEntries) { + SHistogramInfo* pResHistogram = tHistogramCreate(numOfEntries); + + // error in histogram info + if (pHisto1->numOfEntries > MAX_HISTOGRAM_BIN || pHisto2->numOfEntries > MAX_HISTOGRAM_BIN) { + return pResHistogram; + } + + SHistBin* pHistoBins = calloc(1, sizeof(SHistBin) * (pHisto1->numOfEntries + pHisto2->numOfEntries)); + int32_t i = 0, j = 0, k = 0; + + while (i < pHisto1->numOfEntries && j < pHisto2->numOfEntries) { + if (pHisto1->elems[i].val < pHisto2->elems[j].val) { + pHistoBins[k++] = pHisto1->elems[i++]; + } else if (pHisto1->elems[i].val > pHisto2->elems[j].val) { + pHistoBins[k++] = pHisto2->elems[j++]; + } else { + pHistoBins[k] = pHisto1->elems[i++]; + pHistoBins[k++].num += pHisto2->elems[j++].num; + } + } + + if (i < pHisto1->numOfEntries) { + int32_t remain = pHisto1->numOfEntries - i; + memcpy(&pHistoBins[k], &pHisto1->elems[i], sizeof(SHistBin) * remain); + k += remain; + } + + if (j < pHisto2->numOfEntries) { + int32_t remain = pHisto2->numOfEntries - j; + memcpy(&pHistoBins[k], &pHisto2->elems[j], sizeof(SHistBin) * remain); + k += remain; + } + + /* update other information */ + pResHistogram->numOfElems = pHisto1->numOfElems + pHisto2->numOfElems; + pResHistogram->min = (pHisto1->min < pHisto2->min) ? pHisto1->min : pHisto2->min; + pResHistogram->max = (pHisto1->max > pHisto2->max) ? 
pHisto1->max : pHisto2->max; + + while (k > numOfEntries) { + histogramMergeImpl(pHistoBins, &k); + } + + pResHistogram->numOfEntries = k; + memcpy(pResHistogram->elems, pHistoBins, sizeof(SHistBin) * k); + + free(pHistoBins); + return pResHistogram; +} diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c new file mode 100644 index 0000000000..5d8876fee1 --- /dev/null +++ b/source/libs/function/src/tpercentile.c @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#include +#include "os.h" + +#include "tpercentile.h" +#include "tpagedfile.h" +#include "taosdef.h" +#include "tcompare.h" +#include "ttypes.h" + +#define DEFAULT_NUM_OF_SLOT 1024 + +int32_t getGroupId(int32_t numOfSlots, int32_t slotIndex, int32_t times) { + return (times * numOfSlots) + slotIndex; +} + +static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) { + SFilePage *buffer = (SFilePage *)calloc(1, pMemBucket->bytes * pMemBucket->pSlots[slotIdx].info.size + sizeof(SFilePage)); + + int32_t groupId = getGroupId(pMemBucket->numOfSlots, slotIdx, pMemBucket->times); + SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId); + + int32_t offset = 0; + for(int32_t i = 0; i < list->size; ++i) { + SPageInfo* pgInfo = *(SPageInfo**) taosArrayGet(list, i); + + SFilePage* pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); + + offset += (int32_t)(pg->num * pMemBucket->bytes); + } + + qsort(buffer->data, pMemBucket->pSlots[slotIdx].info.size, pMemBucket->bytes, pMemBucket->comparFn); + return buffer; +} + +static void resetBoundingBox(MinMaxEntry* range, int32_t type) { + if (IS_SIGNED_NUMERIC_TYPE(type)) { + range->i64MaxVal = INT64_MIN; + range->i64MinVal = INT64_MAX; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + range->u64MaxVal = 0; + range->u64MinVal = UINT64_MAX; + } else { + range->dMaxVal = -DBL_MAX; + range->dMinVal = DBL_MAX; + } +} + +static int32_t setBoundingBox(MinMaxEntry* range, int16_t type, double minval, double maxval) { + if (minval > maxval) { + return -1; + } + + if (IS_SIGNED_NUMERIC_TYPE(type)) { + range->i64MinVal = (int64_t) minval; + range->i64MaxVal = (int64_t) maxval; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)){ + range->u64MinVal = (uint64_t) minval; + range->u64MaxVal = (uint64_t) maxval; + } else { + range->dMinVal = minval; + range->dMaxVal = maxval; + } + + return 0; +} + +static void 
resetPosInfo(SSlotInfo* pInfo) { + pInfo->size = 0; + pInfo->pageId = -1; + pInfo->data = NULL; +} + +double findOnlyResult(tMemBucket *pMemBucket) { + assert(pMemBucket->total == 1); + + for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) { + tMemBucketSlot *pSlot = &pMemBucket->pSlots[i]; + if (pSlot->info.size == 0) { + continue; + } + + int32_t groupId = getGroupId(pMemBucket->numOfSlots, i, pMemBucket->times); + SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId); + assert(list->size == 1); + + SPageInfo* pgInfo = (SPageInfo*) taosArrayGetP(list, 0); + SFilePage* pPage = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + assert(pPage->num == 1); + + double v = 0; + GET_TYPED_DATA(v, double, pMemBucket->type, pPage->data); + return v; + } + + return 0; +} + +int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) { + int64_t v = 0; + GET_TYPED_DATA(v, int64_t, pBucket->type, value); + + int32_t index = -1; + + if (v > pBucket->range.i64MaxVal || v < pBucket->range.i64MinVal) { + return index; + } + + // divide the value range into 1024 buckets + uint64_t span = pBucket->range.i64MaxVal - pBucket->range.i64MinVal; + if (span < pBucket->numOfSlots) { + int64_t delta = v - pBucket->range.i64MinVal; + index = (delta % pBucket->numOfSlots); + } else { + double slotSpan = (double)span / pBucket->numOfSlots; + index = (int32_t)((v - pBucket->range.i64MinVal) / slotSpan); + if (v == pBucket->range.i64MaxVal) { + index -= 1; + } + } + + assert(index >= 0 && index < pBucket->numOfSlots); + return index; +} + +int32_t tBucketUintHash(tMemBucket *pBucket, const void *value) { + int64_t v = 0; + GET_TYPED_DATA(v, uint64_t, pBucket->type, value); + + int32_t index = -1; + + if (v > pBucket->range.u64MaxVal || v < pBucket->range.u64MinVal) { + return index; + } + + // divide the value range into 1024 buckets + uint64_t span = pBucket->range.u64MaxVal - pBucket->range.u64MinVal; + if (span < pBucket->numOfSlots) { + int64_t delta = v - 
pBucket->range.u64MinVal; + index = (int32_t) (delta % pBucket->numOfSlots); + } else { + double slotSpan = (double)span / pBucket->numOfSlots; + index = (int32_t)((v - pBucket->range.u64MinVal) / slotSpan); + if (v == pBucket->range.u64MaxVal) { + index -= 1; + } + } + + assert(index >= 0 && index < pBucket->numOfSlots); + return index; +} + +int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) { + double v = 0; + if (pBucket->type == TSDB_DATA_TYPE_FLOAT) { + v = GET_FLOAT_VAL(value); + } else { + v = GET_DOUBLE_VAL(value); + } + + int32_t index = -1; + + if (v > pBucket->range.dMaxVal || v < pBucket->range.dMinVal) { + return index; + } + + // divide a range of [dMinVal, dMaxVal] into 1024 buckets + double span = pBucket->range.dMaxVal - pBucket->range.dMinVal; + if (span < pBucket->numOfSlots) { + int32_t delta = (int32_t)(v - pBucket->range.dMinVal); + index = (delta % pBucket->numOfSlots); + } else { + double slotSpan = span / pBucket->numOfSlots; + index = (int32_t)((v - pBucket->range.dMinVal) / slotSpan); + if (v == pBucket->range.dMaxVal) { + index -= 1; + } + } + + assert(index >= 0 && index < pBucket->numOfSlots); + return index; +} + +static __perc_hash_func_t getHashFunc(int32_t type) { + if (IS_SIGNED_NUMERIC_TYPE(type)) { + return tBucketIntHash; + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + return tBucketUintHash; + } else { + return tBucketDoubleHash; + } +} + +static void resetSlotInfo(tMemBucket* pBucket) { + for (int32_t i = 0; i < pBucket->numOfSlots; ++i) { + tMemBucketSlot* pSlot = &pBucket->pSlots[i]; + + resetBoundingBox(&pSlot->range, pBucket->type); + resetPosInfo(&pSlot->info); + } +} + +tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval) { + tMemBucket *pBucket = (tMemBucket *)calloc(1, sizeof(tMemBucket)); + if (pBucket == NULL) { + return NULL; + } + + pBucket->numOfSlots = DEFAULT_NUM_OF_SLOT; + pBucket->bufPageSize = DEFAULT_PAGE_SIZE * 4; // 4k per page + + pBucket->type 
= dataType; + pBucket->bytes = nElemSize; + pBucket->total = 0; + pBucket->times = 1; + + pBucket->maxCapacity = 200000; + + if (setBoundingBox(&pBucket->range, pBucket->type, minval, maxval) != 0) { +// qError("MemBucket:%p, invalid value range: %f-%f", pBucket, minval, maxval); + free(pBucket); + return NULL; + } + + pBucket->elemPerPage = (pBucket->bufPageSize - sizeof(SFilePage))/pBucket->bytes; + pBucket->comparFn = getKeyComparFunc(pBucket->type, TSDB_ORDER_ASC); + + pBucket->hashFunc = getHashFunc(pBucket->type); + if (pBucket->hashFunc == NULL) { +// qError("MemBucket:%p, not support data type %d, failed", pBucket, pBucket->type); + free(pBucket); + return NULL; + } + + pBucket->pSlots = (tMemBucketSlot *)calloc(pBucket->numOfSlots, sizeof(tMemBucketSlot)); + if (pBucket->pSlots == NULL) { + free(pBucket); + return NULL; + } + + resetSlotInfo(pBucket); + + int32_t ret = createDiskbasedResultBuffer(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, 1, tsTempDir); + if (ret != 0) { + tMemBucketDestroy(pBucket); + return NULL; + } + +// qDebug("MemBucket:%p, elem size:%d", pBucket, pBucket->bytes); + return pBucket; +} + +void tMemBucketDestroy(tMemBucket *pBucket) { + if (pBucket == NULL) { + return; + } + + destroyResultBuf(pBucket->pBuffer); + tfree(pBucket->pSlots); + tfree(pBucket); +} + +void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataType) { + if (IS_SIGNED_NUMERIC_TYPE(dataType)) { + int64_t v = 0; + GET_TYPED_DATA(v, int64_t, dataType, data); + + if (r->i64MinVal > v) { + r->i64MinVal = v; + } + + if (r->i64MaxVal < v) { + r->i64MaxVal = v; + } + } else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) { + uint64_t v = 0; + GET_TYPED_DATA(v, uint64_t, dataType, data); + + if (r->i64MinVal > v) { + r->i64MinVal = v; + } + + if (r->i64MaxVal < v) { + r->i64MaxVal = v; + } + } else if (IS_FLOAT_TYPE(dataType)) { + double v = 0; + GET_TYPED_DATA(v, double, dataType, data); + + if (r->dMinVal > v) { + r->dMinVal = v; 
+ } + + if (r->dMaxVal < v) { + r->dMaxVal = v; + } + } else { + assert(0); + } +} + +/* + * in memory bucket, we only accept data array list + */ +int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) { + assert(pBucket != NULL && data != NULL && size > 0); + + int32_t count = 0; + int32_t bytes = pBucket->bytes; + for (int32_t i = 0; i < size; ++i) { + char *d = (char *) data + i * bytes; + + int32_t index = (pBucket->hashFunc)(pBucket, d); + if (index < 0) { + continue; + } + + count += 1; + + tMemBucketSlot *pSlot = &pBucket->pSlots[index]; + tMemBucketUpdateBoundingBox(&pSlot->range, d, pBucket->type); + + // ensure available memory pages to allocate + int32_t groupId = getGroupId(pBucket->numOfSlots, index, pBucket->times); + int32_t pageId = -1; + + if (pSlot->info.data == NULL || pSlot->info.data->num >= pBucket->elemPerPage) { + if (pSlot->info.data != NULL) { + assert(pSlot->info.data->num >= pBucket->elemPerPage && pSlot->info.size > 0); + + // keep the pointer in memory + releaseResBufPage(pBucket->pBuffer, pSlot->info.data); + pSlot->info.data = NULL; + } + + pSlot->info.data = getNewDataBuf(pBucket->pBuffer, groupId, &pageId); + pSlot->info.pageId = pageId; + } + + memcpy(pSlot->info.data->data + pSlot->info.data->num * pBucket->bytes, d, pBucket->bytes); + + pSlot->info.data->num += 1; + pSlot->info.size += 1; + } + + pBucket->total += count; + return 0; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +/* + * + * now, we need to find the minimum value of the next slot for + * interpolating the percentile value + * j is the last slot of current segment, we need to get the first + * slot of the next segment. 
+ */ +static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int32_t slotIdx) { + int32_t j = slotIdx + 1; + while (j < pMemBucket->numOfSlots && (pMemBucket->pSlots[j].info.size == 0)) { + ++j; + } + + assert(j < pMemBucket->numOfSlots); + return pMemBucket->pSlots[j].range; +} + +static bool isIdenticalData(tMemBucket *pMemBucket, int32_t index); + +static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) { + assert(isIdenticalData(pMemBucket, slotIndex)); + + tMemBucketSlot *pSlot = &pMemBucket->pSlots[slotIndex]; + + double finalResult = 0.0; + if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) { + finalResult = (double) pSlot->range.i64MinVal; + } else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) { + finalResult = (double) pSlot->range.u64MinVal; + } else { + finalResult = (double) pSlot->range.dMinVal; + } + + return finalResult; +} + +double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { + int32_t num = 0; + + for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) { + tMemBucketSlot *pSlot = &pMemBucket->pSlots[i]; + if (pSlot->info.size == 0) { + continue; + } + + // required value in current slot + if (num < (count + 1) && num + pSlot->info.size >= (count + 1)) { + if (pSlot->info.size + num == (count + 1)) { + /* + * now, we need to find the minimum value of the next slot for interpolating the percentile value + * j is the last slot of current segment, we need to get the first slot of the next segment. 
+ */ + MinMaxEntry next = getMinMaxEntryOfNextSlotWithData(pMemBucket, i); + + double maxOfThisSlot = 0; + double minOfNextSlot = 0; + if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) { + maxOfThisSlot = (double) pSlot->range.i64MaxVal; + minOfNextSlot = (double) next.i64MinVal; + } else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) { + maxOfThisSlot = (double) pSlot->range.u64MaxVal; + minOfNextSlot = (double) next.u64MinVal; + } else { + maxOfThisSlot = (double) pSlot->range.dMaxVal; + minOfNextSlot = (double) next.dMinVal; + } + + assert(minOfNextSlot > maxOfThisSlot); + + double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; + return val; + } + + if (pSlot->info.size <= pMemBucket->maxCapacity) { + // data in buffer and file are merged together to be processed. + SFilePage *buffer = loadDataFromFilePage(pMemBucket, i); + int32_t currentIdx = count - num; + + char *thisVal = buffer->data + pMemBucket->bytes * currentIdx; + char *nextVal = thisVal + pMemBucket->bytes; + + double td = 1.0, nd = 1.0; + GET_TYPED_DATA(td, double, pMemBucket->type, thisVal); + GET_TYPED_DATA(nd, double, pMemBucket->type, nextVal); + + double val = (1 - fraction) * td + fraction * nd; + tfree(buffer); + + return val; + } else { // incur a second round bucket split + if (isIdenticalData(pMemBucket, i)) { + return getIdenticalDataVal(pMemBucket, i); + } + + // try next round + pMemBucket->times += 1; +// qDebug("MemBucket:%p, start next round data bucketing, time:%d", pMemBucket, pMemBucket->times); + + pMemBucket->range = pSlot->range; + pMemBucket->total = 0; + + resetSlotInfo(pMemBucket); + + int32_t groupId = getGroupId(pMemBucket->numOfSlots, i, pMemBucket->times - 1); + SIDList list = getDataBufPagesIdList(pMemBucket->pBuffer, groupId); + assert(list->size > 0); + + for (int32_t f = 0; f < list->size; ++f) { + SPageInfo *pgInfo = *(SPageInfo **)taosArrayGet(list, f); + SFilePage *pg = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId); + + 
tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num); + releaseResBufPageInfo(pMemBucket->pBuffer, pgInfo); + } + + return getPercentileImpl(pMemBucket, count - num, fraction); + } + } else { + num += pSlot->info.size; + } + } + + return 0; +} + +double getPercentile(tMemBucket *pMemBucket, double percent) { + if (pMemBucket->total == 0) { + return 0.0; + } + + // if only one elements exists, return it + if (pMemBucket->total == 1) { + return findOnlyResult(pMemBucket); + } + + percent = fabs(percent); + + // find the min/max value, no need to scan all data in bucket + if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) { + MinMaxEntry* pRange = &pMemBucket->range; + + if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) { + double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal); + return v; + } else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) { + double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal); + return v; + } else { + return fabs(percent - 100) < DBL_EPSILON? 
pRange->dMaxVal:pRange->dMinVal; + } + } + + double percentVal = (percent * (pMemBucket->total - 1)) / ((double)100.0); + + // do put data by using buckets + int32_t orderIdx = (int32_t)percentVal; + return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); +} + +/* + * check if data in one slot are all identical only need to compare with the bounding box + */ +bool isIdenticalData(tMemBucket *pMemBucket, int32_t index) { + tMemBucketSlot *pSeg = &pMemBucket->pSlots[index]; + + if (IS_FLOAT_TYPE(pMemBucket->type)) { + return fabs(pSeg->range.dMaxVal - pSeg->range.dMinVal) < DBL_EPSILON; + } else { + return pSeg->range.i64MinVal == pSeg->range.i64MaxVal; + } +} diff --git a/source/libs/function/src/tscalarfunction.c b/source/libs/function/src/tscalarfunction.c new file mode 100644 index 0000000000..3e19a09a4d --- /dev/null +++ b/source/libs/function/src/tscalarfunction.c @@ -0,0 +1,10 @@ +#include "tscalarfunction.h" + +SScalarFunctionInfo scalarFunc[1] = { + { + + }, + +}; + + diff --git a/source/libs/function/src/ttszip.c b/source/libs/function/src/ttszip.c new file mode 100644 index 0000000000..fed857a8e7 --- /dev/null +++ b/source/libs/function/src/ttszip.c @@ -0,0 +1,1108 @@ +#include "ttszip.h" +#include +#include "taoserror.h" +#include "tcompression.h" +#include "tutil.h" + +static int32_t getDataStartOffset(); +static void TSBufUpdateGroupInfo(STSBuf* pTSBuf, int32_t index, STSGroupBlockInfo* pBlockInfo); +static STSBuf* allocResForTSBuf(STSBuf* pTSBuf); +static int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader); + +/** + * todo error handling + * support auto closeable tmp file + * @param path + * @return + */ +STSBuf* tsBufCreate(bool autoDelete, int32_t order) { + STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); + if (pTSBuf == NULL) { + return NULL; + } + + pTSBuf->autoDelete = autoDelete; + + taosGetTmpfilePath(tsTempDir, "join", pTSBuf->path); + pTSBuf->f = fopen(pTSBuf->path, "wb+"); + if (pTSBuf->f == NULL) { + 
free(pTSBuf); + return NULL; + } + + if (!autoDelete) { + remove(pTSBuf->path); + } + + if (NULL == allocResForTSBuf(pTSBuf)) { + return NULL; + } + + // update the header info + STSBufFileHeader header = {.magic = TS_COMP_FILE_MAGIC, .numOfGroup = pTSBuf->numOfGroups, .tsOrder = TSDB_ORDER_ASC}; + STSBufUpdateHeader(pTSBuf, &header); + + tsBufResetPos(pTSBuf); + pTSBuf->cur.order = TSDB_ORDER_ASC; + + pTSBuf->tsOrder = order; + + return pTSBuf; +} + +STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete) { + STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); + if (pTSBuf == NULL) { + return NULL; + } + + pTSBuf->autoDelete = autoDelete; + + tstrncpy(pTSBuf->path, path, sizeof(pTSBuf->path)); + + pTSBuf->f = fopen(pTSBuf->path, "rb+"); + if (pTSBuf->f == NULL) { + free(pTSBuf); + return NULL; + } + + if (allocResForTSBuf(pTSBuf) == NULL) { + return NULL; + } + + // validate the file magic number + STSBufFileHeader header = {0}; + int32_t ret = fseek(pTSBuf->f, 0, SEEK_SET); + UNUSED(ret); + size_t sz = fread(&header, 1, sizeof(STSBufFileHeader), pTSBuf->f); + UNUSED(sz); + + // invalid file + if (header.magic != TS_COMP_FILE_MAGIC) { + tsBufDestroy(pTSBuf); + return NULL; + } + + if (header.numOfGroup > pTSBuf->numOfAlloc) { + pTSBuf->numOfAlloc = header.numOfGroup; + STSGroupBlockInfoEx* tmp = realloc(pTSBuf->pData, sizeof(STSGroupBlockInfoEx) * pTSBuf->numOfAlloc); + if (tmp == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->pData = tmp; + } + + pTSBuf->numOfGroups = header.numOfGroup; + + // check the ts order + pTSBuf->tsOrder = header.tsOrder; + if (pTSBuf->tsOrder != TSDB_ORDER_ASC && pTSBuf->tsOrder != TSDB_ORDER_DESC) { +// tscError("invalid order info in buf:%d", pTSBuf->tsOrder); + tsBufDestroy(pTSBuf); + return NULL; + } + + size_t infoSize = sizeof(STSGroupBlockInfo) * pTSBuf->numOfGroups; + + STSGroupBlockInfo* buf = (STSGroupBlockInfo*)calloc(1, infoSize); + if (buf == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + //int64_t 
pos = ftell(pTSBuf->f); //pos not used + sz = fread(buf, infoSize, 1, pTSBuf->f); + UNUSED(sz); + + // the length value for each vnode is not kept in file, so does not set the length value + for (int32_t i = 0; i < pTSBuf->numOfGroups; ++i) { + STSGroupBlockInfoEx* pBlockList = &pTSBuf->pData[i]; + memcpy(&pBlockList->info, &buf[i], sizeof(STSGroupBlockInfo)); + } + free(buf); + + ret = fseek(pTSBuf->f, 0, SEEK_END); + UNUSED(ret); + + struct stat fileStat; + if (fstat(fileno(pTSBuf->f), &fileStat) != 0) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->fileSize = (uint32_t)fileStat.st_size; + tsBufResetPos(pTSBuf); + + // ascending by default + pTSBuf->cur.order = TSDB_ORDER_ASC; + +// tscDebug("create tsBuf from file:%s, fd:%d, size:%d, numOfGroups:%d, autoDelete:%d", pTSBuf->path, fileno(pTSBuf->f), +// pTSBuf->fileSize, pTSBuf->numOfGroups, pTSBuf->autoDelete); + + return pTSBuf; +} + +void* tsBufDestroy(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return NULL; + } + + tfree(pTSBuf->assistBuf); + tfree(pTSBuf->tsData.rawBuf); + + tfree(pTSBuf->pData); + tfree(pTSBuf->block.payload); + + if (!pTSBuf->remainOpen) { + fclose(pTSBuf->f); + } + + if (pTSBuf->autoDelete) { +// ("tsBuf %p destroyed, delete tmp file:%s", pTSBuf, pTSBuf->path); + remove(pTSBuf->path); + } else { +// tscDebug("tsBuf %p destroyed, tmp file:%s, remains", pTSBuf, pTSBuf->path); + } + + taosVariantDestroy(&pTSBuf->block.tag); + free(pTSBuf); + return NULL; +} + +static STSGroupBlockInfoEx* tsBufGetLastGroupInfo(STSBuf* pTSBuf) { + int32_t last = pTSBuf->numOfGroups - 1; + + assert(last >= 0); + return &pTSBuf->pData[last]; +} + +static STSGroupBlockInfoEx* addOneGroupInfo(STSBuf* pTSBuf, int32_t id) { + if (pTSBuf->numOfAlloc <= pTSBuf->numOfGroups) { + uint32_t newSize = (uint32_t)(pTSBuf->numOfAlloc * 1.5); + assert((int32_t)newSize > pTSBuf->numOfAlloc); + + STSGroupBlockInfoEx* tmp = (STSGroupBlockInfoEx*)realloc(pTSBuf->pData, sizeof(STSGroupBlockInfoEx) * newSize); + if (tmp == 
NULL) { + return NULL; + } + + pTSBuf->pData = tmp; + pTSBuf->numOfAlloc = newSize; + memset(&pTSBuf->pData[pTSBuf->numOfGroups], 0, sizeof(STSGroupBlockInfoEx) * (newSize - pTSBuf->numOfGroups)); + } + + if (pTSBuf->numOfGroups > 0) { + STSGroupBlockInfoEx* pPrevBlockInfoEx = tsBufGetLastGroupInfo(pTSBuf); + + // update prev vnode length info in file + TSBufUpdateGroupInfo(pTSBuf, pTSBuf->numOfGroups - 1, &pPrevBlockInfoEx->info); + } + + // set initial value for vnode block + STSGroupBlockInfo* pBlockInfo = &pTSBuf->pData[pTSBuf->numOfGroups].info; + pBlockInfo->id = id; + pBlockInfo->offset = pTSBuf->fileSize; + assert(pBlockInfo->offset >= getDataStartOffset()); + + // update vnode info in file + TSBufUpdateGroupInfo(pTSBuf, pTSBuf->numOfGroups, pBlockInfo); + + // add one vnode info + pTSBuf->numOfGroups += 1; + + // update the header info + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfGroup = pTSBuf->numOfGroups, .tsOrder = pTSBuf->tsOrder}; + + STSBufUpdateHeader(pTSBuf, &header); + return tsBufGetLastGroupInfo(pTSBuf); +} + +static void shrinkBuffer(STSList* ptsData) { + // shrink tmp buffer size if it consumes too many memory compared to the pre-defined size + if (ptsData->allocSize >= ptsData->threshold * 2) { + char* rawBuf = realloc(ptsData->rawBuf, MEM_BUF_SIZE); + if(rawBuf) { + ptsData->rawBuf = rawBuf; + ptsData->allocSize = MEM_BUF_SIZE; + } + } +} + +static int32_t getTagAreaLength(SVariant* pa) { + int32_t t = sizeof(pa->nLen) * 2 + sizeof(pa->nType); + if (pa->nType != TSDB_DATA_TYPE_NULL) { + t += pa->nLen; + } + + return t; +} + +static void writeDataToDisk(STSBuf* pTSBuf) { + if (pTSBuf->tsData.len == 0) { + return; + } + + STSBlock* pBlock = &pTSBuf->block; + STSList* pTsData = &pTSBuf->tsData; + + pBlock->numOfElem = pTsData->len / TSDB_KEYSIZE; + pBlock->compLen = + tsCompressTimestamp(pTsData->rawBuf, pTsData->len, pTsData->len/TSDB_KEYSIZE, pBlock->payload, pTsData->allocSize, + TWO_STAGE_COMP, pTSBuf->assistBuf, 
pTSBuf->bufSize); + + int64_t r = fseek(pTSBuf->f, pTSBuf->fileSize, SEEK_SET); + assert(r == 0); + + /* + * format for output data: + * 1. tags, number of ts, size after compressed, payload, size after compressed + * 2. tags, number of ts, size after compressed, payload, size after compressed + * + * both side has the compressed length is used to support load data forwards/backwords. + */ + int32_t metaLen = 0; + metaLen += (int32_t)fwrite(&pBlock->tag.nType, 1, sizeof(pBlock->tag.nType), pTSBuf->f); + + int32_t trueLen = pBlock->tag.nLen; + if (pBlock->tag.nType == TSDB_DATA_TYPE_BINARY || pBlock->tag.nType == TSDB_DATA_TYPE_NCHAR) { + metaLen += (int32_t)fwrite(&pBlock->tag.nLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + metaLen += (int32_t)fwrite(pBlock->tag.pz, 1, (size_t)pBlock->tag.nLen, pTSBuf->f); + } else if (pBlock->tag.nType == TSDB_DATA_TYPE_FLOAT) { + metaLen += (int32_t)fwrite(&pBlock->tag.nLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + float tfloat = (float)pBlock->tag.d; + metaLen += (int32_t)fwrite(&tfloat, 1, (size_t) pBlock->tag.nLen, pTSBuf->f); + } else if (pBlock->tag.nType != TSDB_DATA_TYPE_NULL) { + metaLen += (int32_t)fwrite(&pBlock->tag.nLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + metaLen += (int32_t)fwrite(&pBlock->tag.i64, 1, (size_t) pBlock->tag.nLen, pTSBuf->f); + } else { + trueLen = 0; + metaLen += (int32_t)fwrite(&trueLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + } + + fwrite(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); + fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + fwrite(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); + fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + + metaLen += (int32_t) fwrite(&trueLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + assert(metaLen == getTagAreaLength(&pBlock->tag)); + + int32_t blockSize = metaLen + sizeof(pBlock->numOfElem) + sizeof(pBlock->compLen) * 2 + pBlock->compLen; + pTSBuf->fileSize += blockSize; + + 
pTSBuf->tsData.len = 0; + + STSGroupBlockInfoEx* pGroupBlockInfoEx = tsBufGetLastGroupInfo(pTSBuf); + + pGroupBlockInfoEx->info.compLen += blockSize; + pGroupBlockInfoEx->info.numOfBlocks += 1; + + shrinkBuffer(&pTSBuf->tsData); +} + +static void expandBuffer(STSList* ptsData, int32_t inputSize) { + if (ptsData->allocSize - ptsData->len < inputSize) { + int32_t newSize = inputSize + ptsData->len; + char* tmp = realloc(ptsData->rawBuf, (size_t)newSize); + if (tmp == NULL) { + // todo + } + + ptsData->rawBuf = tmp; + ptsData->allocSize = newSize; + } +} + +STSBlock* readDataFromDisk(STSBuf* pTSBuf, int32_t order, bool decomp) { + STSBlock* pBlock = &pTSBuf->block; + + // clear the memory buffer + pBlock->compLen = 0; + pBlock->padding = 0; + pBlock->numOfElem = 0; + + int32_t offset = -1; + + if (order == TSDB_ORDER_DESC) { + /* + * set the right position for the reversed traverse, the reversed traverse is started from + * the end of each comp data block + */ + int32_t prev = -(int32_t) (sizeof(pBlock->padding) + sizeof(pBlock->tag.nLen)); + int32_t ret = fseek(pTSBuf->f, prev, SEEK_CUR); + size_t sz = fread(&pBlock->padding, 1, sizeof(pBlock->padding), pTSBuf->f); + sz = fread(&pBlock->tag.nLen, 1, sizeof(pBlock->tag.nLen), pTSBuf->f); + UNUSED(sz); + + pBlock->compLen = pBlock->padding; + + offset = pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + getTagAreaLength(&pBlock->tag); + ret = fseek(pTSBuf->f, -offset, SEEK_CUR); + UNUSED(ret); + } + + fread(&pBlock->tag.nType, sizeof(pBlock->tag.nType), 1, pTSBuf->f); + fread(&pBlock->tag.nLen, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); + + // NOTE: mix types tags are not supported + size_t sz = 0; + if (pBlock->tag.nType == TSDB_DATA_TYPE_BINARY || pBlock->tag.nType == TSDB_DATA_TYPE_NCHAR) { + char* tp = realloc(pBlock->tag.pz, pBlock->tag.nLen + 1); + assert(tp != NULL); + + memset(tp, 0, pBlock->tag.nLen + 1); + pBlock->tag.pz = tp; + + sz = fread(pBlock->tag.pz, (size_t)pBlock->tag.nLen, 1, 
pTSBuf->f); + UNUSED(sz); + } else if (pBlock->tag.nType == TSDB_DATA_TYPE_FLOAT) { + float tfloat = 0; + sz = fread(&tfloat, (size_t) pBlock->tag.nLen, 1, pTSBuf->f); + pBlock->tag.d = (double)tfloat; + UNUSED(sz); + } else if (pBlock->tag.nType != TSDB_DATA_TYPE_NULL) { //TODO check the return value + sz = fread(&pBlock->tag.i64, (size_t) pBlock->tag.nLen, 1, pTSBuf->f); + UNUSED(sz); + } + + sz = fread(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); + UNUSED(sz); + sz = fread(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + UNUSED(sz); + sz = fread(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); + + if (decomp) { + pTSBuf->tsData.len = + tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, + pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); + } + + // read the comp length at the length of comp block + sz = fread(&pBlock->padding, sizeof(pBlock->padding), 1, pTSBuf->f); + assert(pBlock->padding == pBlock->compLen); + + int32_t n = 0; + sz = fread(&n, sizeof(pBlock->tag.nLen), 1, pTSBuf->f); + if (pBlock->tag.nType == TSDB_DATA_TYPE_NULL) { + assert(n == 0); + } else { + assert(n == pBlock->tag.nLen); + } + + UNUSED(sz); + + // for backwards traverse, set the start position at the end of previous block + if (order == TSDB_ORDER_DESC) { + int32_t r = fseek(pTSBuf->f, -offset, SEEK_CUR); + UNUSED(r); + } + + return pBlock; +} + +// set the order of ts buffer if the ts order has not been set yet +static int32_t setCheckTSOrder(STSBuf* pTSBuf, const char* pData, int32_t len) { + STSList* ptsData = &pTSBuf->tsData; + + if (pTSBuf->tsOrder == -1) { + if (ptsData->len > 0) { + TSKEY lastKey = *(TSKEY*)(ptsData->rawBuf + ptsData->len - TSDB_KEYSIZE); + + if (lastKey > *(TSKEY*)pData) { + pTSBuf->tsOrder = TSDB_ORDER_DESC; + } else { + pTSBuf->tsOrder = TSDB_ORDER_ASC; + } + } else if (len > TSDB_KEYSIZE) { + // no data in current vnode, more than one ts is added, 
check the orders + TSKEY k1 = *(TSKEY*)(pData); + TSKEY k2 = *(TSKEY*)(pData + TSDB_KEYSIZE); + + if (k1 < k2) { + pTSBuf->tsOrder = TSDB_ORDER_ASC; + } else if (k1 > k2) { + pTSBuf->tsOrder = TSDB_ORDER_DESC; + } else { + // todo handle error + } + } + } else { + // todo the timestamp order is set, check the asc/desc order of appended data + } + + return TSDB_CODE_SUCCESS; +} + +void tsBufAppend(STSBuf* pTSBuf, int32_t id, SVariant* tag, const char* pData, int32_t len) { + STSGroupBlockInfoEx* pBlockInfo = NULL; + STSList* ptsData = &pTSBuf->tsData; + + if (pTSBuf->numOfGroups == 0 || tsBufGetLastGroupInfo(pTSBuf)->info.id != id) { + writeDataToDisk(pTSBuf); + shrinkBuffer(ptsData); + + pBlockInfo = addOneGroupInfo(pTSBuf, id); + } else { + pBlockInfo = tsBufGetLastGroupInfo(pTSBuf); + } + + assert(pBlockInfo->info.id == id); + + if ((taosVariantCompare(&pTSBuf->block.tag, tag) != 0) && ptsData->len > 0) { + // new arrived data with different tags value, save current value into disk first + writeDataToDisk(pTSBuf); + } else { + expandBuffer(ptsData, len); + } + + taosVariantAssign(&pTSBuf->block.tag, tag); + memcpy(ptsData->rawBuf + ptsData->len, pData, (size_t)len); + + // todo check return value + setCheckTSOrder(pTSBuf, pData, len); + + ptsData->len += len; + pBlockInfo->len += len; + + pTSBuf->numOfTotal += len / TSDB_KEYSIZE; + + // the size of raw data exceeds the size of the default prepared buffer, so + // during getBufBlock, the output buffer needs to be large enough. 
+ if (ptsData->len >= ptsData->threshold) { + writeDataToDisk(pTSBuf); + shrinkBuffer(ptsData); + } + + tsBufResetPos(pTSBuf); +} + +void tsBufFlush(STSBuf* pTSBuf) { + if (pTSBuf->tsData.len <= 0) { + return; + } + + writeDataToDisk(pTSBuf); + shrinkBuffer(&pTSBuf->tsData); + + STSGroupBlockInfoEx* pBlockInfoEx = tsBufGetLastGroupInfo(pTSBuf); + + // update prev vnode length info in file + TSBufUpdateGroupInfo(pTSBuf, pTSBuf->numOfGroups - 1, &pBlockInfoEx->info); + + // save the ts order into header + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfGroup = pTSBuf->numOfGroups, .tsOrder = pTSBuf->tsOrder}; + STSBufUpdateHeader(pTSBuf, &header); +} + +static int32_t tsBufFindGroupById(STSGroupBlockInfoEx* pGroupInfoEx, int32_t numOfGroups, int32_t id) { + int32_t j = -1; + for (int32_t i = 0; i < numOfGroups; ++i) { + if (pGroupInfoEx[i].info.id == id) { + j = i; + break; + } + } + + return j; +} + +// todo opt performance by cache blocks info +static int32_t tsBufFindBlock(STSBuf* pTSBuf, STSGroupBlockInfo* pBlockInfo, int32_t blockIndex) { + if (fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET) != 0) { + return -1; + } + + // sequentially read the compressed data blocks, start from the beginning of the comp data block of this vnode + int32_t i = 0; + bool decomp = false; + + while ((i++) <= blockIndex) { + if (readDataFromDisk(pTSBuf, TSDB_ORDER_ASC, decomp) == NULL) { + return -1; + } + } + + // set the file position to be the end of previous comp block + if (pTSBuf->cur.order == TSDB_ORDER_DESC) { + STSBlock* pBlock = &pTSBuf->block; + int32_t compBlockSize = + pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + getTagAreaLength(&pBlock->tag); + int32_t ret = fseek(pTSBuf->f, -compBlockSize, SEEK_CUR); + UNUSED(ret); + } + + return 0; +} + +static int32_t tsBufFindBlockByTag(STSBuf* pTSBuf, STSGroupBlockInfo* pBlockInfo, SVariant* tag) { + bool decomp = false; + + int64_t offset = 0; + if (pTSBuf->cur.order == 
TSDB_ORDER_ASC) { + offset = pBlockInfo->offset; + } else { // reversed traverse starts from the end of block + offset = pBlockInfo->offset + pBlockInfo->compLen; + } + + if (fseek(pTSBuf->f, (int32_t)offset, SEEK_SET) != 0) { + return -1; + } + + for (int32_t i = 0; i < pBlockInfo->numOfBlocks; ++i) { + if (readDataFromDisk(pTSBuf, pTSBuf->cur.order, decomp) == NULL) { + return -1; + } + + if (taosVariantCompare(&pTSBuf->block.tag, tag) == 0) { + return (pTSBuf->cur.order == TSDB_ORDER_ASC)? i: (pBlockInfo->numOfBlocks - (i + 1)); + } + } + + return -1; +} + +static void tsBufGetBlock(STSBuf* pTSBuf, int32_t groupIndex, int32_t blockIndex) { + STSGroupBlockInfo* pBlockInfo = &pTSBuf->pData[groupIndex].info; + if (pBlockInfo->numOfBlocks <= blockIndex) { + assert(false); + } + + STSCursor* pCur = &pTSBuf->cur; + if (pCur->vgroupIndex == groupIndex && ((pCur->blockIndex <= blockIndex && pCur->order == TSDB_ORDER_ASC) || + (pCur->blockIndex >= blockIndex && pCur->order == TSDB_ORDER_DESC))) { + int32_t i = 0; + bool decomp = false; + int32_t step = abs(blockIndex - pCur->blockIndex); + + while ((++i) <= step) { + if (readDataFromDisk(pTSBuf, pCur->order, decomp) == NULL) { + return; + } + } + } else { + if (tsBufFindBlock(pTSBuf, pBlockInfo, blockIndex) == -1) { + assert(false); + } + } + + STSBlock* pBlock = &pTSBuf->block; + + size_t s = pBlock->numOfElem * TSDB_KEYSIZE; + + /* + * In order to accommodate all the qualified data, the actual buffer size for one block with identical tags value + * may exceed the maximum allowed size during *tsBufAppend* function by invoking expandBuffer function + */ + if (s > pTSBuf->tsData.allocSize) { + expandBuffer(&pTSBuf->tsData, (int32_t)s); + } + + pTSBuf->tsData.len = + tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, + pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); + + assert((pTSBuf->tsData.len / TSDB_KEYSIZE == pBlock->numOfElem) && 
(pTSBuf->tsData.allocSize >= pTSBuf->tsData.len)); + + pCur->vgroupIndex = groupIndex; + pCur->blockIndex = blockIndex; + + pCur->tsIndex = (pCur->order == TSDB_ORDER_ASC) ? 0 : pBlock->numOfElem - 1; +} + +static int32_t doUpdateGroupInfo(STSBuf* pTSBuf, int64_t offset, STSGroupBlockInfo* pVInfo) { + if (offset < 0 || offset >= getDataStartOffset()) { + return -1; + } + + if (fseek(pTSBuf->f, (int32_t)offset, SEEK_SET) != 0) { + return -1; + } + + fwrite(pVInfo, sizeof(STSGroupBlockInfo), 1, pTSBuf->f); + return 0; +} + +STSGroupBlockInfo* tsBufGetGroupBlockInfo(STSBuf* pTSBuf, int32_t id) { + int32_t j = tsBufFindGroupById(pTSBuf->pData, pTSBuf->numOfGroups, id); + if (j == -1) { + return NULL; + } + + return &pTSBuf->pData[j].info; +} + +int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader) { + if ((pTSBuf->f == NULL) || pHeader == NULL || pHeader->numOfGroup == 0 || pHeader->magic != TS_COMP_FILE_MAGIC) { + return -1; + } + + assert(pHeader->tsOrder == TSDB_ORDER_ASC || pHeader->tsOrder == TSDB_ORDER_DESC); + + int32_t r = fseek(pTSBuf->f, 0, SEEK_SET); + if (r != 0) { +// qError("fseek failed, errno:%d", errno); + return -1; + } + + size_t ws = fwrite(pHeader, sizeof(STSBufFileHeader), 1, pTSBuf->f); + if (ws != 1) { +// qError("ts update header fwrite failed, size:%d, expected size:%d", (int32_t)ws, (int32_t)sizeof(STSBufFileHeader)); + return -1; + } + return 0; +} + +bool tsBufNextPos(STSBuf* pTSBuf) { + if (pTSBuf == NULL || pTSBuf->numOfGroups == 0) { + return false; + } + + STSCursor* pCur = &pTSBuf->cur; + + // get the first/last position according to traverse order + if (pCur->vgroupIndex == -1) { + if (pCur->order == TSDB_ORDER_ASC) { + tsBufGetBlock(pTSBuf, 0, 0); + + if (pTSBuf->block.numOfElem == 0) { // the whole list is empty, return + tsBufResetPos(pTSBuf); + return false; + } else { + return true; + } + + } else { // get the last timestamp record in the last block of the last vnode + assert(pTSBuf->numOfGroups > 0); + + int32_t 
groupIndex = pTSBuf->numOfGroups - 1; + pCur->vgroupIndex = groupIndex; + + int32_t id = pTSBuf->pData[pCur->vgroupIndex].info.id; + STSGroupBlockInfo* pBlockInfo = tsBufGetGroupBlockInfo(pTSBuf, id); + int32_t blockIndex = pBlockInfo->numOfBlocks - 1; + + tsBufGetBlock(pTSBuf, groupIndex, blockIndex); + + pCur->tsIndex = pTSBuf->block.numOfElem - 1; + if (pTSBuf->block.numOfElem == 0) { + tsBufResetPos(pTSBuf); + return false; + } else { + return true; + } + } + } + + int32_t step = pCur->order == TSDB_ORDER_ASC ? 1 : -1; + + while (1) { + assert(pTSBuf->tsData.len == pTSBuf->block.numOfElem * TSDB_KEYSIZE); + + if ((pCur->order == TSDB_ORDER_ASC && pCur->tsIndex >= pTSBuf->block.numOfElem - 1) || + (pCur->order == TSDB_ORDER_DESC && pCur->tsIndex <= 0)) { + int32_t id = pTSBuf->pData[pCur->vgroupIndex].info.id; + + STSGroupBlockInfo* pBlockInfo = tsBufGetGroupBlockInfo(pTSBuf, id); + if (pBlockInfo == NULL || (pCur->blockIndex >= pBlockInfo->numOfBlocks - 1 && pCur->order == TSDB_ORDER_ASC) || + (pCur->blockIndex <= 0 && pCur->order == TSDB_ORDER_DESC)) { + if ((pCur->vgroupIndex >= pTSBuf->numOfGroups - 1 && pCur->order == TSDB_ORDER_ASC) || + (pCur->vgroupIndex <= 0 && pCur->order == TSDB_ORDER_DESC)) { + pCur->vgroupIndex = -1; + return false; + } + + if (pBlockInfo == NULL) { + return false; + } + + int32_t blockIndex = (pCur->order == TSDB_ORDER_ASC) ? 
0 : (pBlockInfo->numOfBlocks - 1); + tsBufGetBlock(pTSBuf, pCur->vgroupIndex + step, blockIndex); + break; + + } else { + tsBufGetBlock(pTSBuf, pCur->vgroupIndex, pCur->blockIndex + step); + break; + } + } else { + pCur->tsIndex += step; + break; + } + } + + return true; +} + +void tsBufResetPos(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return; + } + + pTSBuf->cur = (STSCursor){.tsIndex = -1, .blockIndex = -1, .vgroupIndex = -1, .order = pTSBuf->cur.order}; +} + +STSElem tsBufGetElem(STSBuf* pTSBuf) { + STSElem elem1 = {.id = -1}; + if (pTSBuf == NULL) { + return elem1; + } + + STSCursor* pCur = &pTSBuf->cur; + if (pCur != NULL && pCur->vgroupIndex < 0) { + return elem1; + } + + STSBlock* pBlock = &pTSBuf->block; + + elem1.id = pTSBuf->pData[pCur->vgroupIndex].info.id; + elem1.ts = *(TSKEY*)(pTSBuf->tsData.rawBuf + pCur->tsIndex * TSDB_KEYSIZE); + elem1.tag = &pBlock->tag; + + return elem1; +} + +/** + * current only support ts comp data from two vnode merge + * @param pDestBuf + * @param pSrcBuf + * @param id + * @return + */ +int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf) { + if (pDestBuf == NULL || pSrcBuf == NULL || pSrcBuf->numOfGroups <= 0) { + return 0; + } + + if (pDestBuf->numOfGroups + pSrcBuf->numOfGroups > TS_COMP_FILE_GROUP_MAX) { + return -1; + } + + // src can only have one vnode index + assert(pSrcBuf->numOfGroups == 1); + + // there are data in buffer, flush to disk first + tsBufFlush(pDestBuf); + + // compared with the last vnode id + int32_t id = tsBufGetLastGroupInfo((STSBuf*) pSrcBuf)->info.id; + if (id != tsBufGetLastGroupInfo(pDestBuf)->info.id) { + int32_t oldSize = pDestBuf->numOfGroups; + int32_t newSize = oldSize + pSrcBuf->numOfGroups; + + if (pDestBuf->numOfAlloc < newSize) { + pDestBuf->numOfAlloc = newSize; + + STSGroupBlockInfoEx* tmp = realloc(pDestBuf->pData, sizeof(STSGroupBlockInfoEx) * newSize); + if (tmp == NULL) { + return -1; + } + + pDestBuf->pData = tmp; + } + + // directly copy the vnode index information + 
memcpy(&pDestBuf->pData[oldSize], pSrcBuf->pData, (size_t)pSrcBuf->numOfGroups * sizeof(STSGroupBlockInfoEx)); + + // set the new offset value + for (int32_t i = 0; i < pSrcBuf->numOfGroups; ++i) { + STSGroupBlockInfoEx* pBlockInfoEx = &pDestBuf->pData[i + oldSize]; + pBlockInfoEx->info.offset = (pSrcBuf->pData[i].info.offset - getDataStartOffset()) + pDestBuf->fileSize; + pBlockInfoEx->info.id = id; + } + + pDestBuf->numOfGroups = newSize; + } else { + STSGroupBlockInfoEx* pBlockInfoEx = tsBufGetLastGroupInfo(pDestBuf); + + pBlockInfoEx->len += pSrcBuf->pData[0].len; + pBlockInfoEx->info.numOfBlocks += pSrcBuf->pData[0].info.numOfBlocks; + pBlockInfoEx->info.compLen += pSrcBuf->pData[0].info.compLen; + pBlockInfoEx->info.id = id; + } + + int32_t r = fseek(pDestBuf->f, 0, SEEK_END); + assert(r == 0); + + int64_t offset = getDataStartOffset(); + int32_t size = (int32_t)pSrcBuf->fileSize - (int32_t)offset; + int64_t written = taosFSendFile(pDestBuf->f, pSrcBuf->f, &offset, size); + + if (written == -1 || written != size) { + return -1; + } + + pDestBuf->numOfTotal += pSrcBuf->numOfTotal; + + int32_t oldSize = pDestBuf->fileSize; + + // file meta data may be cached, close and reopen the file for accurate file size. 
+ fclose(pDestBuf->f); + pDestBuf->f = fopen(pDestBuf->path, "rb+"); + if (pDestBuf->f == NULL) { + return -1; + } + + struct stat fileStat; + if (fstat(fileno(pDestBuf->f), &fileStat) != 0) { + return -1; + } + pDestBuf->fileSize = (uint32_t)fileStat.st_size; + + assert(pDestBuf->fileSize == oldSize + size); + + return 0; +} + +STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t order, int32_t id) { + STSBuf* pTSBuf = tsBufCreate(true, order); + + STSGroupBlockInfo* pBlockInfo = &(addOneGroupInfo(pTSBuf, 0)->info); + pBlockInfo->numOfBlocks = numOfBlocks; + pBlockInfo->compLen = len; + pBlockInfo->offset = getDataStartOffset(); + pBlockInfo->id = id; + + // update prev vnode length info in file + TSBufUpdateGroupInfo(pTSBuf, pTSBuf->numOfGroups - 1, pBlockInfo); + + int32_t ret = fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET); + if (ret == -1) { +// qError("fseek failed, errno:%d", errno); + tsBufDestroy(pTSBuf); + return NULL; + } + size_t sz = fwrite((void*)pData, 1, len, pTSBuf->f); + if (sz != len) { +// qError("ts data fwrite failed, write size:%d, expected size:%d", (int32_t)sz, len); + tsBufDestroy(pTSBuf); + return NULL; + } + pTSBuf->fileSize += len; + + pTSBuf->tsOrder = order; + assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); + + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfGroup = pTSBuf->numOfGroups, .tsOrder = pTSBuf->tsOrder}; + if (STSBufUpdateHeader(pTSBuf, &header) < 0) { + tsBufDestroy(pTSBuf); + return NULL; + } + + // TODO taosFsync?? 
+// if (taosFsync(fileno(pTSBuf->f)) == -1) { +//// qError("fsync failed, errno:%d", errno); +// tsBufDestroy(pTSBuf); +// return NULL; +// } + + return pTSBuf; +} + +STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t id, SVariant* tag) { + STSElem elem = {.id = -1}; + + if (pTSBuf == NULL) { + return elem; + } + + int32_t j = tsBufFindGroupById(pTSBuf->pData, pTSBuf->numOfGroups, id); + if (j == -1) { + return elem; + } + + // for debug purpose + // tsBufDisplay(pTSBuf); + + STSCursor* pCur = &pTSBuf->cur; + STSGroupBlockInfo* pBlockInfo = &pTSBuf->pData[j].info; + + int32_t blockIndex = tsBufFindBlockByTag(pTSBuf, pBlockInfo, tag); + if (blockIndex < 0) { + return elem; + } + + pCur->vgroupIndex = j; + pCur->blockIndex = blockIndex; + tsBufGetBlock(pTSBuf, j, blockIndex); + + return tsBufGetElem(pTSBuf); +} + +STSCursor tsBufGetCursor(STSBuf* pTSBuf) { + STSCursor c = {.vgroupIndex = -1}; + if (pTSBuf == NULL) { + return c; + } + + return pTSBuf->cur; +} + +void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur) { + if (pTSBuf == NULL || pCur == NULL) { + return; + } + + // assert(pCur->vgroupIndex != -1 && pCur->tsIndex >= 0 && pCur->blockIndex >= 0); + if (pCur->vgroupIndex != -1) { + tsBufGetBlock(pTSBuf, pCur->vgroupIndex, pCur->blockIndex); + } + + pTSBuf->cur = *pCur; +} + +void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order) { + if (pTSBuf == NULL) { + return; + } + + pTSBuf->cur.order = order; +} + +STSBuf* tsBufClone(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return NULL; + } + + tsBufFlush(pTSBuf); + + return tsBufCreateFromFile(pTSBuf->path, false); +} + +void tsBufDisplay(STSBuf* pTSBuf) { + printf("-------start of ts comp file-------\n"); + printf("number of vnode:%d\n", pTSBuf->numOfGroups); + + int32_t old = pTSBuf->cur.order; + pTSBuf->cur.order = TSDB_ORDER_ASC; + + tsBufResetPos(pTSBuf); + + while (tsBufNextPos(pTSBuf)) { + STSElem elem = tsBufGetElem(pTSBuf); + if (elem.tag->nType == TSDB_DATA_TYPE_BIGINT) { + printf("%d-%" PRId64 "-%" 
PRId64 "\n", elem.id, elem.tag->i64, elem.ts); + } + } + + pTSBuf->cur.order = old; + printf("-------end of ts comp file-------\n"); +} + +static int32_t getDataStartOffset() { + return sizeof(STSBufFileHeader) + TS_COMP_FILE_GROUP_MAX * sizeof(STSGroupBlockInfo); +} + +// update prev vnode length info in file +static void TSBufUpdateGroupInfo(STSBuf* pTSBuf, int32_t index, STSGroupBlockInfo* pBlockInfo) { + int32_t offset = sizeof(STSBufFileHeader) + index * sizeof(STSGroupBlockInfo); + doUpdateGroupInfo(pTSBuf, offset, pBlockInfo); +} + +static STSBuf* allocResForTSBuf(STSBuf* pTSBuf) { + const int32_t INITIAL_GROUPINFO_SIZE = 4; + + pTSBuf->numOfAlloc = INITIAL_GROUPINFO_SIZE; + pTSBuf->pData = calloc(pTSBuf->numOfAlloc, sizeof(STSGroupBlockInfoEx)); + if (pTSBuf->pData == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->tsData.rawBuf = malloc(MEM_BUF_SIZE); + if (pTSBuf->tsData.rawBuf == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->bufSize = MEM_BUF_SIZE; + pTSBuf->tsData.threshold = MEM_BUF_SIZE; + pTSBuf->tsData.allocSize = MEM_BUF_SIZE; + + pTSBuf->assistBuf = malloc(MEM_BUF_SIZE); + if (pTSBuf->assistBuf == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->block.payload = malloc(MEM_BUF_SIZE); + if (pTSBuf->block.payload == NULL) { + tsBufDestroy(pTSBuf); + return NULL; + } + + pTSBuf->fileSize += getDataStartOffset(); + return pTSBuf; +} + +int32_t tsBufGetNumOfGroup(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return 0; + } + + return pTSBuf->numOfGroups; +} + +void tsBufGetGroupIdList(STSBuf* pTSBuf, int32_t* num, int32_t** id) { + int32_t size = tsBufGetNumOfGroup(pTSBuf); + if (num != NULL) { + *num = size; + } + + *id = NULL; + if (size == 0) { + return; + } + + (*id) = malloc(tsBufGetNumOfGroup(pTSBuf) * sizeof(int32_t)); + + for(int32_t i = 0; i < size; ++i) { + (*id)[i] = pTSBuf->pData[i].info.id; + } +} + +int32_t dumpFileBlockByGroupId(STSBuf* pTSBuf, int32_t groupIndex, void* buf, int32_t* len, 
int32_t* numOfBlocks) { + assert(groupIndex >= 0 && groupIndex < pTSBuf->numOfGroups); + STSGroupBlockInfo *pBlockInfo = &pTSBuf->pData[groupIndex].info; + + *len = 0; + *numOfBlocks = 0; + + if (fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET) != 0) { + int code = TAOS_SYSTEM_ERROR(ferror(pTSBuf->f)); +// qError("%p: fseek failed: %s", pSql, tstrerror(code)); + return code; + } + + size_t s = fread(buf, 1, pBlockInfo->compLen, pTSBuf->f); + if (s != pBlockInfo->compLen) { + int code = TAOS_SYSTEM_ERROR(ferror(pTSBuf->f)); +// tscError("%p: fread didn't return expected data: %s", pSql, tstrerror(code)); + return code; + } + + *len = pBlockInfo->compLen; + *numOfBlocks = pBlockInfo->numOfBlocks; + + return TSDB_CODE_SUCCESS; +} + +STSElem tsBufFindElemStartPosByTag(STSBuf* pTSBuf, SVariant* pTag) { + STSElem el = {.id = -1}; + + for (int32_t i = 0; i < pTSBuf->numOfGroups; ++i) { + el = tsBufGetElemStartPos(pTSBuf, pTSBuf->pData[i].info.id, pTag); + if (el.id == pTSBuf->pData[i].info.id) { + return el; + } + } + + return el; +} + +bool tsBufIsValidElem(STSElem* pElem) { + return pElem->id >= 0; +} diff --git a/source/libs/parser/CMakeLists.txt b/source/libs/parser/CMakeLists.txt index 8f6d8d2880..155b72c1f9 100644 --- a/source/libs/parser/CMakeLists.txt +++ b/source/libs/parser/CMakeLists.txt @@ -8,5 +8,7 @@ target_include_directories( target_link_libraries( parser - PRIVATE os util common catalog transport -) \ No newline at end of file + PRIVATE os util common catalog function transport +) + +ADD_SUBDIRECTORY(test) \ No newline at end of file diff --git a/source/libs/parser/inc/astGenerator.h b/source/libs/parser/inc/astGenerator.h index af489a4e37..f7c7b9d6cc 100644 --- a/source/libs/parser/inc/astGenerator.h +++ b/source/libs/parser/inc/astGenerator.h @@ -42,12 +42,6 @@ enum SQL_NODE_FROM_TYPE { SQL_NODE_FROM_TABLELIST = 2, }; -//enum SQL_EXPR_FLAG { -// EXPR_FLAG_TS_ERROR = 1, -// EXPR_FLAG_NS_TIMESTAMP = 2, -// EXPR_FLAG_TIMESTAMP_VAR = 3, -//}; - extern char 
tTokenTypeSwitcher[13]; #define toTSDBType(x) \ @@ -239,7 +233,7 @@ typedef struct tSqlExpr { // The complete string of the function(col, param), and the function name is kept in exprToken struct { SToken operand; - struct SArray *paramList; // function parameters list + struct SArray *paramList; // function parameters list } Expr; SToken columnName; // table column info @@ -252,6 +246,7 @@ typedef struct tSqlExpr { // used in select clause. select from xxx typedef struct tSqlExprItem { tSqlExpr *pNode; // The list of expressions + int32_t functionId; char *aliasName; // alias name, null-terminated string bool distinct; } tSqlExprItem; @@ -267,7 +262,7 @@ SRelationInfo *addSubquery(SRelationInfo *pRelationInfo, SArray *pSub, SToken *p // sql expr leaf node tSqlExpr *tSqlExprCreateIdValue(SToken *pToken, int32_t optrType); tSqlExpr *tSqlExprCreateFunction(SArray *pParam, SToken *pFuncToken, SToken *endToken, int32_t optType); -SArray * tAppendFuncName(SArray *pList, SToken *pToken); +SArray * tRecordFuncName(SArray *pList, SToken *pToken); tSqlExpr *tSqlExprCreate(tSqlExpr *pLeft, tSqlExpr *pRight, int32_t optrType); tSqlExpr *tSqlExprClone(tSqlExpr *pSrc); @@ -277,6 +272,7 @@ bool tSqlExprIsParentOfLeaf(tSqlExpr *pExpr); void tSqlExprDestroy(tSqlExpr *pExpr); SArray * tSqlExprListAppend(SArray *pList, tSqlExpr *pNode, SToken *pDistinct, SToken *pToken); void tSqlExprListDestroy(SArray *pList); +void tSqlExprEvaluate(tSqlExpr* pExpr); SSqlNode *tSetQuerySqlNode(SToken *pSelectToken, SArray *pSelNodeList, SRelationInfo *pFrom, tSqlExpr *pWhere, SArray *pGroupby, SArray *pSortOrder, SIntervalVal *pInterval, SSessionWindowVal *ps, @@ -299,6 +295,8 @@ SArray *appendSelectClause(SArray *pList, void *pSubclause); void setCreatedTableName(SSqlInfo *pInfo, SToken *pTableNameToken, SToken *pIfNotExists); void* destroyCreateTableSql(SCreateTableSql* pCreate); +void setDropFuncInfo(SSqlInfo *pInfo, int32_t type, SToken* pToken); +void setCreateFuncInfo(SSqlInfo *pInfo, int32_t 
type, SToken *pName, SToken *pPath, SField *output, SToken* bufSize, int32_t funcType); void SqlInfoDestroy(SSqlInfo *pInfo); diff --git a/source/libs/parser/inc/parserInt.h b/source/libs/parser/inc/parserInt.h index ce1504b4e2..27c9140bcd 100644 --- a/source/libs/parser/inc/parserInt.h +++ b/source/libs/parser/inc/parserInt.h @@ -35,6 +35,21 @@ typedef struct SInsertStmtInfo { char *sql; // current sql statement position } SInsertStmtInfo; +typedef struct SInternalField { + TAOS_FIELD field; + bool visible; + SExprInfo *pExpr; +} SInternalField; + +typedef struct SMsgBuf { + int32_t len; + char *buf; +} SMsgBuf; + +void clearTableMetaInfo(STableMetaInfo* pTableMetaInfo); + +void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t id); + /** * Validate the sql info, according to the corresponding metadata info from catalog. * @param pCatalog @@ -44,15 +59,33 @@ typedef struct SInsertStmtInfo { * @param msg * @return */ -int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pSqlInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msg); +int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pSqlInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msg, int32_t msgLen); /** - * - * @param pSqlNode - * @param pMetaInfo + * Evaluate the numeric and timestamp arithmetic expression in the WHERE clause. 
+ * @param pNode + * @param tsPrecision + * @param msg + * @param msgBufLen * @return */ -int32_t qParserExtractRequestedMetaInfo(const struct SSqlNode* pSqlNode, SMetaReq* pMetaInfo); +int32_t evaluateSqlNode(SSqlNode* pNode, int32_t tsPrecision, SMsgBuf* pMsgBuf); + +int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf); + +void initQueryInfo(SQueryStmtInfo* pQueryInfo); + +int32_t checkForInvalidExpr(SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf); + +/** + * Extract request meta info from the sql statement + * @param pSqlInfo + * @param pMetaInfo + * @param msg + * @param msgBufLen + * @return + */ +int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SMetaReq* pMetaInfo, char* msg, int32_t msgBufLen); #ifdef __cplusplus } diff --git a/source/libs/parser/inc/parserUtil.h b/source/libs/parser/inc/parserUtil.h index 4e6af0a901..f37e84927b 100644 --- a/source/libs/parser/inc/parserUtil.h +++ b/source/libs/parser/inc/parserUtil.h @@ -20,6 +20,47 @@ extern "C" { #endif +#include "os.h" +#include "ttoken.h" +#include "parserInt.h" + +#define UTIL_TABLE_IS_SUPER_TABLE(metaInfo) \ + (((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_SUPER_TABLE)) + +#define UTIL_TABLE_IS_CHILD_TABLE(metaInfo) \ + (((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_CHILD_TABLE)) + +#define UTIL_TABLE_IS_NORMAL_TABLE(metaInfo) \ + (!(UTIL_TABLE_IS_SUPER_TABLE(metaInfo) || UTIL_TABLE_IS_CHILD_TABLE(metaInfo))) + +#define UTIL_TABLE_IS_TMP_TABLE(metaInfo) \ + (((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_TEMP_TABLE)) + +TAOS_FIELD createField(const SSchema* pSchema); +SSchema createSchema(uint8_t type, int16_t bytes, int16_t colId, const char* name); + +SInternalField* insertFieldInfo(SFieldInfo* pFieldInfo, int32_t index, SSchema* field); +int32_t getNumOfFields(SFieldInfo* pFieldInfo); +SInternalField* getInternalField(SFieldInfo* pFieldInfo, int32_t 
index); + +int32_t parserValidateIdToken(SToken* pToken); +int32_t buildInvalidOperationMsg(SMsgBuf* pMsgBuf, const char* msg); +int32_t buildSyntaxErrMsg(char* dst, int32_t dstBufLen, const char* additionalInfo, const char* sourceStr); + +int32_t createProjectionExpr(SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, SExprInfo*** pExpr, int32_t* num); +STableMetaInfo* addEmptyMetaInfo(SQueryStmtInfo* pQueryInfo); + +void columnListCopyAll(SArray* dst, const SArray* src); + +void columnListDestroy(SArray* pColumnList); + +SColumn* columnListInsert(SArray* pColumnList, int32_t columnIndex, uint64_t uid, SSchema* pSchema); +SColumn* insertPrimaryTsColumn(SArray* pColumnList, uint64_t tableUid); + +void cleanupTagCond(STagCond* pTagCond); +void cleanupColumnCond(SArray** pCond); + +uint32_t convertRelationalOperator(SToken *pToken); #ifdef __cplusplus } diff --git a/source/libs/parser/inc/queryInfoUtil.h b/source/libs/parser/inc/queryInfoUtil.h new file mode 100644 index 0000000000..f1515189b7 --- /dev/null +++ b/source/libs/parser/inc/queryInfoUtil.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_QUERYINFOUTIL_H +#define TDENGINE_QUERYINFOUTIL_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "parserInt.h" + +SSchema* getTbnameColumnSchema(); + +int32_t getNumOfColumns(const STableMeta* pTableMeta); +int32_t getNumOfTags(const STableMeta* pTableMeta); +SSchema *getTableColumnSchema(const STableMeta *pTableMeta); +SSchema *getTableTagSchema(const STableMeta* pTableMeta); +SSchema *getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex); + +size_t getNumOfExprs(SQueryStmtInfo* pQueryInfo); +//SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, struct tExprNode* pParamExpr, SSchema* pResSchema, int16_t interSize); +SExprInfo* createBinaryExprInfo(struct tExprNode* pNode, SSchema* pResSchema); +void destroyExprInfoList(); + +void addExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index, SExprInfo* pExprInfo); +void updateExprInfo(SExprInfo* pExprInfo, int16_t functionId, int32_t colId, int16_t srcColumnIndex, int16_t resType, int16_t resSize); + +SExprInfo* getExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index); +int32_t copyAllExprInfo(SArray* dst, const SArray* src, bool deepcopy); + +void addExprInfoParam(SSqlExpr* pExpr, char* argument, int32_t type, int32_t bytes); + +void cleanupFieldInfo(SFieldInfo* pFieldInfo); + +STableComInfo getTableInfo(const STableMeta* pTableMeta); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_QUERYINFOUTIL_H diff --git a/source/libs/parser/inc/sql.y b/source/libs/parser/inc/sql.y index efad59f780..0296b1cde9 100644 --- a/source/libs/parser/inc/sql.y +++ b/source/libs/parser/inc/sql.y @@ -325,7 +325,7 @@ alter_db_optr(Y) ::= alter_db_optr(Z) cachelast(X). { Y = Z; Y.cachelast = str alter_topic_optr(Y) ::= alter_db_optr(Z). { Y = Z; Y.dbType = TSDB_DB_TYPE_TOPIC; } alter_topic_optr(Y) ::= alter_topic_optr(Z) partitions(X). 
{ Y = Z; Y.partitions = strtol(X.z, NULL, 10); } -%type typename {TAOS_FIELD} +%type typename {SField} typename(A) ::= ids(X). { X.type = 0; tSetColumnType (&A, &X); @@ -425,11 +425,11 @@ create_table_args(A) ::= ifnotexists(U) ids(V) cpxName(Z) AS select(S). { setCreatedTableName(pInfo, &V, &U); } -%type column{TAOS_FIELD} +%type column{SField} %type columnlist{SArray*} %destructor columnlist {taosArrayDestroy($$);} columnlist(A) ::= columnlist(X) COMMA column(Y). {taosArrayPush(X, &Y); A = X; } -columnlist(A) ::= column(X). {A = taosArrayInit(4, sizeof(TAOS_FIELD)); taosArrayPush(A, &X);} +columnlist(A) ::= column(X). {A = taosArrayInit(4, sizeof(SField)); taosArrayPush(A, &X);} // The information used for a column is the name and type of column: // tinyint smallint int bigint float double bool timestamp binary(x) nchar(x) @@ -601,7 +601,7 @@ fill_opt(N) ::= FILL LP ID(Y) COMMA tagitemlist(X) RP. { toTSDBType(Y.type); taosVariantCreate(&A, Y.z, Y.n, Y.type); - tVariantListInsert(X, &A, -1, 0); + tListItemInsert(X, &A, -1, 0); N = X; } @@ -719,10 +719,10 @@ expr(A) ::= BOOL(X). { A = tSqlExprCreateIdValue(&X, TK_BOOL);} expr(A) ::= NULL(X). { A = tSqlExprCreateIdValue(&X, TK_NULL);} // ordinary functions: min(x), max(x), top(k, 20) -expr(A) ::= ID(X) LP exprlist(Y) RP(E). { tAppendFuncName(pInfo->funcs, &X); A = tSqlExprCreateFunction(Y, &X, &E, X.type); } +expr(A) ::= ID(X) LP exprlist(Y) RP(E). { tRecordFuncName(pInfo->funcs, &X); A = tSqlExprCreateFunction(Y, &X, &E, X.type); } // for parsing sql functions with wildcard for parameters. e.g., count(*)/first(*)/last(*) operation -expr(A) ::= ID(X) LP STAR RP(Y). { tAppendFuncName(pInfo->funcs, &X); A = tSqlExprCreateFunction(NULL, &X, &Y, X.type); } +expr(A) ::= ID(X) LP STAR RP(Y). { tRecordFuncName(pInfo->funcs, &X); A = tSqlExprCreateFunction(NULL, &X, &Y, X.type); } // is (not) null expression expr(A) ::= expr(X) IS NULL. 
{A = tSqlExprCreate(X, NULL, TK_ISNULL);} diff --git a/source/libs/parser/src/astGenerator.c b/source/libs/parser/src/astGenerator.c index bc4d13e02a..e9517758d1 100644 --- a/source/libs/parser/src/astGenerator.c +++ b/source/libs/parser/src/astGenerator.c @@ -18,52 +18,6 @@ #include "astGenerator.h" #include "tmsgtype.h" -int32_t tStrToInteger(const char* z, int16_t type, int32_t n, int64_t* value, bool issigned) { - errno = 0; - int32_t ret = 0; - - char* endPtr = NULL; - if (type == TK_FLOAT) { - double v = strtod(z, &endPtr); - if ((errno == ERANGE && v == HUGE_VALF) || isinf(v) || isnan(v)) { - ret = -1; - } else if ((issigned && (v < INT64_MIN || v > INT64_MAX)) || ((!issigned) && (v < 0 || v > UINT64_MAX))) { - ret = -1; - } else { - *value = (int64_t) round(v); - } - - errno = 0; - return ret; - } - - int32_t radix = 10; - if (type == TK_HEX) { - radix = 16; - } else if (type == TK_BIN) { - radix = 2; - } - - // the string may be overflow according to errno - if (!issigned) { - const char *p = z; - while(*p != 0 && *p == ' ') p++; - if (*p != 0 && *p == '-') { return -1;} - - *value = strtoull(z, &endPtr, radix); - } else { - *value = strtoll(z, &endPtr, radix); - } - - // not a valid integer number, return error - if (endPtr - z != n || errno == ERANGE) { - ret = -1; - } - - errno = 0; - return ret; -} - SArray *tListItemAppend(SArray *pList, SVariant *pVar, uint8_t sortOrder) { if (pList == NULL) { pList = taosArrayInit(4, sizeof(SListItem)); @@ -173,7 +127,6 @@ SRelationInfo *addSubquery(SRelationInfo *pRelationInfo, SArray *pSub, SToken *p } // sql expr leaf node -// todo Evalute the value during the validation process of AST. 
tSqlExpr *tSqlExprCreateIdValue(SToken *pToken, int32_t optrType) { tSqlExpr *pSqlExpr = calloc(1, sizeof(tSqlExpr)); @@ -189,34 +142,10 @@ tSqlExpr *tSqlExprCreateIdValue(SToken *pToken, int32_t optrType) { pSqlExpr->tokenId = optrType; pSqlExpr->type = SQL_NODE_VALUE; } else if (optrType == TK_INTEGER || optrType == TK_STRING || optrType == TK_FLOAT || optrType == TK_BOOL) { -// if (pToken) { -// toTSDBType(pToken->type); -// tVariantCreate(&pSqlExpr->value, pToken); -// } pSqlExpr->tokenId = optrType; pSqlExpr->type = SQL_NODE_VALUE; - } else if (optrType == TK_NOW) { - // use nanosecond by default TODO set value after getting database precision -// pSqlExpr->value.i64 = taosGetTimestamp(TSDB_TIME_PRECISION_NANO); -// pSqlExpr->value.nType = TSDB_DATA_TYPE_BIGINT; - pSqlExpr->tokenId = TK_TIMESTAMP; // TK_TIMESTAMP used to denote the time value is in microsecond - pSqlExpr->type = SQL_NODE_VALUE; -// pSqlExpr->flags |= 1 << EXPR_FLAG_NS_TIMESTAMP; - } else if (optrType == TK_VARIABLE) { - // use nanosecond by default - // TODO set value after getting database precision -// if (pToken) { -// char unit = 0; -// int32_t ret = parseAbsoluteDuration(pToken->z, pToken->n, &pSqlExpr->value.i64, &unit, TSDB_TIME_PRECISION_NANO); -// if (ret != TSDB_CODE_SUCCESS) { -// terrno = TSDB_CODE_TSC_SQL_SYNTAX_ERROR; -// } -// } - -// pSqlExpr->flags |= 1 << EXPR_FLAG_NS_TIMESTAMP; -// pSqlExpr->flags |= 1 << EXPR_FLAG_TIMESTAMP_VAR; -// pSqlExpr->value.nType = TSDB_DATA_TYPE_BIGINT; - pSqlExpr->tokenId = TK_TIMESTAMP; + } else if (optrType == TK_NOW || optrType == TK_VARIABLE) { + pSqlExpr->tokenId = optrType; // TK_TIMESTAMP used to denote this is a timestamp value pSqlExpr->type = SQL_NODE_VALUE; } else { // Here it must be the column name (tk_id) if it is not a number or string. 
@@ -252,7 +181,7 @@ tSqlExpr *tSqlExprCreateFunction(SArray *pParam, SToken *pFuncToken, SToken *end return pExpr; } -SArray *tAppendFuncName(SArray *pList, SToken *pToken) { +SArray *tRecordFuncName(SArray *pList, SToken *pToken) { assert(pList != NULL && pToken != NULL); taosArrayPush(pList, pToken); return pList; @@ -269,88 +198,7 @@ tSqlExpr *tSqlExprCreate(tSqlExpr *pLeft, tSqlExpr *pRight, int32_t optrType) { pExpr->exprToken.type = pLeft->exprToken.type; } - if ((pLeft != NULL && pRight != NULL) && - (optrType == TK_PLUS || optrType == TK_MINUS || optrType == TK_STAR || optrType == TK_DIVIDE || optrType == TK_REM)) { - /* - * if a exprToken is noted as the TK_TIMESTAMP, the time precision is microsecond - * Otherwise, the time precision is adaptive, determined by the time precision from databases. - */ - if ((pLeft->tokenId == TK_INTEGER && pRight->tokenId == TK_INTEGER) || - (pLeft->tokenId == TK_TIMESTAMP && pRight->tokenId == TK_TIMESTAMP)) { - pExpr->value.nType = TSDB_DATA_TYPE_BIGINT; - pExpr->tokenId = pLeft->tokenId; - pExpr->type = SQL_NODE_VALUE; - - switch (optrType) { - case TK_PLUS: { - pExpr->value.i64 = pLeft->value.i64 + pRight->value.i64; - break; - } - - case TK_MINUS: { - pExpr->value.i64 = pLeft->value.i64 - pRight->value.i64; - break; - } - case TK_STAR: { - pExpr->value.i64 = pLeft->value.i64 * pRight->value.i64; - break; - } - case TK_DIVIDE: { - pExpr->tokenId = TK_FLOAT; - pExpr->value.nType = TSDB_DATA_TYPE_DOUBLE; - pExpr->value.d = (double)pLeft->value.i64 / pRight->value.i64; - break; - } - case TK_REM: { - pExpr->value.i64 = pLeft->value.i64 % pRight->value.i64; - break; - } - } - - tSqlExprDestroy(pLeft); - tSqlExprDestroy(pRight); - } else if ((pLeft->tokenId == TK_FLOAT && pRight->tokenId == TK_INTEGER) || - (pLeft->tokenId == TK_INTEGER && pRight->tokenId == TK_FLOAT) || - (pLeft->tokenId == TK_FLOAT && pRight->tokenId == TK_FLOAT)) { - pExpr->value.nType = TSDB_DATA_TYPE_DOUBLE; - pExpr->tokenId = TK_FLOAT; - pExpr->type = 
SQL_NODE_VALUE; - - double left = (pLeft->value.nType == TSDB_DATA_TYPE_DOUBLE) ? pLeft->value.d : pLeft->value.i64; - double right = (pRight->value.nType == TSDB_DATA_TYPE_DOUBLE) ? pRight->value.d : pRight->value.i64; - - switch (optrType) { - case TK_PLUS: { - pExpr->value.d = left + right; - break; - } - case TK_MINUS: { - pExpr->value.d = left - right; - break; - } - case TK_STAR: { - pExpr->value.d = left * right; - break; - } - case TK_DIVIDE: { - pExpr->value.d = left / right; - break; - } - case TK_REM: { - pExpr->value.d = left - ((int64_t)(left / right)) * right; - break; - } - } - - tSqlExprDestroy(pLeft); - tSqlExprDestroy(pRight); - - } else { - pExpr->tokenId = optrType; - pExpr->pLeft = pLeft; - pExpr->pRight = pRight; - } - } else if (optrType == TK_IN) { + if (optrType == TK_IN) { pExpr->tokenId = optrType; pExpr->pLeft = pLeft; @@ -502,6 +350,105 @@ void tSqlExprListDestroy(SArray *pList) { taosArrayDestroyEx(pList, freeExprElem); } +void tSqlExprEvaluate(tSqlExpr* pExpr) { + tSqlExpr *pLeft = pExpr->pLeft; + tSqlExpr *pRight = pExpr->pRight; + + if (pLeft == NULL || pRight == NULL) { + return; + } + + int32_t optrType = pExpr->tokenId; + + if ((optrType == TK_PLUS || optrType == TK_MINUS || optrType == TK_STAR || optrType == TK_DIVIDE || + optrType == TK_REM)) { + /* + * if a exprToken is noted as the TK_TIMESTAMP, the time precision is microsecond + * Otherwise, the time precision is adaptive, determined by the time precision from databases. 
+ */ + int32_t ltoken = pLeft->tokenId; + int32_t rtoken = pRight->tokenId; + + if ((ltoken == TK_INTEGER && rtoken == TK_INTEGER) || (ltoken == TK_TIMESTAMP && rtoken == TK_TIMESTAMP)) { + pExpr->value.nType = TSDB_DATA_TYPE_BIGINT; + pExpr->tokenId = ltoken; + pExpr->type = SQL_NODE_VALUE; + + switch (optrType) { + case TK_PLUS: { + pExpr->value.i64 = pLeft->value.i64 + pRight->value.i64; + break; + } + case TK_MINUS: { + pExpr->value.i64 = pLeft->value.i64 - pRight->value.i64; + break; + } + case TK_STAR: { + pExpr->value.i64 = pLeft->value.i64 * pRight->value.i64; + break; + } + case TK_DIVIDE: { + pExpr->tokenId = TK_FLOAT; + pExpr->value.nType = TSDB_DATA_TYPE_DOUBLE; + pExpr->value.d = (double)pLeft->value.i64 / pRight->value.i64; + break; + } + case TK_REM: { + pExpr->value.i64 = pLeft->value.i64 % pRight->value.i64; + break; + } + default: + assert(0); + } + + tSqlExprDestroy(pLeft); + tSqlExprDestroy(pRight); + + pExpr->pLeft = NULL; + pExpr->pRight = NULL; + } else if ((ltoken == TK_FLOAT && rtoken == TK_INTEGER) || (ltoken == TK_INTEGER && rtoken == TK_FLOAT) || + (ltoken == TK_FLOAT && rtoken == TK_FLOAT)) { + pExpr->value.nType = TSDB_DATA_TYPE_DOUBLE; + pExpr->tokenId = TK_FLOAT; + pExpr->type = SQL_NODE_VALUE; + + double left = (pLeft->value.nType == TSDB_DATA_TYPE_DOUBLE) ? pLeft->value.d : pLeft->value.i64; + double right = (pRight->value.nType == TSDB_DATA_TYPE_DOUBLE) ? 
pRight->value.d : pRight->value.i64; + + switch (optrType) { + case TK_PLUS: { + pExpr->value.d = left + right; + break; + } + case TK_MINUS: { + pExpr->value.d = left - right; + break; + } + case TK_STAR: { + pExpr->value.d = left * right; + break; + } + case TK_DIVIDE: { + pExpr->value.d = left / right; + break; + } + case TK_REM: { + pExpr->value.d = left - ((int64_t)(left / right)) * right; + break; + } + default: + assert(0); + } + + tSqlExprDestroy(pLeft); + tSqlExprDestroy(pRight); + + pExpr->pLeft = NULL; + pExpr->pRight = NULL; + } + } +} + SSqlNode *tSetQuerySqlNode(SToken *pSelectToken, SArray *pSelNodeList, SRelationInfo *pFrom, tSqlExpr *pWhere, SArray *pGroupby, SArray *pSortOrder, SIntervalVal *pInterval, SSessionWindowVal *pSession, SWindowStateVal *pWindowStateVal, SToken *pSliding, SArray *pFill, SLimit *pLimit, @@ -681,6 +628,7 @@ SAlterTableInfo *tSetAlterTableInfo(SToken *pTableName, SArray *pCols, SArray *p return pAlterTable; } + SCreatedTableInfo createNewChildTableInfo(SToken *pTableName, SArray *pTagNames, SArray *pTagVals, SToken *pToken, SToken* igExists) { SCreatedTableInfo info; memset(&info, 0, sizeof(SCreatedTableInfo)); @@ -762,6 +710,7 @@ SSqlInfo* setSqlInfo(SSqlInfo *pInfo, void *pSqlExprInfo, SToken *pTableName, in return pInfo; } + SArray* setSubclause(SArray* pList, void *pSqlNode) { if (pList == NULL) { pList = taosArrayInit(1, POINTER_BYTES); @@ -770,6 +719,7 @@ SArray* setSubclause(SArray* pList, void *pSqlNode) { taosArrayPush(pList, &pSqlNode); return pList; } + SArray* appendSelectClause(SArray *pList, void *pSubclause) { taosArrayPush(pList, &pSubclause); return pList; @@ -792,6 +742,34 @@ void* destroyCreateTableSql(SCreateTableSql* pCreate) { return NULL; } +void setDropFuncInfo(SSqlInfo *pInfo, int32_t type, SToken* pToken) { + pInfo->type = type; + + if (pInfo->pMiscInfo == NULL) { + pInfo->pMiscInfo = (SMiscInfo *)calloc(1, sizeof(SMiscInfo)); + pInfo->pMiscInfo->a = taosArrayInit(4, sizeof(SToken)); + } + + 
taosArrayPush(pInfo->pMiscInfo->a, pToken); +} + +void setCreateFuncInfo(SSqlInfo *pInfo, int32_t type, SToken *pName, SToken *pPath, SField *output, SToken* bufSize, int32_t funcType) { + pInfo->type = type; + if (pInfo->pMiscInfo == NULL) { + pInfo->pMiscInfo = calloc(1, sizeof(SMiscInfo)); + } + + pInfo->pMiscInfo->funcOpt.name = *pName; + pInfo->pMiscInfo->funcOpt.path = *pPath; + pInfo->pMiscInfo->funcOpt.output = *output; + pInfo->pMiscInfo->funcOpt.type = funcType; + if (bufSize->n > 0) { + pInfo->pMiscInfo->funcOpt.bufSize = strtol(bufSize->z, NULL, 10); + } else { + pInfo->pMiscInfo->funcOpt.bufSize = 0; + } +} + void SqlInfoDestroy(SSqlInfo *pInfo) { if (pInfo == NULL) return;; taosArrayDestroy(pInfo->funcs); @@ -840,6 +818,7 @@ void setDCLSqlElems(SSqlInfo *pInfo, int32_t type, int32_t nParam, ...) { va_end(va); } + void setDropDbTableInfo(SSqlInfo *pInfo, int32_t type, SToken* pToken, SToken* existsCheck, int16_t dbType, int16_t tableType) { pInfo->type = type; @@ -854,6 +833,7 @@ void setDropDbTableInfo(SSqlInfo *pInfo, int32_t type, SToken* pToken, SToken* e pInfo->pMiscInfo->dbType = dbType; pInfo->pMiscInfo->tableType = tableType; } + void setShowOptions(SSqlInfo *pInfo, int32_t type, SToken* prefix, SToken* pPatterns) { if (pInfo->pMiscInfo == NULL) { pInfo->pMiscInfo = calloc(1, sizeof(SMiscInfo)); @@ -903,6 +883,7 @@ void setCreateAcctSql(SSqlInfo *pInfo, int32_t type, SToken *pName, SToken *pPwd pInfo->pMiscInfo->user.passwd = *pPwd; } } + void setCreateUserSql(SSqlInfo *pInfo, SToken *pName, SToken *pPasswd) { pInfo->type = TSDB_SQL_CREATE_USER; if (pInfo->pMiscInfo == NULL) { @@ -914,6 +895,7 @@ void setCreateUserSql(SSqlInfo *pInfo, SToken *pName, SToken *pPasswd) { pInfo->pMiscInfo->user.user = *pName; pInfo->pMiscInfo->user.passwd = *pPasswd; } + void setKillSql(SSqlInfo *pInfo, int32_t type, SToken *id) { pInfo->type = type; if (pInfo->pMiscInfo == NULL) { @@ -980,7 +962,13 @@ void setDefaultCreateDbOption(SCreateDbInfo *pDBInfo) { 
memset(&pDBInfo->precision, 0, sizeof(SToken)); } -void setDefaultCreateTopicOption(SCreateDbInfo *pDBInfo); + +void setDefaultCreateTopicOption(SCreateDbInfo *pDBInfo) { + setDefaultCreateDbOption(pDBInfo); + + pDBInfo->dbType = TSDB_DB_TYPE_TOPIC; + pDBInfo->partitions = TSDB_DEFAULT_DB_PARTITON_OPTION; +} // prefix show db.tables; void tSetDbName(SToken *pCpxName, SToken *pDb) { diff --git a/source/libs/parser/src/astValidate.c b/source/libs/parser/src/astValidate.c index 7718dc0758..9d5ee21f72 100644 --- a/source/libs/parser/src/astValidate.c +++ b/source/libs/parser/src/astValidate.c @@ -13,14 +13,2753 @@ * along with this program. If not, see . */ - +#include +#include "astGenerator.h" +#include "function.h" #include "parserInt.h" +#include "parserUtil.h" +#include "queryInfoUtil.h" +#include "tbuffer.h" +#include "tglobal.h" +#include "tmsgtype.h" +#include "ttime.h" -int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msg) { - //1. if it is a query, get the meta info and continue. +#define TSQL_TBNAME_L "tbname" +#define DEFAULT_PRIMARY_TIMESTAMP_COL_NAME "_c0" +#define VALID_COLUMN_INDEX(index) (((index).tableIndex >= 0) && ((index).columnIndex >= TSDB_TBNAME_COLUMN_INDEX)) +// -1 is tbname column index, so here use the -2 as the initial value +#define COLUMN_INDEX_INITIAL_VAL (-2) +#define COLUMN_INDEX_INITIALIZER { COLUMN_INDEX_INITIAL_VAL, COLUMN_INDEX_INITIAL_VAL } +static int32_t validateSelectNodeList(SQueryStmtInfo* pQueryInfo, SArray* pSelNodeList, bool outerQuery, SMsgBuf* pMsgBuf); -// qParserExtractRequestedMetaInfo(pInfo->) +void setTokenAndResColumnName(tSqlExprItem* pItem, char* resColumnName, char* rawName, int32_t nameLength) { + memset(resColumnName, 0, nameLength); + + int32_t len = ((int32_t)pItem->pNode->exprToken.n < nameLength) ? 
(int32_t)pItem->pNode->exprToken.n : nameLength; + strncpy(rawName, pItem->pNode->exprToken.z, len); + + if (pItem->aliasName != NULL) { + assert(strlen(pItem->aliasName) < nameLength); + tstrncpy(resColumnName, pItem->aliasName, len); + } else { + strncpy(resColumnName, rawName, len); + } +} + +size_t tscNumOfExprs(SQueryStmtInfo* pQueryInfo) { + return taosArrayGetSize(pQueryInfo->exprList); +} + +static int32_t evaluateImpl(tSqlExpr* pExpr, int32_t tsPrecision) { + int32_t code = 0; + if (pExpr->type == SQL_NODE_EXPR) { + code = evaluateImpl(pExpr->pLeft, tsPrecision); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = evaluateImpl(pExpr->pRight, tsPrecision); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pExpr->pLeft->type == SQL_NODE_VALUE && pExpr->pRight->type == SQL_NODE_VALUE) { + tSqlExpr* pLeft = pExpr->pLeft; + tSqlExpr* pRight = pExpr->pRight; + if ((pLeft->tokenId == TK_TIMESTAMP && (pRight->tokenId == TK_INTEGER || pRight->tokenId == TK_FLOAT)) || + ((pRight->tokenId == TK_TIMESTAMP && (pLeft->tokenId == TK_INTEGER || pLeft->tokenId == TK_FLOAT)))) { + return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; + } else if (pLeft->tokenId == TK_TIMESTAMP && pRight->tokenId == TK_TIMESTAMP) { + tSqlExprEvaluate(pExpr); + } else { + tSqlExprEvaluate(pExpr); + } + } else { + // Other types of expressions are not evaluated, they will be handled during the validation of the abstract syntax tree. 
+ } + } else if (pExpr->type == SQL_NODE_VALUE) { + if (pExpr->tokenId == TK_NOW) { + pExpr->value.i64 = taosGetTimestamp(tsPrecision); + pExpr->value.nType = TSDB_DATA_TYPE_BIGINT; + pExpr->tokenId = TK_TIMESTAMP; + } else if (pExpr->tokenId == TK_VARIABLE) { + char unit = 0; + SToken* pToken = &pExpr->exprToken; + int32_t ret = parseAbsoluteDuration(pToken->z, pToken->n, &pExpr->value.i64, &unit, tsPrecision); + if (ret != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; + } + + pExpr->value.nType = TSDB_DATA_TYPE_BIGINT; + pExpr->tokenId = TK_TIMESTAMP; + } else if (pExpr->tokenId == TK_NULL) { + pExpr->value.nType = TSDB_DATA_TYPE_NULL; + } else if (pExpr->tokenId == TK_INTEGER || pExpr->tokenId == TK_STRING || pExpr->tokenId == TK_FLOAT || pExpr->tokenId == TK_BOOL) { + SToken* pToken = &pExpr->exprToken; + + int32_t tokenType = pToken->type; + toTSDBType(tokenType); + taosVariantCreate(&pExpr->value, pToken->z, pToken->n, tokenType); + } + + return TSDB_CODE_SUCCESS; + // other types of data are handled in the parent level. 
+ } + + return TSDB_CODE_SUCCESS; +} + +typedef struct SVgroupTableInfo { + SVgroupMsg vgInfo; + SArray *itemList; // SArray +} SVgroupTableInfo; + +void freeVgroupTableInfo(SArray* pVgroupTables) { + if (pVgroupTables == NULL) { + return; + } + + size_t num = taosArrayGetSize(pVgroupTables); + for (size_t i = 0; i < num; i++) { + SVgroupTableInfo* pInfo = taosArrayGet(pVgroupTables, i); + taosArrayDestroy(pInfo->itemList); + } + + taosArrayDestroy(pVgroupTables); +} + +void destroyFilterInfo(SColumnFilterList* pFilterList) { + if (pFilterList->filterInfo == NULL) { + pFilterList->numOfFilters = 0; + return; + } + + for(int32_t i = 0; i < pFilterList->numOfFilters; ++i) { + if (pFilterList->filterInfo[i].filterstr) { + tfree(pFilterList->filterInfo[i].pz); + } + } + + tfree(pFilterList->filterInfo); + pFilterList->numOfFilters = 0; +} + +void columnDestroy(SColumn* pCol) { + destroyFilterInfo(&pCol->info.flist); + free(pCol); +} + +void destroyColumnList(SArray* pColumnList) { + if (pColumnList == NULL) { + return; + } + + size_t num = taosArrayGetSize(pColumnList); + for (int32_t i = 0; i < num; ++i) { + SColumn* pCol = taosArrayGetP(pColumnList, i); + columnDestroy(pCol); + } + + taosArrayDestroy(pColumnList); +} + +void clearTableMetaInfo(STableMetaInfo* pTableMetaInfo) { + if (pTableMetaInfo == NULL) { + return; + } + + tfree(pTableMetaInfo->pTableMeta); + tfree(pTableMetaInfo->vgroupList); + + destroyColumnList(pTableMetaInfo->tagColList); + pTableMetaInfo->tagColList = NULL; + + free(pTableMetaInfo); +} + +static STableMeta* extractTempTableMetaFromSubquery(SQueryStmtInfo* pUpstream) { + STableMetaInfo* pUpstreamTableMetaInfo /*= getMetaInfo(pUpstream, 0)*/; + + int32_t numOfColumns = pUpstream->fieldsInfo.numOfOutput; + STableMeta *meta = calloc(1, sizeof(STableMeta) + sizeof(SSchema) * numOfColumns); + meta->tableType = TSDB_TEMP_TABLE; + + STableComInfo *info = &meta->tableInfo; + info->numOfColumns = numOfColumns; + info->precision = 
pUpstreamTableMetaInfo->pTableMeta->tableInfo.precision; + info->numOfTags = 0; + + int32_t n = 0; + for(int32_t i = 0; i < numOfColumns; ++i) { +// SInternalField* pField = tscFieldInfoGetInternalField(&pUpstream->fieldsInfo, i); +// if (!pField->visible) { +// continue; +// } +// +// meta->schema[n].bytes = pField->field.bytes; +// meta->schema[n].type = pField->field.type; +// +// SExprInfo* pExpr = pField->pExpr; +// meta->schema[n].colId = pExpr->base.resColId; +// tstrncpy(meta->schema[n].name, pField->pExpr->base.aliasName, TSDB_COL_NAME_LEN); +// info->rowSize += meta->schema[n].bytes; +// +// n += 1; + } + + info->numOfColumns = n; + return meta; +} + +void initQueryInfo(SQueryStmtInfo* pQueryInfo) { + pQueryInfo->fieldsInfo.internalField = taosArrayInit(4, sizeof(SInternalField)); + pQueryInfo->exprList = taosArrayInit(4, POINTER_BYTES); + pQueryInfo->colList = taosArrayInit(4, POINTER_BYTES); + pQueryInfo->udColumnId = TSDB_UD_COLUMN_INDEX; + pQueryInfo->limit.limit = -1; + pQueryInfo->limit.offset = 0; + + pQueryInfo->slimit.limit = -1; + pQueryInfo->slimit.offset = 0; + pQueryInfo->pUpstream = taosArrayInit(4, POINTER_BYTES); + pQueryInfo->window = TSWINDOW_INITIALIZER; +} + +static int32_t doValidateSubquery(SSqlNode* pSqlNode, int32_t index, SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf) { + SRelElementPair* subInfo = taosArrayGet(pSqlNode->from->list, index); + + // union all is not support currently + SSqlNode* p = taosArrayGetP(subInfo->pSubquery, 0); + if (taosArrayGetSize(subInfo->pSubquery) >= 2) { + return buildInvalidOperationMsg(pMsgBuf, "not support union in subquery"); + } + + SQueryStmtInfo* pSub = calloc(1, sizeof(SQueryStmtInfo)); + initQueryInfo(pSub); + + SArray *pUdfInfo = NULL; + if (pQueryInfo->pUdfInfo) { + pUdfInfo = taosArrayDup(pQueryInfo->pUdfInfo); + } + + pSub->pUdfInfo = pUdfInfo; + pSub->pDownstream = pQueryInfo; + int32_t code = validateSqlNode(p, pSub, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // 
create dummy table meta info + STableMetaInfo* pTableMetaInfo1 = calloc(1, sizeof(STableMetaInfo)); + if (pTableMetaInfo1 == NULL) { + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + + pTableMetaInfo1->pTableMeta = extractTempTableMetaFromSubquery(pSub); + + if (subInfo->aliasName.n > 0) { + if (subInfo->aliasName.n >= TSDB_TABLE_FNAME_LEN) { + tfree(pTableMetaInfo1); + return buildInvalidOperationMsg(pMsgBuf, "subquery alias name too long"); + } + + tstrncpy(pTableMetaInfo1->aliasName, subInfo->aliasName.z, subInfo->aliasName.n + 1); + } + + taosArrayPush(pQueryInfo->pUpstream, &pSub); + + // NOTE: order mix up in subquery not support yet. + pQueryInfo->order = pSub->order; + + STableMetaInfo** tmp = realloc(pQueryInfo->pTableMetaInfo, (pQueryInfo->numOfTables + 1) * POINTER_BYTES); + if (tmp == NULL) { + tfree(pTableMetaInfo1); + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + + pQueryInfo->pTableMetaInfo = tmp; + + pQueryInfo->pTableMetaInfo[pQueryInfo->numOfTables] = pTableMetaInfo1; + pQueryInfo->numOfTables += 1; + + // all columns are added into the table column list + STableMeta* pMeta = pTableMetaInfo1->pTableMeta; + int32_t startOffset = (int32_t) taosArrayGetSize(pQueryInfo->colList); + + for(int32_t i = 0; i < pMeta->tableInfo.numOfColumns; ++i) { + columnListInsert(pQueryInfo->colList, i + startOffset, pMeta->uid, &pMeta->schema[i]); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t getTableIndexImpl(SToken* pTableToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex) { + if (pTableToken->n == 0) { // only one table and no table name prefix in column name + if (pQueryInfo->numOfTables == 1) { + pIndex->tableIndex = 0; + } else { + pIndex->tableIndex = COLUMN_INDEX_INITIAL_VAL; + } + + return TSDB_CODE_SUCCESS; + } + + pIndex->tableIndex = COLUMN_INDEX_INITIAL_VAL; + for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, i); + char* name = pTableMetaInfo->aliasName; + if (strncasecmp(name, pTableToken->z, 
pTableToken->n) == 0 && strlen(name) == pTableToken->n) { + pIndex->tableIndex = i; + return TSDB_CODE_SUCCESS; + } + } + + return TSDB_CODE_TSC_INVALID_OPERATION; +} + +void extractTableNameFromToken(SToken* pToken, SToken* pTable) { + const char sep = TS_PATH_DELIMITER[0]; + + if (pToken == pTable || pToken == NULL || pTable == NULL) { + return; + } + + char* r = strnchr(pToken->z, sep, pToken->n, false); + + if (r != NULL) { // record the table name token + pTable->n = (uint32_t)(r - pToken->z); + pTable->z = pToken->z; + + r += 1; + pToken->n -= (uint32_t)(r - pToken->z); + pToken->z = r; + } +} + +int32_t getTableIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex) { + SToken tableToken = {0}; + extractTableNameFromToken(pToken, &tableToken); + + if (getTableIndexImpl(&tableToken, pQueryInfo, pIndex) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + return TSDB_CODE_SUCCESS; +} + +static int16_t doGetColumnIndex(SQueryStmtInfo* pQueryInfo, int32_t index, const SToken* pToken, int16_t* type) { + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, index)->pTableMeta; + + int32_t numOfCols = getNumOfColumns(pTableMeta) + getNumOfTags(pTableMeta); + SSchema* pSchema = getTableColumnSchema(pTableMeta); + + int16_t columnIndex = COLUMN_INDEX_INITIAL_VAL; + + for (int32_t i = 0; i < numOfCols; ++i) { + if (pToken->n != strlen(pSchema[i].name)) { + continue; + } + + if (strncasecmp(pSchema[i].name, pToken->z, pToken->n) == 0) { + columnIndex = i; + break; + } + } + + *type = (columnIndex >= getNumOfColumns(pTableMeta))? 
TSDB_COL_TAG:TSDB_COL_NORMAL; + return columnIndex; +} + +static bool isTablenameToken(SToken* token) { + SToken tmpToken = *token; + SToken tableToken = {0}; + + extractTableNameFromToken(&tmpToken, &tableToken); + return (tmpToken.n == strlen(TSQL_TBNAME_L) && strncasecmp(TSQL_TBNAME_L, tmpToken.z, tmpToken.n) == 0); +} + +int32_t doGetColumnIndexByName(SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex, SMsgBuf* pMsgBuf) { + const char* msg0 = "ambiguous column name"; + const char* msg1 = "invalid column name"; + + pIndex->type = TSDB_COL_NORMAL; + + if (isTablenameToken(pToken)) { + pIndex->columnIndex = TSDB_TBNAME_COLUMN_INDEX; + pIndex->type = TSDB_COL_TAG; + } else if (strlen(DEFAULT_PRIMARY_TIMESTAMP_COL_NAME) == pToken->n && + strncasecmp(pToken->z, DEFAULT_PRIMARY_TIMESTAMP_COL_NAME, pToken->n) == 0) { + pIndex->columnIndex = PRIMARYKEY_TIMESTAMP_COL_INDEX; // just make runtime happy, need fix java test case InsertSpecialCharacterJniTest + } else if (pToken->n == 0) { + pIndex->columnIndex = PRIMARYKEY_TIMESTAMP_COL_INDEX; // just make runtime happy, need fix java test case InsertSpecialCharacterJniTest + } else { + // not specify the table name, try to locate the table index by column name + if (pIndex->tableIndex == COLUMN_INDEX_INITIAL_VAL) { + for (int16_t i = 0; i < pQueryInfo->numOfTables; ++i) { + int16_t colIndex = doGetColumnIndex(pQueryInfo, i, pToken, &pIndex->type); + + if (colIndex != COLUMN_INDEX_INITIAL_VAL) { + if (pIndex->columnIndex != COLUMN_INDEX_INITIAL_VAL) { + return buildInvalidOperationMsg(pMsgBuf, msg0); + } else { + pIndex->tableIndex = i; + pIndex->columnIndex = colIndex; + } + } + } + } else { // table index is valid, get the column index + pIndex->columnIndex = doGetColumnIndex(pQueryInfo, pIndex->tableIndex, pToken, &pIndex->type); + } + + if (pIndex->columnIndex == COLUMN_INDEX_INITIAL_VAL) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + } + + if (VALID_COLUMN_INDEX(*pIndex)) { + return 
TSDB_CODE_SUCCESS; + } else { + return TSDB_CODE_TSC_INVALID_OPERATION; + } +} + +int32_t getColumnIndexByName(const SToken* pToken, SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex, SMsgBuf* pMsgBuf) { + if (pQueryInfo->pTableMetaInfo == NULL || pQueryInfo->numOfTables == 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + SToken tmpToken = *pToken; + if (getTableIndexByName(&tmpToken, pQueryInfo, pIndex) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + return doGetColumnIndexByName(&tmpToken, pQueryInfo, pIndex, pMsgBuf); +} + +int32_t validateGroupbyNode(SQueryStmtInfo* pQueryInfo, SArray* pList, SMsgBuf* pMsgBuf) { + const char* msg1 = "too many columns in group by clause"; + const char* msg2 = "invalid column name in group by clause"; + const char* msg3 = "columns from one table allowed as group by columns"; + const char* msg4 = "join query does not support group by"; + const char* msg5 = "not allowed column type for group by"; + const char* msg6 = "tags not allowed for table query"; + const char* msg7 = "not support group by expression"; + const char* msg8 = "normal column can only locate at the end of group by clause"; + + // todo : handle two tables situation + STableMetaInfo* pTableMetaInfo = NULL; + if (pList == NULL) { + return TSDB_CODE_SUCCESS; + } + + if (pQueryInfo->numOfTables > 1) { + return buildInvalidOperationMsg(pMsgBuf, msg4); + } + + SGroupbyExpr* pGroupExpr = &pQueryInfo->groupbyExpr; + if (pGroupExpr->columnInfo == NULL) { + pGroupExpr->columnInfo = taosArrayInit(4, sizeof(SColIndex)); + } + + if (pQueryInfo->colList == NULL) { + pQueryInfo->colList = taosArrayInit(4, POINTER_BYTES); + } + + if (pGroupExpr->columnInfo == NULL || pQueryInfo->colList == NULL) { + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + + int32_t numOfGroupCols = (int16_t) taosArrayGetSize(pList); + if (numOfGroupCols > TSDB_MAX_TAGS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + SSchema *pSchema = NULL; + int32_t tableIndex = 
COLUMN_INDEX_INITIAL_VAL; + + size_t num = taosArrayGetSize(pList); + for (int32_t i = 0; i < num; ++i) { + SListItem * pItem = taosArrayGet(pList, i); + SVariant* pVar = &pItem->pVar; + + SToken token = {pVar->nLen, pVar->nType, pVar->pz}; + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&token, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + if (tableIndex == COLUMN_INDEX_INITIAL_VAL) { + tableIndex = index.tableIndex; + } else if (tableIndex != index.tableIndex) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + pSchema = getTbnameColumnSchema(); + } else { + pSchema = getOneColumnSchema(pTableMeta, index.columnIndex); + } + + int32_t numOfCols = getNumOfColumns(pTableMeta); + bool groupTag = (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX || index.columnIndex >= numOfCols); + + if (groupTag) { + if (!UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + int32_t relIndex = index.columnIndex; + if (index.columnIndex != TSDB_TBNAME_COLUMN_INDEX) { + relIndex -= numOfCols; + } + + SColIndex colIndex = { .colIndex = relIndex, .flag = TSDB_COL_TAG, .colId = pSchema->colId, }; + strncpy(colIndex.name, pSchema->name, tListLen(colIndex.name)); + taosArrayPush(pGroupExpr->columnInfo, &colIndex); + + index.columnIndex = relIndex; + columnListInsert(pTableMetaInfo->tagColList, index.columnIndex, pTableMeta->uid, pSchema); + } else { + // check if the column type is valid, here only support the bool/tinyint/smallint/bigint group by + if (pSchema->type == TSDB_DATA_TYPE_TIMESTAMP || pSchema->type == TSDB_DATA_TYPE_FLOAT || pSchema->type == TSDB_DATA_TYPE_DOUBLE) { + return buildInvalidOperationMsg(pMsgBuf, msg5); + } + + columnListInsert(pQueryInfo->colList, 
index.columnIndex, pTableMeta->uid, pSchema); + + SColIndex colIndex = { .colIndex = index.columnIndex, .flag = TSDB_COL_NORMAL, .colId = pSchema->colId }; + strncpy(colIndex.name, pSchema->name, tListLen(colIndex.name)); + + taosArrayPush(pGroupExpr->columnInfo, &colIndex); + pQueryInfo->groupbyExpr.orderType = TSDB_ORDER_ASC; + numOfGroupCols++; + } + } + + // 1. only one normal column allowed in the group by clause + // 2. the normal column in the group by clause can only located in the end position + if (numOfGroupCols > 1) { + return buildInvalidOperationMsg(pMsgBuf, msg7); + } + + for(int32_t i = 0; i < num; ++i) { + SColIndex* pIndex = taosArrayGet(pGroupExpr->columnInfo, i); + if (TSDB_COL_IS_NORMAL_COL(pIndex->flag) && i != num - 1) { + return buildInvalidOperationMsg(pMsgBuf, msg8); + } + } + + pQueryInfo->groupbyExpr.tableIndex = tableIndex; + return TSDB_CODE_SUCCESS; +} + +int32_t filterUnsupportedQueryFunction(SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf) { + // todo NOT support yet + const char* msg6 = "not support stddev/percentile/interp in the outer query yet"; + const char* msg9 = "not support 3 level select"; + + for (int32_t i = 0; i < tscNumOfExprs(pQueryInfo); ++i) { + SExprInfo* pExpr = getExprInfo(pQueryInfo, i); + assert(pExpr->pExpr->nodeType == TEXPR_UNARYEXPR_NODE); + + int32_t f = pExpr->pExpr->_node.functionId; + if (f == FUNCTION_STDDEV || f == FUNCTION_PERCT || f == FUNCTION_INTERP) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + if (f == FUNCTION_BLKINFO && taosArrayGetSize(pQueryInfo->pUpstream) > 0) { + return buildInvalidOperationMsg(pMsgBuf, msg9); + } + + if (/*(timeWindowQuery || pQueryInfo->stateWindow) &&*/ f == FUNCTION_LAST) { + pExpr->base.numOfParams = 1; + pExpr->base.param[0].i64 = TSDB_ORDER_ASC; + pExpr->base.param[0].nType = TSDB_DATA_TYPE_INT; + } + } +} + +int32_t validateWhereNode(SQueryStmtInfo *pQueryInfo, tSqlExpr* pWhereExpr, SMsgBuf* pMsgBuf) { return 0; } + +// validate the interval info 
+int32_t validateIntervalNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +int32_t validateSessionNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +// parse the window_state +int32_t validateStateWindowNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { +return 0; +} + +// parse the having clause in the first place +int32_t validateHavingNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +int32_t validateLimitNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +// set order by info +int32_t validateOrderbyNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +int32_t validateFillNode(SQueryStmtInfo *pQueryInfo, SSqlNode* pSqlNode, SMsgBuf* pMsgBuf) { + return 0; +} + +int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf) { + assert(pSqlNode != NULL && (pSqlNode->from == NULL || taosArrayGetSize(pSqlNode->from->list) > 0)); + + const char* msg1 = "point interpolation query needs timestamp"; + const char* msg2 = "too many tables in from clause"; + const char* msg3 = "start(end) time of query range required or time range too large"; + const char* msg4 = "interval query not supported, since the result of sub query not include valid timestamp column"; + const char* msg5 = "only tag query not compatible with normal column filter"; + const char* msg7 = "derivative/twa/irate requires timestamp column exists in subquery"; + const char* msg8 = "condition missing for join query"; + + int32_t code = TSDB_CODE_SUCCESS; + + /* + * handle the sql expression without from subclause + * select server_status(); + * select server_version(); + * select client_version(); + * select database(); + */ + if (pSqlNode->from == NULL) { + assert(pSqlNode->fillType == NULL && pSqlNode->pGroupby == NULL && pSqlNode->pWhere == NULL && + 
pSqlNode->pSortOrder == NULL); + assert(0); +// return doLocalQueryProcess(pCmd, pQueryInfo, pSqlNode); + } + + if (pSqlNode->from->type == SQL_NODE_FROM_SUBQUERY) { + pQueryInfo->numOfTables = 0; + + // parse the subquery in the first place + int32_t numOfSub = (int32_t)taosArrayGetSize(pSqlNode->from->list); + for (int32_t i = 0; i < numOfSub; ++i) { + SRelElementPair* subInfo = taosArrayGet(pSqlNode->from->list, i); + code = doValidateSubquery(pSqlNode, i, pQueryInfo, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + int32_t timeWindowQuery = + (TPARSER_HAS_TOKEN(pSqlNode->interval.interval) || TPARSER_HAS_TOKEN(pSqlNode->sessionVal.gap)); +// TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_TABLE_QUERY); + + // parse the group by clause in the first place + if (validateGroupbyNode(pQueryInfo, pSqlNode->pGroupby, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (validateSelectNodeList(pQueryInfo, pSqlNode->pSelNodeList, true, pMsgBuf) != + TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + code = filterUnsupportedQueryFunction(pQueryInfo, pMsgBuf); + + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, 0)->pTableMeta; + SSchema* pSchema = getOneColumnSchema(pTableMeta, 0); + + if (pSchema->type != TSDB_DATA_TYPE_TIMESTAMP) { + int32_t numOfExprs = (int32_t)tscNumOfExprs(pQueryInfo); + + for (int32_t i = 0; i < numOfExprs; ++i) { + SExprInfo* pExpr = getExprInfo(pQueryInfo, i); + + int32_t f = pExpr->pExpr->_node.functionId; + if (f == FUNCTION_DERIVATIVE || f == FUNCTION_TWA || f == FUNCTION_IRATE) { + return buildInvalidOperationMsg(pMsgBuf, msg7); + } + } + } + + // validate the query filter condition info + if (pSqlNode->pWhere != NULL) { + if (validateWhereNode(pQueryInfo, pSqlNode->pWhere, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } else { + if (pQueryInfo->numOfTables > 1) { + return buildInvalidOperationMsg(pMsgBuf, msg8); + } + } + + 
// validate the interval info + if (validateIntervalNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } else { + if (validateSessionNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // parse the window_state + if (validateStateWindowNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + +// if (isTimeWindowQuery(pQueryInfo)) { +// // check if the first column of the nest query result is timestamp column +// SColumn* pCol = taosArrayGetP(pQueryInfo->colList, 0); +// if (pCol->info.type != TSDB_DATA_TYPE_TIMESTAMP) { +// return buildInvalidOperationMsg(pMsgBuf, msg4); +// } +// +// if (validateFunctionsInIntervalOrGroupbyQuery(pCmd, pQueryInfo) != TSDB_CODE_SUCCESS) { +// return TSDB_CODE_TSC_INVALID_OPERATION; +// } +// } + } + + // parse the having clause in the first place + int32_t joinQuery = (pSqlNode->from != NULL && taosArrayGetSize(pSqlNode->from->list) > 1); + if (validateHavingNode(pQueryInfo, pSqlNode, pMsgBuf) != + TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if ((code = validateLimitNode(pQueryInfo, pSqlNode, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + + // set order by info + if (validateOrderbyNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if ((code = validateFillNode(pQueryInfo, pSqlNode, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + } else { + pQueryInfo->command = TSDB_SQL_SELECT; + + size_t numOfTables = taosArrayGetSize(pSqlNode->from->list); + if (numOfTables > TSDB_MAX_JOIN_TABLE_NUM) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, 0); + bool isSTable = UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo); + + int32_t type = isSTable? 
TSDB_QUERY_TYPE_STABLE_QUERY:TSDB_QUERY_TYPE_TABLE_QUERY; + TSDB_QUERY_SET_TYPE(pQueryInfo->type, type); + + // parse the group by clause in the first place + if (validateGroupbyNode(pQueryInfo, pSqlNode->pGroupby, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + pQueryInfo->onlyHasTagCond = true; + + // set where info + if (pSqlNode->pWhere != NULL) { + if (validateWhereNode(pQueryInfo, pSqlNode->pWhere, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + pSqlNode->pWhere = NULL; + } else { + if (taosArrayGetSize(pSqlNode->from->list) > 1) { // Cross join not allowed yet + return buildInvalidOperationMsg(pMsgBuf, "cross join not supported yet"); + } + } + + if (validateSelectNodeList(pQueryInfo, pSqlNode->pSelNodeList, false, pMsgBuf) != + TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (isSTable && (pQueryInfo) && pQueryInfo->distinct && !pQueryInfo->onlyHasTagCond) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // parse the window_state + if (validateStateWindowNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // set order by info + if (validateOrderbyNode(pQueryInfo, pSqlNode, pMsgBuf) != + TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // set interval value + if (validateIntervalNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // parse the having clause in the first place + if (validateHavingNode(pQueryInfo, pSqlNode, pMsgBuf) != + TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + /* + * transfer sql functions that need secondary merge into another format + * in dealing with super table queries such as: count/first/last + */ + if (validateSessionNode(pQueryInfo, pSqlNode, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + +// if (isTimeWindowQuery(pQueryInfo) && 
(validateFunctionsInIntervalOrGroupbyQuery(pCmd, pQueryInfo) != TSDB_CODE_SUCCESS)) { +// return TSDB_CODE_TSC_INVALID_OPERATION; +// } + + // no result due to invalid query time range + if (pQueryInfo->window.skey > pQueryInfo->window.ekey) { + pQueryInfo->command = TSDB_SQL_RETRIEVE_EMPTY_RESULT; + return TSDB_CODE_SUCCESS; + } + + if ((code = validateLimitNode(pQueryInfo, pSqlNode, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + + if ((code = validateFillNode(pQueryInfo, pSqlNode, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + } + + // set the query info + SExprInfo** p = NULL; + int32_t numOfExpr = 0; + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, 0); + code = createProjectionExpr(pQueryInfo, pTableMetaInfo, &p, &numOfExpr); + if (pQueryInfo->exprList1 == NULL) { + pQueryInfo->exprList1 = taosArrayInit(4, POINTER_BYTES); + } + + taosArrayAddBatch(pQueryInfo->exprList1, (void*) p, numOfExpr); + tfree(p); + + return TSDB_CODE_SUCCESS; // Does not build query message here +} + +int32_t checkForInvalidExpr(SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf) { + assert(pQueryInfo != NULL && pMsgBuf != NULL); + return 0; +} + +static int32_t distinctCompatibleCheck(SQueryStmtInfo* pQueryInfo, bool joinQuery, SMsgBuf* pMsgBuf) { + const char* msg6 = "not support distinct mixed with join"; + const char* msg7 = "not support distinct mixed with groupby"; + const char* msg8 = "not support distinct in nest query"; + + if (pQueryInfo->distinct) { + if (joinQuery) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + if (taosArrayGetSize(pQueryInfo->groupbyExpr.columnInfo) != 0) { + return buildInvalidOperationMsg(pMsgBuf, msg7); + } + + if (pQueryInfo->pDownstream != NULL) { + return buildInvalidOperationMsg(pMsgBuf, msg8); + } + } +} + +static int32_t resColId = 5000; +int32_t getNewResColId() { + return resColId++; +} + +int32_t addResColumnInfo(SQueryStmtInfo* pQueryInfo, int32_t outputIndex, SSchema* pSchema, SExprInfo* pSqlExpr) { + 
SInternalField* pInfo = insertFieldInfo(&pQueryInfo->fieldsInfo, outputIndex, pSchema); + pInfo->pExpr = pSqlExpr; + return TSDB_CODE_SUCCESS; +} + +void setResultColName(char* name, tSqlExprItem* pItem, SToken* pToken, SToken* functionToken, bool multiCols) { + if (pItem->aliasName != NULL) { + tstrncpy(name, pItem->aliasName, TSDB_COL_NAME_LEN); + } else if (multiCols) { + char uname[TSDB_COL_NAME_LEN] = {0}; + int32_t len = MIN(pToken->n + 1, TSDB_COL_NAME_LEN); + tstrncpy(uname, pToken->z, len); + + if (tsKeepOriginalColumnName) { // keep the original column name + tstrncpy(name, uname, TSDB_COL_NAME_LEN); + } else { + const int32_t size = TSDB_COL_NAME_LEN + FUNCTIONS_NAME_MAX_LENGTH + 2 + 1; + char tmp[TSDB_COL_NAME_LEN + FUNCTIONS_NAME_MAX_LENGTH + 2 + 1] = {0}; + + char f[FUNCTIONS_NAME_MAX_LENGTH] = {0}; + strncpy(f, functionToken->z, functionToken->n); + + snprintf(tmp, size, "%s(%s)", f, uname); + tstrncpy(name, tmp, TSDB_COL_NAME_LEN); + } + } else { // use the user-input result column name + int32_t len = MIN(pItem->pNode->exprToken.n + 1, TSDB_COL_NAME_LEN); + tstrncpy(name, pItem->pNode->exprToken.z, len); + } +} + +SExprInfo* doAddOneExprInfo(SQueryStmtInfo* pQueryInfo, int32_t outputColIndex, int16_t functionId, SColumnIndex* pIndex, + SSchema* pColSchema, SSchema* pResultSchema, tExprNode* pExprNode, int32_t interSize, const char* token) { + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, pIndex->tableIndex); + + SExprInfo* pExpr = createExprInfo(pTableMetaInfo, functionId, pIndex, pExprNode, pResultSchema, interSize); + addExprInfo(pQueryInfo, outputColIndex, pExpr); + + tstrncpy(pExpr->base.token, token, sizeof(pExpr->base.token)); + + pExpr->base.colInfo.flag = pIndex->type; + + SArray* p = TSDB_COL_IS_TAG(pIndex->type)?pTableMetaInfo->tagColList:pQueryInfo->colList; + columnListInsert(p, pIndex->columnIndex, pTableMetaInfo->pTableMeta->uid, pColSchema); + + addResColumnInfo(pQueryInfo, outputColIndex, pColSchema, pExpr); + return 
pExpr; +} + +void doAddSourceColumnAndResColumn(SQueryStmtInfo* pQueryInfo, SColumnIndex* index, int32_t outputIndex, SExprInfo* pExpr, SSchema* pColSchema, bool finalResult) { + columnListInsert(pQueryInfo->colList, index->columnIndex, pExpr->base.uid, pColSchema); + + if (finalResult) { + addResColumnInfo(pQueryInfo, outputIndex, &pExpr->base.resSchema, pExpr); + } + + if (TSDB_COL_IS_NORMAL_COL(index->type)) { + insertPrimaryTsColumn(pQueryInfo->colList, pExpr->base.uid); + } +} + +static int32_t addOneExprInfo(SQueryStmtInfo* pQueryInfo, tSqlExprItem* pItem, int32_t functionId, + int32_t resColIdx, SColumnIndex* pColIndex, bool finalResult, SMsgBuf* pMsgBuf) { + const char* msg1 = "not support column types"; + + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, pColIndex->tableIndex); + SSchema* pSchema = getOneColumnSchema(pTableMetaInfo->pTableMeta, pColIndex->columnIndex); + + if (functionId == FUNCTION_SPREAD) { + if (IS_VAR_DATA_TYPE(pSchema->type) || pSchema->type == TSDB_DATA_TYPE_BOOL) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + } + + char name[TSDB_COL_NAME_LEN] = {0}; + SToken t = {.z = pSchema->name, .n = (uint32_t)strnlen(pSchema->name, TSDB_COL_NAME_LEN)}; + setResultColName(name, pItem, &t, &pItem->pNode->Expr.operand, true); + + SResultDataInfo resInfo = {0}; + getResultDataInfo(pSchema->type, pSchema->bytes, functionId, 0, &resInfo, 0, false); + + SSchema resultSchema = createSchema(resInfo.type, resInfo.bytes, getNewResColId(), name); + doAddOneExprInfo(pQueryInfo, resColIdx, functionId, pColIndex, pSchema, &resultSchema, NULL, resInfo.intermediateBytes, name); + return TSDB_CODE_SUCCESS; +} + +static int32_t checkForAliasName(SMsgBuf* pMsgBuf, char* aliasName) { + const char* msg1 = "column alias name too long"; + if (aliasName != NULL && strlen(aliasName) >= TSDB_COL_NAME_LEN) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t validateComplexExpr(tSqlExpr* pExpr, 
SQueryStmtInfo* pQueryInfo, SArray* pColList, int32_t* type, SMsgBuf* pMsgBuf); +static int32_t sqlExprToExprNode(tExprNode **pExpr, const tSqlExpr* pSqlExpr, SQueryStmtInfo* pQueryInfo, SArray* pCols, uint64_t *uid, SMsgBuf* pMsgBuf); + +static int64_t getTickPerSecond(SVariant* pVariant, int32_t precision, int64_t* tickPerSec, SMsgBuf *pMsgBuf) { + const char* msg10 = "derivative duration should be greater than 1 Second"; + + if (taosVariantDump(pVariant, (char*) tickPerSec, TSDB_DATA_TYPE_BIGINT, true) < 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (precision == TSDB_TIME_PRECISION_MILLI) { + *tickPerSec /= TSDB_TICK_PER_SECOND(TSDB_TIME_PRECISION_MICRO); + } else if (precision == TSDB_TIME_PRECISION_MICRO) { + *tickPerSec /= TSDB_TICK_PER_SECOND(TSDB_TIME_PRECISION_MILLI); + } + + if (*tickPerSec <= 0 || *tickPerSec < TSDB_TICK_PER_SECOND(precision)) { + return buildInvalidOperationMsg(pMsgBuf, msg10); + } + + return TSDB_CODE_SUCCESS; +} + +// set the first column ts for top/bottom query +static void setTsOutputExprInfo(SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, int32_t outputIndex, int32_t tableIndex) { + SColumnIndex indexTS = {.tableIndex = tableIndex, .columnIndex = PRIMARYKEY_TIMESTAMP_COL_INDEX, .type = TSDB_COL_NORMAL}; + SSchema s = createSchema(TSDB_DATA_TYPE_TIMESTAMP, TSDB_KEYSIZE, getNewResColId(), "ts"); + + SExprInfo* pExpr = createExprInfo(pTableMetaInfo, FUNCTION_TS_DUMMY, &indexTS, NULL, &s, TSDB_KEYSIZE); + addExprInfo(pQueryInfo, outputIndex, pExpr); + + SSchema* pSourceSchema = getOneColumnSchema(pTableMetaInfo->pTableMeta, indexTS.columnIndex); + columnListInsert(pQueryInfo->colList, indexTS.columnIndex, pTableMetaInfo->pTableMeta->uid, pSourceSchema); + addResColumnInfo(pQueryInfo, outputIndex, &pExpr->base.resSchema, pExpr); +} + +// todo handle count(a+b) +static int32_t setColumnIndex(SQueryStmtInfo* pQueryInfo, SArray* pParamList, SColumnIndex* index, SMsgBuf* pMsgBuf) { + const char* msg3 = "illegal 
column name"; + const char* msg4 = "invalid table name"; + + if (pParamList != NULL) { + tSqlExprItem* pParamElem = taosArrayGet(pParamList, 0); + + SToken* pToken = &pParamElem->pNode->columnName; + int16_t tokenId = pParamElem->pNode->tokenId; + if ((pToken->z == NULL || pToken->n == 0) && (TK_INTEGER != tokenId)) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + // select count(table.*), select count(1), count(2) + if (tokenId == TK_ALL || tokenId == TK_INTEGER) { + // check if the table name is valid or not + SToken tmpToken = pParamElem->pNode->columnName; + if (getTableIndexByName(&tmpToken, pQueryInfo, index) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg4); + } + + index->columnIndex = PRIMARYKEY_TIMESTAMP_COL_INDEX; + } else { + // count the number of table created according to the super table + if (getColumnIndexByName(pToken, pQueryInfo, index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + } + } else { // count(*) is equalled to count(primary_timestamp_key) + *index = (SColumnIndex) {0, PRIMARYKEY_TIMESTAMP_COL_INDEX, false}; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doAddAllColumnExprInSelectClause(SQueryStmtInfo *pQueryInfo, STableMetaInfo* pTableMetaInfo, tSqlExprItem* pItem, int32_t functionId, + int32_t tableIndex, int32_t* colIndex, bool finalResult, SMsgBuf* pMsgBuf) { + for (int32_t i = 0; i < getNumOfColumns(pTableMetaInfo->pTableMeta); ++i) { + SColumnIndex index = {.tableIndex = tableIndex, .columnIndex = i, .type = TSDB_COL_NORMAL}; + + if (addOneExprInfo(pQueryInfo, pItem, functionId, *colIndex, &index, finalResult, pMsgBuf) != 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + (*colIndex)++; + } +} + +static int32_t extractFunctionParameterInfo(SQueryStmtInfo* pQueryInfo, int32_t tokenId, STableMetaInfo** pTableMetaInfo, SSchema* columnSchema, + tExprNode** pNode, SColumnIndex* pIndex, tSqlExprItem* pParamElem, SMsgBuf* pMsgBuf); + +static int32_t 
doHandleOneParam(SQueryStmtInfo *pQueryInfo, tSqlExprItem* pItem, tSqlExprItem* pParamElem, int32_t functionId, + int32_t* outputIndex, bool finalResult, SMsgBuf* pMsgBuf) { + const char* msg3 = "illegal column name"; + const char* msg4 = "invalid table name"; + const char* msg6 = "functions applied to tags are not allowed"; + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + + if (pParamElem->pNode->tokenId == TK_ALL) { // select table.* + SToken tmpToken = pParamElem->pNode->columnName; + + if (getTableIndexByName(&tmpToken, pQueryInfo, &index) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg4); + } + + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + doAddAllColumnExprInSelectClause(pQueryInfo, pTableMetaInfo, pItem, functionId, index.tableIndex, outputIndex, finalResult, pMsgBuf); + } else { + tExprNode* pNode = NULL; + int32_t tokenId = pParamElem->pNode->tokenId; + SSchema columnSchema = {0}; + STableMetaInfo* pTableMetaInfo = {0}; + + extractFunctionParameterInfo(pQueryInfo, tokenId, &pTableMetaInfo, &columnSchema, &pNode, &index, pParamElem, pMsgBuf); + + if (getColumnIndexByName(&pParamElem->pNode->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + // functions can not be applied to tags + if (TSDB_COL_IS_TAG(index.type)) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + if (addOneExprInfo(pQueryInfo, pItem, functionId, (*outputIndex)++, &index, finalResult, pMsgBuf) != 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } +} + +static int32_t multiColumnListInsert(SQueryStmtInfo* pQueryInfo, SArray* pColumnList, SMsgBuf* pMsgBuf); + +int32_t extractFunctionParameterInfo(SQueryStmtInfo* pQueryInfo, int32_t tokenId, STableMetaInfo** pTableMetaInfo, SSchema* columnSchema, + tExprNode** pNode, SColumnIndex* pIndex, tSqlExprItem* pParamElem, SMsgBuf* pMsgBuf) { + const char* msg1 = "not support column types"; + const char* msg2 
= "invalid parameters"; + const char* msg3 = "illegal column name"; + const char* msg6 = "functions applied to tags are not allowed"; + const char* msg13 = "nested function is not supported"; + + if (tokenId == TK_ALL || tokenId == TK_ID) { // simple parameter + if ((getColumnIndexByName(&pParamElem->pNode->columnName, pQueryInfo, pIndex, pMsgBuf) != TSDB_CODE_SUCCESS)) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + // functions can not be applied to tags + if (TSDB_COL_IS_TAG(pIndex->type)) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + *pTableMetaInfo = getMetaInfo(pQueryInfo, pIndex->tableIndex); + + // 2. check if sql function can be applied on this column data type + *columnSchema = *(SSchema*) getOneColumnSchema((*pTableMetaInfo)->pTableMeta, pIndex->columnIndex); + } else if (tokenId == TK_PLUS || tokenId == TK_MINUS || tokenId == TK_STAR || tokenId == TK_REM || tokenId == TK_CONCAT) { + int32_t arithmeticType = NON_ARITHMEIC_EXPR; + SArray* pColumnList = taosArrayInit(4, sizeof(SColumnIndex)); + if (validateComplexExpr(pParamElem->pNode, pQueryInfo, pColumnList, &arithmeticType, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + if (arithmeticType != NORMAL_ARITHMETIC) { + return buildInvalidOperationMsg(pMsgBuf, msg13); + } + + *pTableMetaInfo = getMetaInfo(pQueryInfo, 0); // todo get the first table meta. 
+ *columnSchema = createSchema(TSDB_DATA_TYPE_DOUBLE, sizeof(double), getNewResColId(), ""); + + SArray* colList = taosArrayInit(10, sizeof(SColIndex)); + int32_t ret = sqlExprToExprNode(pNode, pParamElem->pNode, pQueryInfo, colList, NULL, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + taosArrayDestroy(colList); + tExprTreeDestroy(*pNode, NULL); + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + pIndex->tableIndex = 0; + multiColumnListInsert(pQueryInfo, pColumnList, pMsgBuf); + } else { + assert(0); + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t checkForkParam(tSqlExpr* pSqlExpr, size_t k, SMsgBuf* pMsgBuf) { + const char* msg1 = "invalid parameters"; + + if (k == 0) { + if (pSqlExpr->Expr.paramList != NULL && taosArrayGetSize(pSqlExpr->Expr.paramList) != 0) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + } else { + if (pSqlExpr->Expr.paramList == NULL || taosArrayGetSize(pSqlExpr->Expr.paramList) != k) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + } + return TSDB_CODE_SUCCESS; +} + +int32_t addExprAndResColumn(SQueryStmtInfo* pQueryInfo, int32_t colIndex, tSqlExprItem* pItem, bool finalResult, SMsgBuf* pMsgBuf) { + STableMetaInfo* pTableMetaInfo = NULL; + int32_t functionId = pItem->functionId; + int32_t code = TSDB_CODE_SUCCESS; + + const char* msg1 = "not support column types"; + const char* msg2 = "invalid parameters"; + const char* msg3 = "illegal column name"; + const char* msg4 = "invalid table name"; + const char* msg5 = "parameter is out of range [0, 100]"; + const char* msg6 = "functions applied to tags are not allowed"; + const char* msg7 = "normal table can not apply this function"; + const char* msg8 = "multi-columns selection does not support alias column name"; + const char* msg9 = "diff/derivative can no be applied to unsigned numeric type"; + const char* msg10 = "derivative duration should be greater than 1 Second"; + const char* msg11 = "third parameter in derivative should be 0 or 1"; + const char* msg12 = 
"parameter is out of range [1, 100]"; + const char* msg13 = "nested function is not supported"; + + if (checkForAliasName(pMsgBuf, pItem->aliasName) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + switch (functionId) { + case FUNCTION_COUNT: { + // more than one parameter for count() function + SArray* pParamList = pItem->pNode->Expr.paramList; + if ((code = checkForkParam(pItem->pNode, 1, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + code = setColumnIndex(pQueryInfo, pParamList, &index, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + int32_t size = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes; + SSchema s = createSchema(TSDB_DATA_TYPE_BIGINT, size, getNewResColId(), ""); + + char token[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, s.name, token,sizeof(s.name) - 1); + + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, index.tableIndex)->pTableMeta; + int32_t outputIndex = getNumOfFields(&pQueryInfo->fieldsInfo); + SSchema* ps = getOneColumnSchema(pTableMeta, index.columnIndex); + + doAddOneExprInfo(pQueryInfo, outputIndex, functionId, &index, ps, &s, NULL, size, token); + return TSDB_CODE_SUCCESS; + } + + case FUNCTION_SUM: + case FUNCTION_AVG: + case FUNCTION_RATE: + case FUNCTION_IRATE: + case FUNCTION_TWA: + case FUNCTION_MIN: + case FUNCTION_MAX: + case FUNCTION_DIFF: + case FUNCTION_DERIVATIVE: + case FUNCTION_STDDEV: + case FUNCTION_LEASTSQR: { + // 1. valid the number of parameters + int32_t numOfParams = (pItem->pNode->Expr.paramList == NULL)? 
0: (int32_t) taosArrayGetSize(pItem->pNode->Expr.paramList); + + // no parameters or more than one parameter for function + if (pItem->pNode->Expr.paramList == NULL || + (functionId != FUNCTION_LEASTSQR && functionId != FUNCTION_DERIVATIVE && numOfParams != 1) || + ((functionId == FUNCTION_LEASTSQR || functionId == FUNCTION_DERIVATIVE) && numOfParams != 3)) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + tSqlExprItem* pParamElem = taosArrayGet(pItem->pNode->Expr.paramList, 0); + + tExprNode* pNode = NULL; + int32_t tokenId = pParamElem->pNode->tokenId; + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + SSchema columnSchema = {0}; + + code = extractFunctionParameterInfo(pQueryInfo, tokenId, &pTableMetaInfo, &columnSchema, &pNode, &index, pParamElem,pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (tokenId == TK_ALL || tokenId == TK_ID) { + if (!IS_NUMERIC_TYPE(columnSchema.type)) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } else if (IS_UNSIGNED_NUMERIC_TYPE(columnSchema.type) && (functionId == FUNCTION_DIFF || functionId == FUNCTION_DERIVATIVE)) { + return buildInvalidOperationMsg(pMsgBuf, msg9); + } + } + + int32_t precision = pTableMetaInfo->pTableMeta->tableInfo.precision; + + SResultDataInfo resInfo = {0}; + if (getResultDataInfo(columnSchema.type, columnSchema.bytes, functionId, 0, &resInfo, 0, false) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // set the first column ts for diff query + int32_t numOfOutput = getNumOfFields(&pQueryInfo->fieldsInfo); + if (functionId == FUNCTION_DIFF || functionId == FUNCTION_DERIVATIVE) { + setTsOutputExprInfo(pQueryInfo, pTableMetaInfo, numOfOutput, index.tableIndex); + numOfOutput += 1; + } + + SSchema s = createSchema(resInfo.type, resInfo.bytes, getNewResColId(), "ts"); + + char token[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, s.name, token, sizeof(s.name) - 1); + + SExprInfo* pExpr = doAddOneExprInfo(pQueryInfo, numOfOutput, 
functionId, &index, &columnSchema, &s, pNode, resInfo.intermediateBytes, token); + + if (functionId == FUNCTION_LEASTSQR) { // set the leastsquares parameters + char val[8] = {0}; + if (taosVariantDump(&pParamElem[1].pNode->value, val, TSDB_DATA_TYPE_DOUBLE, true) < 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + addExprInfoParam(&pExpr->base, val, TSDB_DATA_TYPE_DOUBLE, DOUBLE_BYTES); + + memset(val, 0, tListLen(val)); + if (taosVariantDump(&pParamElem[2].pNode->value, val, TSDB_DATA_TYPE_DOUBLE, true) < 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + addExprInfoParam(&pExpr->base, val, TSDB_DATA_TYPE_DOUBLE, DOUBLE_BYTES); + } else if (functionId == FUNCTION_IRATE) { + addExprInfoParam(&pExpr->base, (char*) &precision, TSDB_DATA_TYPE_BIGINT, LONG_BYTES); + } else if (functionId == FUNCTION_DERIVATIVE) { + char val[8] = {0}; + + int64_t tickPerSec = 0; + code = getTickPerSecond(&pParamElem[1].pNode->value, precision, &tickPerSec, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + addExprInfoParam(&pExpr->base, (char*) &tickPerSec, TSDB_DATA_TYPE_BIGINT, LONG_BYTES); + memset(val, 0, tListLen(val)); + + if (taosVariantDump(&pParamElem[2].pNode->value, val, TSDB_DATA_TYPE_BIGINT, true) < 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (GET_INT64_VAL(val) != 0 && GET_INT64_VAL(val) != 1) { + return buildInvalidOperationMsg(pMsgBuf, msg11); + } + + addExprInfoParam(&pExpr->base, val, TSDB_DATA_TYPE_BIGINT, LONG_BYTES); + } + return TSDB_CODE_SUCCESS; + } + + case FUNCTION_FIRST: + case FUNCTION_LAST: + case FUNCTION_SPREAD: + case FUNCTION_LAST_ROW: + case FUNCTION_INTERP: { + bool requireAllFields = (pItem->pNode->Expr.paramList == NULL); + + if (!requireAllFields) { + SArray* pParamList = pItem->pNode->Expr.paramList; + if (taosArrayGetSize(pParamList) < 1) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + if (taosArrayGetSize(pParamList) > 1 && (pItem->aliasName != NULL)) { + return 
buildInvalidOperationMsg(pMsgBuf, msg8); + } + + // in first/last function, multiple columns can be add to resultset + for (int32_t i = 0; i < taosArrayGetSize(pParamList); ++i) { + tSqlExprItem* pParamElem = taosArrayGet(pParamList, i); + doHandleOneParam(pQueryInfo, pItem, pParamElem, functionId, &colIndex, finalResult, pMsgBuf); + } + } else { // select function(*) from xxx + int32_t numOfFields = 0; + + // multicolumn selection does not support alias name + if (pItem->aliasName != NULL && strlen(pItem->aliasName) > 0) { + return buildInvalidOperationMsg(pMsgBuf, msg8); + } + + for (int32_t j = 0; j < pQueryInfo->numOfTables; ++j) { + pTableMetaInfo = getMetaInfo(pQueryInfo, j); + doAddAllColumnExprInSelectClause(pQueryInfo, pTableMetaInfo, pItem, functionId, j, &colIndex, finalResult, pMsgBuf); + numOfFields += getNumOfColumns(pTableMetaInfo->pTableMeta); + } + } + return TSDB_CODE_SUCCESS; + } + + case FUNCTION_TOP: + case FUNCTION_BOTTOM: + case FUNCTION_PERCT: + case FUNCTION_APERCT: { + // 1. valid the number of parameters + // no parameters or more than one parameter for function + if ((code = checkForkParam(pItem->pNode, 2, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + + tSqlExprItem* pParamElem = taosArrayGet(pItem->pNode->Expr.paramList, 0); + if (pParamElem->pNode->tokenId != TK_ID) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&pParamElem->pNode->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + // functions can not be applied to tags + if (TSDB_COL_IS_TAG(index.type)) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = getOneColumnSchema(pTableMetaInfo->pTableMeta, index.columnIndex); + + // 2. 
valid the column type + if (!IS_NUMERIC_TYPE(pSchema->type)) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + // 3. valid the parameters + if (pParamElem[1].pNode->tokenId == TK_ID) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + SVariant* pVariant = &pParamElem[1].pNode->value; + SResultDataInfo resInfo = {0}; + char val[8] = {0}; + + if (functionId == FUNCTION_PERCT || functionId == FUNCTION_APERCT) { + taosVariantDump(pVariant, val, TSDB_DATA_TYPE_DOUBLE, true); + + double dp = GET_DOUBLE_VAL(val); + if (dp < 0 || dp > TOP_BOTTOM_QUERY_LIMIT) { + return buildInvalidOperationMsg(pMsgBuf, msg5); + } + + + getResultDataInfo(pSchema->type, pSchema->bytes, functionId, 0, &resInfo, 0, false); + + /* + * sql function transformation + * for dp = 0, it is actually min, + * for dp = 100, it is max, + */ + colIndex += 1; // the first column is ts + } else { + taosVariantDump(pVariant, val, TSDB_DATA_TYPE_BIGINT, true); + + int64_t nTop = GET_INT32_VAL(val); + if (nTop <= 0 || nTop > 100) { // todo use macro + return buildInvalidOperationMsg(pMsgBuf, msg12); + } + + // set the first column ts for top/bottom query + setTsOutputExprInfo(pQueryInfo, pTableMetaInfo, colIndex, index.tableIndex); + colIndex += 1; // the first column is ts + } + + SSchema s = createSchema(resInfo.type, resInfo.bytes, getNewResColId(), ""); + + char token[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, s.name, token, sizeof(s.name) - 1); + SExprInfo* pExpr = doAddOneExprInfo(pQueryInfo, colIndex, functionId, &index, pSchema, &s, NULL, resInfo.intermediateBytes, token); + + if (functionId == FUNCTION_PERCT || functionId == FUNCTION_APERCT) { + addExprInfoParam(&pExpr->base, val, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); + } else { + addExprInfoParam(&pExpr->base, val, TSDB_DATA_TYPE_BIGINT, sizeof(int64_t)); + } + return TSDB_CODE_SUCCESS; + } + + case FUNCTION_TID_TAG: { + pTableMetaInfo = getMetaInfo(pQueryInfo, 0); + if (UTIL_TABLE_IS_NORMAL_TABLE(pTableMetaInfo)) 
{ + return buildInvalidOperationMsg(pMsgBuf, msg7); + } + + // no parameters or more than one parameter for function + if ((code = checkForkParam(pItem->pNode, 1, pMsgBuf)) != TSDB_CODE_SUCCESS) { + return code; + } + + tSqlExprItem* pParamItem = taosArrayGet(pItem->pNode->Expr.paramList, 0); + tSqlExpr* pParam = pParamItem->pNode; + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&pParam->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = getTableTagSchema(pTableMetaInfo->pTableMeta); + + // functions can not be applied to normal columns + int32_t numOfCols = getNumOfColumns(pTableMetaInfo->pTableMeta); + if (index.columnIndex < numOfCols && index.columnIndex != TSDB_TBNAME_COLUMN_INDEX) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + if (index.columnIndex > 0) { + index.columnIndex -= numOfCols; + } + + // 2. 
valid the column type + int16_t colType = 0; + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + colType = TSDB_DATA_TYPE_BINARY; + } else { + colType = pSchema[index.columnIndex].type; + } + + if (colType == TSDB_DATA_TYPE_BOOL) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + columnListInsert(pTableMetaInfo->tagColList, index.columnIndex, pTableMetaInfo->pTableMeta->uid, &pSchema[index.columnIndex]); + SSchema* pTagSchema = getTableTagSchema(pTableMetaInfo->pTableMeta); + + SSchema s = {0}; + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + s = *getTbnameColumnSchema(); + } else { + s = pTagSchema[index.columnIndex]; + } + + SResultDataInfo resInfo = {0}; + int32_t ret = getResultDataInfo(s.type, s.bytes, FUNCTION_TID_TAG, 0, &resInfo, 0, 0); + assert(ret == TSDB_CODE_SUCCESS); + + s.type = (uint8_t)resInfo.type; + s.bytes = resInfo.bytes; + s.colId = getNewResColId(); + TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_TAG_FILTER_QUERY); + + doAddOneExprInfo(pQueryInfo, 0, FUNCTION_TID_TAG, &index, &s, &s, NULL, 0, s.name); + return TSDB_CODE_SUCCESS; + } + + case FUNCTION_BLKINFO: { + // no parameters or more than one parameter for function + if ((code = checkForkParam(pItem->pNode, 0, pMsgBuf))!= TSDB_CODE_SUCCESS) { + return code; + } + + SColumnIndex index = {.tableIndex = 0, .columnIndex = 0, .type = TSDB_COL_NORMAL}; + pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + + SResultDataInfo resInfo = {0}; + getResultDataInfo(TSDB_DATA_TYPE_INT, 4, functionId, 0, &resInfo, 0, 0); + + SSchema s = createSchema(resInfo.type, resInfo.bytes, getNewResColId(), "block_dist"); + SSchema colSchema = {0}; + + char token[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, s.name, token, sizeof(s.name) - 1); + SExprInfo* pExpr = doAddOneExprInfo(pQueryInfo, colIndex, functionId, &index, &colSchema, &s, NULL, resInfo.intermediateBytes, token); + + int64_t rowSize = pTableMetaInfo->pTableMeta->tableInfo.rowSize; + 
addExprInfoParam(&pExpr->base, (char*) &rowSize, TSDB_DATA_TYPE_BIGINT, 8); + return TSDB_CODE_SUCCESS; + } + + default: { +// pUdfInfo = isValidUdf(pQueryInfo->pUdfInfo, pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n); +// if (pUdfInfo == NULL) { +// return buildInvalidOperationMsg(pMsgBuf, msg9); +// } + + tSqlExprItem* pParamElem = taosArrayGet(pItem->pNode->Expr.paramList, 0);; + if (pParamElem->pNode->tokenId != TK_ID) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&pParamElem->pNode->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + + // functions can not be applied to tags + if (index.columnIndex >= getNumOfColumns(pTableMetaInfo->pTableMeta)) { + return buildInvalidOperationMsg(pMsgBuf, msg6); + } + + SResultDataInfo resInfo = {0}; + getResultDataInfo(TSDB_DATA_TYPE_INT, 4, functionId, 0, &resInfo, 0, false/*, pUdfInfo*/); + + SSchema s = createSchema(resInfo.type, resInfo.bytes, getNewResColId(), ""); + SSchema* colSchema = getOneColumnSchema(pTableMetaInfo->pTableMeta, index.tableIndex); + + char token[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, s.name, token, sizeof(s.name) - 1); + doAddOneExprInfo(pQueryInfo, colIndex, functionId, &index, colSchema, &s, NULL, resInfo.intermediateBytes, token); + return TSDB_CODE_SUCCESS; + } + } + + return TSDB_CODE_TSC_INVALID_OPERATION; +} + +SExprInfo* doAddProjectCol(SQueryStmtInfo* pQueryInfo, int32_t outputColIndex, SColumnIndex* pColIndex, const char* aliasName, int32_t colId) { + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, pColIndex->tableIndex)->pTableMeta; + + SSchema* pSchema = getOneColumnSchema(pTableMeta, pColIndex->columnIndex); + SColumnIndex 
index = *pColIndex; + + int16_t functionId = 0; + if (TSDB_COL_IS_TAG(index.type)) { + int32_t numOfCols = getNumOfColumns(pTableMeta); + index.columnIndex = pColIndex->columnIndex - numOfCols; + functionId = FUNCTION_TAGPRJ; + } else { + index.columnIndex = pColIndex->columnIndex; + functionId = FUNCTION_PRJ; + } + + const char* name = (aliasName == NULL)? pSchema->name:aliasName; + SSchema s = createSchema(pSchema->type, pSchema->bytes, colId, name); + return doAddOneExprInfo(pQueryInfo, outputColIndex, functionId, &index, pSchema, &s, NULL, 0, pSchema->name); +} + +static int32_t doAddProjectionExprAndResColumn(SQueryStmtInfo* pQueryInfo, SColumnIndex* pIndex, int32_t startPos) { + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, pIndex->tableIndex); + + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + STableComInfo tinfo = getTableInfo(pTableMeta); + + int32_t numOfTotalColumns = tinfo.numOfColumns; + if (UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) { + numOfTotalColumns += tinfo.numOfTags; + } + + for (int32_t j = 0; j < numOfTotalColumns; ++j) { + pIndex->columnIndex = j; + doAddProjectCol(pQueryInfo, startPos + j, pIndex, NULL, getNewResColId()); + } + + return numOfTotalColumns; +} + +// User input constant value as a new result column +static SColumnIndex createConstantColumnIndex(int32_t* colId) { + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + index.columnIndex = ((*colId)--); + index.tableIndex = 0; + index.type = TSDB_COL_UDC; + return index; +} + +static SSchema createConstantColumnSchema(SVariant* pVal, const SToken* exprStr, const char* name) { + SSchema s = {0}; + + s.type = pVal->nType; + if (IS_VAR_DATA_TYPE(s.type)) { + s.bytes = (int16_t)(pVal->nLen + VARSTR_HEADER_SIZE); + } else { + s.bytes = tDataTypes[pVal->nType].bytes; + } + + s.colId = TSDB_UD_COLUMN_INDEX; + + if (name != NULL) { + tstrncpy(s.name, name, sizeof(s.name)); + } else { + size_t tlen = MIN(sizeof(s.name), exprStr->n + 1); + tstrncpy(s.name, exprStr->z, tlen); 
+ strdequote(s.name); + } + + return s; +} + +int32_t addProjectionExprAndResColumn(SQueryStmtInfo* pQueryInfo, tSqlExprItem* pItem, bool outerQuery, SMsgBuf* pMsgBuf) { + const char* msg1 = "tag for normal table query is not allowed"; + const char* msg2 = "invalid column name"; + const char* msg3 = "tbname not allowed in outer query"; + + if (checkForAliasName(pMsgBuf, pItem->aliasName) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + int32_t startPos = (int32_t)tscNumOfExprs(pQueryInfo); + int32_t tokenId = pItem->pNode->tokenId; + if (tokenId == TK_ALL) { // project on all fields + TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_PROJECTION_QUERY); + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getTableIndexByName(&pItem->pNode->columnName, pQueryInfo, &index) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + // all columns are required + if (index.tableIndex == COLUMN_INDEX_INITIAL_VAL) { // all table columns are required. 
+ for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + index.tableIndex = i; + int32_t inc = doAddProjectionExprAndResColumn(pQueryInfo, &index, startPos); + startPos += inc; + } + } else { + doAddProjectionExprAndResColumn(pQueryInfo, &index, startPos); + } + + // add the primary timestamp column even though it is not required by user + STableMeta* pTableMeta = pQueryInfo->pTableMetaInfo[index.tableIndex]->pTableMeta; + if (pTableMeta->tableType != TSDB_TEMP_TABLE) { + insertPrimaryTsColumn(pQueryInfo->colList, pTableMeta->uid); + } + } else if (tokenId == TK_STRING || tokenId == TK_INTEGER || tokenId == TK_FLOAT) { // simple column projection query + SColumnIndex index = createConstantColumnIndex(&pQueryInfo->udColumnId); + SSchema colSchema = createConstantColumnSchema(&pItem->pNode->value, &pItem->pNode->exprToken, pItem->aliasName); + + char rawName[TSDB_COL_NAME_LEN] = {0}; + tstrncpy(rawName, pItem->pNode->exprToken.z, MIN(TSDB_COL_NAME_LEN, TSDB_COL_NAME_LEN)); + SExprInfo* pExpr = doAddOneExprInfo(pQueryInfo, startPos, FUNCTION_PRJ, &index, &colSchema, &colSchema, NULL, 0, rawName); + + // NOTE: the first parameter is reserved for the tag column id during join query process. + pExpr->base.numOfParams = 2; + taosVariantAssign(&pExpr->base.param[1], &pItem->pNode->value); + } else if (tokenId == TK_ID) { + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&pItem->pNode->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + SSchema colSchema = {0}; + int32_t functionId = 0; + + if (outerQuery) { // todo?? 
+ STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + + bool existed = false; + SSchema* pSchema = pTableMetaInfo->pTableMeta->schema; + + int32_t numOfCols = getNumOfColumns(pTableMetaInfo->pTableMeta); + for (int32_t i = 0; i < numOfCols; ++i) { + if (strncasecmp(pSchema[i].name, TSQL_TBNAME_L, tListLen(pSchema[i].name)) == 0) { + existed = true; + index.columnIndex = i; + break; + } + } + + if (!existed) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + colSchema = pSchema[index.columnIndex]; + functionId = FUNCTION_PRJ; + } else { + colSchema = *getTbnameColumnSchema(); + functionId = FUNCTION_TAGPRJ; + } + + SSchema resultSchema = colSchema; + resultSchema.colId = getNewResColId(); + + char rawName[TSDB_COL_NAME_LEN] = {0}; + setTokenAndResColumnName(pItem, resultSchema.name, rawName, sizeof(colSchema.name) - 1); + + doAddOneExprInfo(pQueryInfo, startPos, functionId, &index, &colSchema, &resultSchema, NULL, 0, rawName); + } else { + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + if (TSDB_COL_IS_TAG(index.type) && UTIL_TABLE_IS_NORMAL_TABLE(pTableMetaInfo)) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + doAddProjectCol(pQueryInfo, startPos, &index, pItem->aliasName, getNewResColId()); + } + + // add the primary timestamp column even though it is not required by user + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, index.tableIndex); + if (!UTIL_TABLE_IS_TMP_TABLE(pTableMetaInfo)) { + insertPrimaryTsColumn(pQueryInfo->colList, pTableMetaInfo->pTableMeta->uid); + } + } else { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t validateExprLeafNode(tSqlExpr* pExpr, SQueryStmtInfo* pQueryInfo, SArray* pList, int32_t* type, uint64_t* uid, + SMsgBuf* pMsgBuf) { + if (pExpr->type == SQL_NODE_TABLE_COLUMN) { + if (*type == NON_ARITHMEIC_EXPR) { + *type = NORMAL_ARITHMETIC; + } else if (*type == AGG_ARIGHTMEIC) { + return 
TSDB_CODE_TSC_INVALID_OPERATION; + } + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (getColumnIndexByName(&pExpr->columnName, pQueryInfo, &index, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // if column is timestamp, bool, binary, nchar, not support arithmetic, so return invalid sql + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, index.tableIndex)->pTableMeta; + + SSchema* pSchema = getOneColumnSchema(pTableMeta, index.columnIndex); + if ((pSchema->type == TSDB_DATA_TYPE_TIMESTAMP) || (pSchema->type == TSDB_DATA_TYPE_BOOL) || + (pSchema->type == TSDB_DATA_TYPE_BINARY) || (pSchema->type == TSDB_DATA_TYPE_NCHAR)) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + taosArrayPush(pList, &index); + } else if ((pExpr->tokenId == TK_FLOAT && (isnan(pExpr->value.d) || isinf(pExpr->value.d))) || + pExpr->tokenId == TK_NULL) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } else if (pExpr->type == SQL_NODE_SQLFUNCTION) { + if (*type == NON_ARITHMEIC_EXPR) { + *type = AGG_ARIGHTMEIC; + } else if (*type == NORMAL_ARITHMETIC) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + tSqlExprItem item = {.pNode = pExpr, .aliasName = NULL}; + + // sql function list in selection clause. 
+ // Append the sqlExpr into exprList of pQueryInfo structure sequentially + item.functionId = qIsBuiltinFunction(pExpr->Expr.operand.z, pExpr->Expr.operand.n); + if (item.functionId < 0) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo); + if (addExprAndResColumn(pQueryInfo, outputIndex, &item, false, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // It is invalid in case of more than one sqlExpr, such as first(ts, k) - last(ts, k) + int32_t inc = (int32_t) tscNumOfExprs(pQueryInfo) - outputIndex; + if (inc > 1) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // Not supported data type in arithmetic expression + uint64_t id = -1; + for(int32_t i = 0; i < inc; ++i) { + SExprInfo* p1 = getExprInfo(pQueryInfo, i + outputIndex); + + int16_t t = p1->base.resSchema.type; + if (IS_VAR_DATA_TYPE(t) || t == TSDB_DATA_TYPE_BOOL || t == TSDB_DATA_TYPE_TIMESTAMP) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (i == 0) { + id = p1->base.uid; + continue; + } + + if (id != p1->base.uid) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } + + *uid = id; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t validateComplexExpr(tSqlExpr* pExpr, SQueryStmtInfo* pQueryInfo, SArray* pColList, int32_t* type, SMsgBuf* pMsgBuf) { + if (pExpr == NULL) { + return TSDB_CODE_SUCCESS; + } + + tSqlExpr* pLeft = pExpr->pLeft; + uint64_t uidLeft = 0; + uint64_t uidRight = 0; + + if (pLeft->type == SQL_NODE_EXPR) { + int32_t ret = validateComplexExpr(pLeft, pQueryInfo, pColList, type, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + } else { + int32_t ret = validateExprLeafNode(pLeft, pQueryInfo, pColList, type, &uidLeft, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + } + + tSqlExpr* pRight = pExpr->pRight; + if (pRight->type == SQL_NODE_EXPR) { + int32_t ret = validateComplexExpr(pRight, pQueryInfo, pColList, type, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + 
return ret; + } + } else { + int32_t ret = validateExprLeafNode(pRight, pQueryInfo, pColList, type, &uidRight, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t sqlExprToExprNode(tExprNode **pExpr, const tSqlExpr* pSqlExpr, SQueryStmtInfo* pQueryInfo, SArray* pCols, uint64_t *uid, SMsgBuf* pMsgBuf) { + tExprNode* pLeft = NULL; + tExprNode* pRight= NULL; + + SColumnIndex index = COLUMN_INDEX_INITIALIZER; + if (pSqlExpr->pLeft != NULL) { + int32_t ret = sqlExprToExprNode(&pLeft, pSqlExpr->pLeft, pQueryInfo, pCols, uid, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + } + + if (pSqlExpr->pRight != NULL) { + int32_t ret = sqlExprToExprNode(&pRight, pSqlExpr->pRight, pQueryInfo, pCols, uid, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + tExprTreeDestroy(pLeft, NULL); + return ret; + } + } + + if (pSqlExpr->pLeft == NULL && pSqlExpr->pRight == NULL && pSqlExpr->tokenId == 0) { + *pExpr = calloc(1, sizeof(tExprNode)); + return TSDB_CODE_SUCCESS; + } + + if (pSqlExpr->pLeft == NULL) { // it is the leaf node + assert(pSqlExpr->pRight == NULL); + + if (pSqlExpr->type == SQL_NODE_VALUE) { + int32_t ret = TSDB_CODE_SUCCESS; + *pExpr = calloc(1, sizeof(tExprNode)); + (*pExpr)->nodeType = TEXPR_VALUE_NODE; + (*pExpr)->pVal = calloc(1, sizeof(SVariant)); + taosVariantAssign((*pExpr)->pVal, &pSqlExpr->value); + + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, 0)->pTableMeta; + if (pCols != NULL && taosArrayGetSize(pCols) > 0) { + SColIndex* idx = taosArrayGet(pCols, 0); + SSchema* pSchema = getOneColumnSchema(pTableMeta, idx->colIndex); + + // convert time by precision + if (pSchema != NULL && TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && TSDB_DATA_TYPE_BINARY == (*pExpr)->pVal->nType) { +#if 0 + ret = setColumnFilterInfoForTimestamp(pCmd, pQueryInfo, (*pExpr)->pVal); +#endif + } + } + return ret; + } else if (pSqlExpr->type == SQL_NODE_SQLFUNCTION) { + // arithmetic expression on the results of aggregation 
functions + *pExpr = calloc(1, sizeof(tExprNode)); + (*pExpr)->nodeType = TEXPR_COL_NODE; + (*pExpr)->pSchema = calloc(1, sizeof(SSchema)); + strncpy((*pExpr)->pSchema->name, pSqlExpr->exprToken.z, pSqlExpr->exprToken.n); + + // set the input column data byte and type. + size_t size = taosArrayGetSize(pQueryInfo->exprList); + + for (int32_t i = 0; i < size; ++i) { + SExprInfo* p1 = taosArrayGetP(pQueryInfo->exprList, i); + + if (strcmp((*pExpr)->pSchema->name, p1->base.resSchema.name) == 0) { + memcpy((*pExpr)->pSchema, &p1->base.resSchema, sizeof(SSchema)); + if (uid != NULL) { + *uid = p1->base.uid; + } + + break; + } + } + } else if (pSqlExpr->type == SQL_NODE_TABLE_COLUMN) { // column name, normal column arithmetic expression + int32_t ret = getColumnIndexByName(&pSqlExpr->columnName, pQueryInfo, &index, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + + pQueryInfo->curTableIdx = index.tableIndex; + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, index.tableIndex)->pTableMeta; + + *pExpr = calloc(1, sizeof(tExprNode)); + (*pExpr)->nodeType = TEXPR_COL_NODE; + (*pExpr)->pSchema = calloc(1, sizeof(SSchema)); + + SSchema* pSchema = getOneColumnSchema(pTableMeta, index.columnIndex); + *(*pExpr)->pSchema = *pSchema; + + if (pCols != NULL) { // record the involved columns + SColIndex colIndex = {0}; + tstrncpy(colIndex.name, pSchema->name, sizeof(colIndex.name)); + colIndex.colId = pSchema->colId; + colIndex.colIndex = index.columnIndex; + colIndex.flag = index.type; + + taosArrayPush(pCols, &colIndex); + } + + return TSDB_CODE_SUCCESS; + } else if (pSqlExpr->tokenId == TK_SET) { + int32_t colType = -1; + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, pQueryInfo->curTableIdx)->pTableMeta; + if (pCols != NULL) { + size_t colSize = taosArrayGetSize(pCols); + + if (colSize > 0) { + SColIndex* idx = taosArrayGet(pCols, colSize - 1); + SSchema* pSchema = getOneColumnSchema(pTableMeta, idx->colIndex); + if (pSchema != NULL) { + colType = pSchema->type; + 
} + } + } + + SVariant *pVal; + if (colType >= TSDB_DATA_TYPE_TINYINT && colType <= TSDB_DATA_TYPE_BIGINT) { + colType = TSDB_DATA_TYPE_BIGINT; + } else if (colType == TSDB_DATA_TYPE_FLOAT || colType == TSDB_DATA_TYPE_DOUBLE) { + colType = TSDB_DATA_TYPE_DOUBLE; + } + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, pQueryInfo->curTableIdx); + STableComInfo tinfo = getTableInfo(pTableMetaInfo->pTableMeta); +#if 0 + if (serializeExprListToVariant(pSqlExpr->Expr.paramList, &pVal, colType, tinfo.precision) == false) { + return buildInvalidOperationMsg(pMsgBuf, "not support filter expression"); + } +#endif + *pExpr = calloc(1, sizeof(tExprNode)); + (*pExpr)->nodeType = TEXPR_VALUE_NODE; + (*pExpr)->pVal = pVal; + } else { + return buildInvalidOperationMsg(pMsgBuf, "not support filter expression"); + } + + } else { + *pExpr = (tExprNode *)calloc(1, sizeof(tExprNode)); + (*pExpr)->nodeType = TEXPR_BINARYEXPR_NODE; + + (*pExpr)->_node.pLeft = pLeft; + (*pExpr)->_node.pRight = pRight; + + SToken t = {.type = pSqlExpr->tokenId}; + (*pExpr)->_node.optr = convertRelationalOperator(&t); + + assert((*pExpr)->_node.optr != 0); + + // check for dividing by 0 + if ((*pExpr)->_node.optr == TSDB_BINARY_OP_DIVIDE) { + if (pRight->nodeType == TEXPR_VALUE_NODE) { + if (pRight->pVal->nType == TSDB_DATA_TYPE_INT && pRight->pVal->i64 == 0) { + return buildInvalidOperationMsg(pMsgBuf, "invalid expr (divide by 0)"); + } else if (pRight->pVal->nType == TSDB_DATA_TYPE_FLOAT && pRight->pVal->d == 0) { + return buildInvalidOperationMsg(pMsgBuf, "invalid expr (divide by 0)"); + } + } + } + + // NOTE: binary|nchar data allows the >|< type filter + if ((*pExpr)->_node.optr != TSDB_RELATION_EQUAL && (*pExpr)->_node.optr != TSDB_RELATION_NOT_EQUAL) { + if (pRight != NULL && pRight->nodeType == TEXPR_VALUE_NODE) { + if (pRight->pVal->nType == TSDB_DATA_TYPE_BOOL && pLeft->pSchema->type == TSDB_DATA_TYPE_BOOL) { + return buildInvalidOperationMsg(pMsgBuf, "invalid operator for bool"); + } + } + 
} + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t multiColumnListInsert(SQueryStmtInfo* pQueryInfo, SArray* pColumnList, SMsgBuf* pMsgBuf) { + const char* msg3 = "tag columns can not be used in arithmetic expression"; + + SColumnIndex* p1 = taosArrayGet(pColumnList, 0); + STableMeta* pTableMeta = getMetaInfo(pQueryInfo, p1->tableIndex)->pTableMeta; + + size_t numOfNode = taosArrayGetSize(pColumnList); + for(int32_t k = 0; k < numOfNode; ++k) { + SColumnIndex* pIndex = taosArrayGet(pColumnList, k); + if (TSDB_COL_IS_TAG(pIndex->type)) { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + + SSchema* ps = getOneColumnSchema(pTableMeta, pIndex->columnIndex); + columnListInsert(pQueryInfo->colList, pIndex->columnIndex, pTableMeta->uid, ps); + } + + insertPrimaryTsColumn(pQueryInfo->colList, pTableMeta->uid); + return TSDB_CODE_SUCCESS; +} + +static int32_t createComplexExpr(SQueryStmtInfo* pQueryInfo, int32_t exprIndex, tSqlExprItem* pItem, SMsgBuf* pMsgBuf) { + const char* msg1 = "invalid column name, illegal column type, or columns in arithmetic expression from two tables"; + const char* msg2 = "invalid arithmetic expression in select clause"; + const char* msg3 = "tag columns can not be used in arithmetic expression"; + + int32_t arithmeticType = NON_ARITHMEIC_EXPR; + SArray* pColumnList = taosArrayInit(4, sizeof(SColumnIndex)); + if (validateComplexExpr(pItem->pNode, pQueryInfo, pColumnList, &arithmeticType, pMsgBuf) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + if (arithmeticType == NORMAL_ARITHMETIC) { + // expr string is set as the parameter of function + SSchema s = createSchema(TSDB_DATA_TYPE_DOUBLE, sizeof(double), getNewResColId(), ""); + + tExprNode* pNode = NULL; + SArray* colList = taosArrayInit(10, sizeof(SColIndex)); + int32_t ret = sqlExprToExprNode(&pNode, pItem->pNode, pQueryInfo, colList, NULL, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + taosArrayDestroy(colList); + tExprTreeDestroy(pNode, NULL); + 
return buildInvalidOperationMsg(pMsgBuf, msg2); + } + + SExprInfo* pExpr = createBinaryExprInfo(pNode, &s); + addExprInfo(pQueryInfo, exprIndex, pExpr); + setTokenAndResColumnName(pItem, pExpr->base.resSchema.name, pExpr->base.token, TSDB_COL_NAME_LEN); + + // check for if there is a tag in the arithmetic express + int32_t code = multiColumnListInsert(pQueryInfo, pColumnList, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(colList); + tExprTreeDestroy(pNode, NULL); + return code; + } + + SBufferWriter bw = tbufInitWriter(NULL, false); + +// TRY(0) { + exprTreeToBinary(&bw, pNode); +// } CATCH(code) { +// tbufCloseWriter(&bw); +// UNUSED(code); +// TODO: other error handling +// } END_TRY + + int32_t len = tbufTell(&bw); + char* c = tbufGetData(&bw, false); + + // set the serialized binary string as the parameter of arithmetic expression + SColumnIndex* index1 = taosArrayGet(pColumnList, 0); + addExprInfoParam(&pExpr->base, c, TSDB_DATA_TYPE_BINARY, (int32_t)len); + addResColumnInfo(pQueryInfo, exprIndex, &pExpr->base.resSchema, pExpr); + + tbufCloseWriter(&bw); + taosArrayDestroy(colList); + tExprTreeDestroy(pNode, NULL); + } else { + SColumnIndex columnIndex = {0}; + + SSchema s = createSchema(TSDB_DATA_TYPE_DOUBLE, sizeof(double), getNewResColId(), ""); + addResColumnInfo(pQueryInfo, exprIndex, &s, NULL); + + tExprNode* pNode = NULL; + int32_t ret = sqlExprToExprNode(&pNode, pItem->pNode, pQueryInfo, NULL, NULL, pMsgBuf); + if (ret != TSDB_CODE_SUCCESS) { + tExprTreeDestroy(pNode, NULL); + return buildInvalidOperationMsg(pMsgBuf, "invalid expression in select clause"); + } + + SExprInfo* pExpr = createBinaryExprInfo(pNode, &s); + addExprInfo(pQueryInfo, exprIndex, pExpr); + setTokenAndResColumnName(pItem, pExpr->base.resSchema.name, pExpr->base.token, TSDB_COL_NAME_LEN); + + pExpr->base.numOfParams = 1; + + SBufferWriter bw = tbufInitWriter(NULL, false); +// TRY(0) { + exprTreeToBinary(&bw, pExpr->pExpr); +// } CATCH(code) { +// 
tbufCloseWriter(&bw); +// UNUSED(code); +// TODO: other error handling +// } END_TRY + + SSqlExpr* pSqlExpr = &pExpr->base; + pSqlExpr->param[0].nLen = (int16_t) tbufTell(&bw); + pSqlExpr->param[0].pz = tbufGetData(&bw, true); + pSqlExpr->param[0].nType = TSDB_DATA_TYPE_BINARY; + + tbufCloseWriter(&bw); + +// tbufCloseWriter(&bw); // TODO there is a memory leak + } + + return TSDB_CODE_SUCCESS; +} + +int32_t validateSelectNodeList(SQueryStmtInfo* pQueryInfo, SArray* pSelNodeList, bool outerQuery, SMsgBuf* pMsgBuf) { + assert(pSelNodeList != NULL); + + const char* msg1 = "too many items in selection clause"; + const char* msg2 = "functions or others can not be mixed up"; + const char* msg3 = "not support query expression"; + const char* msg4 = "not support distinct mixed with proj/agg func"; + const char* msg5 = "invalid function name"; + const char* msg6 = "_block_dist not support subquery, only support stable/table"; + + // too many result columns not support order by in query + if (taosArrayGetSize(pSelNodeList) > TSDB_MAX_COLUMNS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + if (pQueryInfo->colList == NULL) { + pQueryInfo->colList = taosArrayInit(4, POINTER_BYTES); + } + + size_t numOfExpr = taosArrayGetSize(pSelNodeList); + + for (int32_t i = 0; i < numOfExpr; ++i) { + int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo); + tSqlExprItem* pItem = taosArrayGet(pSelNodeList, i); + int32_t type = pItem->pNode->type; + + if (pItem->distinct) { + if (i != 0 || type == SQL_NODE_SQLFUNCTION || type == SQL_NODE_EXPR) { + return buildInvalidOperationMsg(pMsgBuf, msg4); + } + + pQueryInfo->distinct = true; + } + + if (type == SQL_NODE_SQLFUNCTION) { + pItem->functionId = qIsBuiltinFunction(pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n); + if (pItem->functionId == FUNCTION_INVALID_ID) { + int32_t functionId = FUNCTION_INVALID_ID; + bool valid = qIsValidUdf(pQueryInfo->pUdfInfo, pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n, 
&functionId); + if (!valid) { + return buildInvalidOperationMsg(pMsgBuf, msg5); + } + + pItem->functionId = functionId; + } + + // sql function in selection clause, append sql function info in pSqlCmd structure sequentially + if (addExprAndResColumn(pQueryInfo, outputIndex, pItem, true, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } else if (type == SQL_NODE_TABLE_COLUMN || type == SQL_NODE_VALUE) { + // use the dynamic array list to decide if the function is valid or not + // select table_name1.field_name1, table_name2.field_name2 from table_name1, table_name2 + if (addProjectionExprAndResColumn(pQueryInfo, pItem, outerQuery, pMsgBuf) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } else if (type == SQL_NODE_EXPR) { + int32_t code = createComplexExpr(pQueryInfo, i, pItem, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + return buildInvalidOperationMsg(pMsgBuf, msg3); + } + } + + // there is only one user-defined column in the final result field, add the timestamp column. 
+// size_t numOfSrcCols = taosArrayGetSize(pQueryInfo->colList); +// if ((numOfSrcCols <= 0 || !hasNoneUserDefineExpr(pQueryInfo)) && !tscQueryTags(pQueryInfo) && !tscQueryBlockInfo(pQueryInfo)) { +// addPrimaryTsColIntoResult(pQueryInfo, pCmd); +// } + + return TSDB_CODE_SUCCESS; +} + +int32_t evaluateSqlNode(SSqlNode* pNode, int32_t tsPrecision, SMsgBuf* pMsgBuf) { + assert(pNode != NULL && pMsgBuf != NULL && pMsgBuf->len > 0); + if (pNode->pWhere == NULL) { + return TSDB_CODE_SUCCESS; + } + + // Evaluate expression in where clause + int32_t code = evaluateImpl(pNode->pWhere, tsPrecision); + if (code != TSDB_CODE_SUCCESS) { + strncpy(pMsgBuf->buf, "invalid time expression in sql", pMsgBuf->len); + return code; + } + + // Evaluate the expression in select clause + size_t size = taosArrayGetSize(pNode->pSelNodeList); + for(int32_t i = 0; i < size; ++i) { + tSqlExprItem* pItem = taosArrayGet(pNode->pSelNodeList, i); + code = evaluateImpl(pItem->pNode, tsPrecision); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return code; +} + +int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msgBuf, int32_t msgBufLen) { + //1. if it is a query, get the meta info and continue. 
+ assert(pCatalog != NULL && pInfo != NULL); + int32_t code = 0; +#if 0 + switch (pInfo->type) { + case TSDB_SQL_DROP_TABLE: + case TSDB_SQL_DROP_USER: + case TSDB_SQL_DROP_ACCT: + case TSDB_SQL_DROP_DNODE: + case TSDB_SQL_DROP_DB: { + const char* msg1 = "param name too long"; + const char* msg2 = "invalid name"; + + SToken* pzName = taosArrayGet(pInfo->pMiscInfo->a, 0); + if ((pInfo->type != TSDB_SQL_DROP_DNODE) && (parserValidateIdToken(pzName) != TSDB_CODE_SUCCESS)) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + if (pInfo->type == TSDB_SQL_DROP_DB) { + assert(taosArrayGetSize(pInfo->pMiscInfo->a) == 1); + code = tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), pzName); + if (code != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + } else if (pInfo->type == TSDB_SQL_DROP_TABLE) { + assert(taosArrayGetSize(pInfo->pMiscInfo->a) == 1); + + code = tscSetTableFullName(&pTableMetaInfo->name, pzName, pSql); + if(code != TSDB_CODE_SUCCESS) { + return code; + } + } else if (pInfo->type == TSDB_SQL_DROP_DNODE) { + if (pzName->type == TK_STRING) { + pzName->n = strdequote(pzName->z); + } + strncpy(pCmd->payload, pzName->z, pzName->n); + } else { // drop user/account + if (pzName->n >= TSDB_USER_LEN) { + return setInvalidOperatorMsg(pMsgBuf, msg3); + } + + strncpy(pCmd->payload, pzName->z, pzName->n); + } + + break; + } + + case TSDB_SQL_USE_DB: { + const char* msg = "invalid db name"; + SToken* pToken = taosArrayGet(pInfo->pMiscInfo->a, 0); + + if (tscValidateName(pToken) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg); + } + + int32_t ret = tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), pToken); + if (ret != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg); + } + + break; + } + + case TSDB_SQL_RESET_CACHE: { + return TSDB_CODE_SUCCESS; + } + + case TSDB_SQL_SHOW: { + if (setShowInfo(pSql, pInfo) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + break; + } 
+ + case TSDB_SQL_CREATE_FUNCTION: + case TSDB_SQL_DROP_FUNCTION: { + code = handleUserDefinedFunc(pSql, pInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + break; + } + + case TSDB_SQL_ALTER_DB: + case TSDB_SQL_CREATE_DB: { + const char* msg1 = "invalid db name"; + const char* msg2 = "name too long"; + + SCreateDbInfo* pCreateDB = &(pInfo->pMiscInfo->dbOpt); + if (pCreateDB->dbname.n >= TSDB_DB_NAME_LEN) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + char buf[TSDB_DB_NAME_LEN] = {0}; + SToken token = taosTokenDup(&pCreateDB->dbname, buf, tListLen(buf)); + + if (tscValidateName(&token) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + + int32_t ret = tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), &token); + if (ret != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + if (parseCreateDBOptions(pCmd, pCreateDB) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + break; + } + + case TSDB_SQL_CREATE_DNODE: { + const char* msg = "invalid host name (ip address)"; + + if (taosArrayGetSize(pInfo->pMiscInfo->a) > 1) { + return setInvalidOperatorMsg(pMsgBuf, msg); + } + + SToken* id = taosArrayGet(pInfo->pMiscInfo->a, 0); + if (id->type == TK_STRING) { + id->n = strdequote(id->z); + } + break; + } + + case TSDB_SQL_CREATE_ACCT: + case TSDB_SQL_ALTER_ACCT: { + const char* msg1 = "invalid state option, available options[no, r, w, all]"; + const char* msg2 = "invalid user/account name"; + const char* msg3 = "name too long"; + + SToken* pName = &pInfo->pMiscInfo->user.user; + SToken* pPwd = &pInfo->pMiscInfo->user.passwd; + + if (handlePassword(pCmd, pPwd) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (pName->n >= TSDB_USER_LEN) { + return setInvalidOperatorMsg(pMsgBuf, msg3); + } + + if (tscValidateName(pName) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + SCreateAcctInfo* pAcctOpt = 
&pInfo->pMiscInfo->acctOpt; + if (pAcctOpt->stat.n > 0) { + if (pAcctOpt->stat.z[0] == 'r' && pAcctOpt->stat.n == 1) { + } else if (pAcctOpt->stat.z[0] == 'w' && pAcctOpt->stat.n == 1) { + } else if (strncmp(pAcctOpt->stat.z, "all", 3) == 0 && pAcctOpt->stat.n == 3) { + } else if (strncmp(pAcctOpt->stat.z, "no", 2) == 0 && pAcctOpt->stat.n == 2) { + } else { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + } + + break; + } + + case TSDB_SQL_DESCRIBE_TABLE: { + const char* msg1 = "invalid table name"; + + SToken* pToken = taosArrayGet(pInfo->pMiscInfo->a, 0); + if (tscValidateName(pToken) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + // additional msg has been attached already + code = tscSetTableFullName(&pTableMetaInfo->name, pToken, pSql); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return tscGetTableMeta(pSql, pTableMetaInfo); + } + case TSDB_SQL_SHOW_CREATE_STABLE: + case TSDB_SQL_SHOW_CREATE_TABLE: { + const char* msg1 = "invalid table name"; + + SToken* pToken = taosArrayGet(pInfo->pMiscInfo->a, 0); + if (tscValidateName(pToken) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + + code = tscSetTableFullName(&pTableMetaInfo->name, pToken, pSql); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return tscGetTableMeta(pSql, pTableMetaInfo); + } + case TSDB_SQL_SHOW_CREATE_DATABASE: { + const char* msg1 = "invalid database name"; + + SToken* pToken = taosArrayGet(pInfo->pMiscInfo->a, 0); + if (tscValidateName(pToken) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + + if (pToken->n > TSDB_DB_NAME_LEN) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + return tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), pToken); + } + case TSDB_SQL_CFG_DNODE: { + const char* msg2 = "invalid configure options or values, such as resetlog / debugFlag 135 / balance 'vnode:2-dnode:2' / monitor 1 "; + const char* msg3 = "invalid dnode ep"; + + /* validate the 
ip address */ + SMiscInfo* pMiscInfo = pInfo->pMiscInfo; + + /* validate the parameter names and options */ + if (validateDNodeConfig(pMiscInfo) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + char* pMsg = pCmd->payload; + + SCfgDnodeMsg* pCfg = (SCfgDnodeMsg*)pMsg; + + SToken* t0 = taosArrayGet(pMiscInfo->a, 0); + SToken* t1 = taosArrayGet(pMiscInfo->a, 1); + + t0->n = strdequote(t0->z); + strncpy(pCfg->ep, t0->z, t0->n); + + if (validateEp(pCfg->ep) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg3); + } + + strncpy(pCfg->config, t1->z, t1->n); + + if (taosArrayGetSize(pMiscInfo->a) == 3) { + SToken* t2 = taosArrayGet(pMiscInfo->a, 2); + + pCfg->config[t1->n] = ' '; // add sep + strncpy(&pCfg->config[t1->n + 1], t2->z, t2->n); + } + + break; + } + + case TSDB_SQL_CREATE_USER: + case TSDB_SQL_ALTER_USER: { + const char* msg2 = "invalid user/account name"; + const char* msg3 = "name too long"; + const char* msg5 = "invalid user rights"; + const char* msg7 = "not support options"; + + pCmd->command = pInfo->type; + + SUserInfo* pUser = &pInfo->pMiscInfo->user; + SToken* pName = &pUser->user; + SToken* pPwd = &pUser->passwd; + + if (pName->n >= TSDB_USER_LEN) { + return setInvalidOperatorMsg(pMsgBuf, msg3); + } + + if (tscValidateName(pName) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg2); + } + + if (pCmd->command == TSDB_SQL_CREATE_USER) { + if (handlePassword(pCmd, pPwd) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } else { + if (pUser->type == TSDB_ALTER_USER_PASSWD) { + if (handlePassword(pCmd, pPwd) != TSDB_CODE_SUCCESS) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + } else if (pUser->type == TSDB_ALTER_USER_PRIVILEGES) { + assert(pPwd->type == TSDB_DATA_TYPE_NULL); + + SToken* pPrivilege = &pUser->privilege; + + if (strncasecmp(pPrivilege->z, "super", 5) == 0 && pPrivilege->n == 5) { + pCmd->count = 1; + } else if (strncasecmp(pPrivilege->z, "read", 4) == 0 && 
pPrivilege->n == 4) { + pCmd->count = 2; + } else if (strncasecmp(pPrivilege->z, "write", 5) == 0 && pPrivilege->n == 5) { + pCmd->count = 3; + } else { + return setInvalidOperatorMsg(pMsgBuf, msg5); + } + } else { + return setInvalidOperatorMsg(pMsgBuf, msg7); + } + } + + break; + } + + case TSDB_SQL_CFG_LOCAL: { + SMiscInfo *pMiscInfo = pInfo->pMiscInfo; + const char *msg = "invalid configure options or values"; + + // validate the parameter names and options + if (validateLocalConfig(pMiscInfo) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg); + } + + int32_t numOfToken = (int32_t) taosArrayGetSize(pMiscInfo->a); + assert(numOfToken >= 1 && numOfToken <= 2); + + SToken* t = taosArrayGet(pMiscInfo->a, 0); + strncpy(pCmd->payload, t->z, t->n); + if (numOfToken == 2) { + SToken* t1 = taosArrayGet(pMiscInfo->a, 1); + pCmd->payload[t->n] = ' '; // add sep + strncpy(&pCmd->payload[t->n + 1], t1->z, t1->n); + } + return TSDB_CODE_SUCCESS; + } + + case TSDB_SQL_CREATE_TABLE: { + SCreateTableSql* pCreateTable = pInfo->pCreateTableInfo; + + if (pCreateTable->type == TSQL_CREATE_TABLE || pCreateTable->type == TSQL_CREATE_STABLE) { + if ((code = doCheckForCreateTable(pSql, 0, pInfo)) != TSDB_CODE_SUCCESS) { + return code; + } + + } else if (pCreateTable->type == TSQL_CREATE_TABLE_FROM_STABLE) { + assert(pCmd->numOfCols == 0); + if ((code = doCheckForCreateFromStable(pSql, pInfo)) != TSDB_CODE_SUCCESS) { + return code; + } + + } else if (pCreateTable->type == TSQL_CREATE_STREAM) { + if ((code = doCheckForStream(pSql, pInfo)) != TSDB_CODE_SUCCESS) { + return code; + } + } + + break; + } + + case TSDB_SQL_SELECT: { + const char * msg1 = "no nested query supported in union clause"; + code = loadAllTableMeta(pSql, pInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pQueryInfo = tscGetQueryInfo(pCmd); + + size_t size = taosArrayGetSize(pInfo->list); + for (int32_t i = 0; i < size; ++i) { + SSqlNode* pSqlNode = taosArrayGetP(pInfo->list, i); + + 
tscTrace("0x%"PRIx64" start to parse the %dth subclause, total:%"PRIzu, pSql->self, i, size); + + if (size > 1 && pSqlNode->from && pSqlNode->from->type == SQL_NODE_FROM_SUBQUERY) { + return setInvalidOperatorMsg(pMsgBuf, msg1); + } + +// normalizeSqlNode(pSqlNode); // normalize the column name in each function + if ((code = validateSqlNode(pSql, pSqlNode, pQueryInfo)) != TSDB_CODE_SUCCESS) { + return code; + } + + tscPrintSelNodeList(pSql, i); + + if ((i + 1) < size && pQueryInfo->sibling == NULL) { + if ((code = tscAddQueryInfo(pCmd)) != TSDB_CODE_SUCCESS) { + return code; + } + + SArray *pUdfInfo = NULL; + if (pQueryInfo->pUdfInfo) { + pUdfInfo = taosArrayDup(pQueryInfo->pUdfInfo); + } + + pQueryInfo = pCmd->active; + pQueryInfo->pUdfInfo = pUdfInfo; + pQueryInfo->udfCopy = true; + } + } + + if ((code = normalizeVarDataTypeLength(pCmd)) != TSDB_CODE_SUCCESS) { + return code; + } + + // set the command/global limit parameters from the first subclause to the sqlcmd object + pCmd->active = pCmd->pQueryInfo; + pCmd->command = pCmd->pQueryInfo->command; + + STableMetaInfo* pTableMetaInfo1 = getMetaInfo(pCmd->active, 0); + if (pTableMetaInfo1->pTableMeta != NULL) { + pSql->res.precision = tscGetTableInfo(pTableMetaInfo1->pTableMeta).precision; + } + + return TSDB_CODE_SUCCESS; // do not build query message here + } + + case TSDB_SQL_ALTER_TABLE: { + if ((code = setAlterTableInfo(pSql, pInfo)) != TSDB_CODE_SUCCESS) { + return code; + } + + break; + } + + case TSDB_SQL_KILL_QUERY: + case TSDB_SQL_KILL_STREAM: + case TSDB_SQL_KILL_CONNECTION: { + if ((code = setKillInfo(pSql, pInfo, pInfo->type)) != TSDB_CODE_SUCCESS) { + return code; + } + break; + } + + case TSDB_SQL_SYNC_DB_REPLICA: { + const char* msg1 = "invalid db name"; + SToken* pzName = taosArrayGet(pInfo->pMiscInfo->a, 0); + + assert(taosArrayGetSize(pInfo->pMiscInfo->a) == 1); + code = tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), pzName); + if (code != TSDB_CODE_SUCCESS) { + return 
setInvalidOperatorMsg(pMsgBuf, msg1); + } + break; + } + case TSDB_SQL_COMPACT_VNODE:{ + const char* msg = "invalid compact"; + if (setCompactVnodeInfo(pSql, pInfo) != TSDB_CODE_SUCCESS) { + return setInvalidOperatorMsg(pMsgBuf, msg); + } + break; + } + default: + return setInvalidOperatorMsg(pMsgBuf, "not support sql expression"); + } +#endif + + SMetaReq req = {0}; + SMetaData data = {0}; + + // TODO: check if the qnode info has been cached already + req.qNodeEpset = true; + code = qParserExtractRequestedMetaInfo(pInfo, &req, msgBuf, msgBufLen); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // load the meta data from catalog + code = catalogGetMetaData(pCatalog, &req, &data); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // evaluate the sqlnode + STableMeta* pTableMeta = (STableMeta*) taosArrayGetP(data.pTableMeta, 0); + assert(pTableMeta != NULL); + + SMsgBuf buf = {.buf = msgBuf, .len = msgBufLen}; + + size_t len = taosArrayGetSize(pInfo->list); + for(int32_t i = 0; i < len; ++i) { + SSqlNode* p = taosArrayGetP(pInfo->list, i); + code = evaluateSqlNode(p, pTableMeta->tableInfo.precision, &buf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + for(int32_t i = 0; i < len; ++i) { + SSqlNode* p = taosArrayGetP(pInfo->list, i); + validateSqlNode(p, pQueryInfo, &buf); + } + + if ((code = checkForInvalidExpr(pQueryInfo, &buf)) != TSDB_CODE_SUCCESS) { + return code; + } + + // convert the sqlnode into queryinfo + return code; +} diff --git a/source/libs/parser/src/parser.c b/source/libs/parser/src/parser.c index 45215d7866..ff48f9c749 100644 --- a/source/libs/parser/src/parser.c +++ b/source/libs/parser/src/parser.c @@ -13,33 +13,37 @@ * along with this program. If not, see . 
*/ -#include "parserInt.h" -#include "ttoken.h" #include "astGenerator.h" +#include "parserInt.h" +#include "parserUtil.h" +#include "ttoken.h" +#include "function.h" bool qIsInsertSql(const char* pStr, size_t length) { - return false; + int32_t index = 0; + + do { + SToken t0 = tStrGetToken((char*) pStr, &index, false); + if (t0.type != TK_LP) { + return t0.type == TK_INSERT || t0.type == TK_IMPORT; + } + } while (1); } int32_t qParseQuerySql(const char* pStr, size_t length, struct SQueryStmtInfo** pQueryInfo, int64_t id, char* msg, int32_t msgLen) { *pQueryInfo = calloc(1, sizeof(SQueryStmtInfo)); if (*pQueryInfo == NULL) { - return -1; // set correct error code. + return TSDB_CODE_TSC_OUT_OF_MEMORY; // set correct error code. } SSqlInfo info = doGenerateAST(pStr); if (!info.valid) { - strcpy(msg, info.msg); - return -1; // set correct error code. + strncpy(msg, info.msg, msgLen); + return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; } struct SCatalog* pCatalog = getCatalogHandle(NULL); - int32_t code = qParserValidateSqlNode(pCatalog, &info, *pQueryInfo, id, msg); - if (code != 0) { - return code; - } - - return 0; + return qParserValidateSqlNode(pCatalog, &info, *pQueryInfo, id, msg, msgLen); } int32_t qParseInsertSql(const char* pStr, size_t length, struct SInsertStmtInfo** pInsertInfo, int64_t id, char* msg, int32_t msgLen) { @@ -50,6 +54,135 @@ int32_t qParserConvertSql(const char* pStr, size_t length, char** pConvertSql) { return 0; } -int32_t qParserExtractRequestedMetaInfo(const struct SSqlNode* pSqlNode, SMetaReq* pMetaInfo) { - return 0; +static int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf); + +static int32_t tnameComparFn(const void* p1, const void* p2) { + SName* pn1 = (SName*)p1; + SName* pn2 = (SName*)p2; + + int32_t ret = strncmp(pn1->acctId, pn2->acctId, tListLen(pn1->acctId)); + if (ret != 0) { + return ret > 0? 
1:-1; + } else { + ret = strncmp(pn1->dbname, pn2->dbname, tListLen(pn1->dbname)); + if (ret != 0) { + return ret > 0? 1:-1; + } else { + ret = strncmp(pn1->tname, pn2->tname, tListLen(pn1->tname)); + if (ret != 0) { + return ret > 0? 1:-1; + } else { + return 0; + } + } + } +} + +static int32_t getTableNameFromSubquery(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf) { + int32_t numOfSub = (int32_t)taosArrayGetSize(pSqlNode->from->list); + + for (int32_t j = 0; j < numOfSub; ++j) { + SRelElementPair* sub = taosArrayGet(pSqlNode->from->list, j); + + int32_t num = (int32_t)taosArrayGetSize(sub->pSubquery); + for (int32_t i = 0; i < num; ++i) { + SSqlNode* p = taosArrayGetP(sub->pSubquery, i); + if (p->from->type == SQL_NODE_FROM_TABLELIST) { + int32_t code = getTableNameFromSqlNode(p, tableNameList, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + getTableNameFromSubquery(p, tableNameList, pMsgBuf); + } + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf) { + const char* msg1 = "invalid table name"; + + int32_t numOfTables = (int32_t) taosArrayGetSize(pSqlNode->from->list); + assert(pSqlNode->from->type == SQL_NODE_FROM_TABLELIST); + + for(int32_t j = 0; j < numOfTables; ++j) { + SRelElementPair* item = taosArrayGet(pSqlNode->from->list, j); + + SToken* t = &item->tableName; + if (t->type == TK_INTEGER || t->type == TK_FLOAT || t->type == TK_STRING) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + if (parserValidateIdToken(t) != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + + SName name = {0}; + strndequote(name.tname, t->z, t->n); + taosArrayPush(tableNameList, &name); + } + + return TSDB_CODE_SUCCESS; +} + +static void freePtrElem(void* p) { + tfree(*(char**)p); +} + +int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SMetaReq* pMetaInfo, char* msg, int32_t msgBufLen) { + int32_t code = 
TSDB_CODE_SUCCESS; + SMsgBuf msgBuf = {.buf = msg, .len = msgBufLen}; + + pMetaInfo->pTableName = taosArrayInit(4, sizeof(SName)); + pMetaInfo->pUdf = taosArrayInit(4, POINTER_BYTES); + + size_t size = taosArrayGetSize(pSqlInfo->list); + for (int32_t i = 0; i < size; ++i) { + SSqlNode* pSqlNode = taosArrayGetP(pSqlInfo->list, i); + if (pSqlNode->from == NULL) { + return buildInvalidOperationMsg(&msgBuf, "invalid from clause"); + } + + // load the table meta in the FROM clause + if (pSqlNode->from->type == SQL_NODE_FROM_TABLELIST) { + code = getTableNameFromSqlNode(pSqlNode, pMetaInfo->pTableName, &msgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + code = getTableNameFromSubquery(pSqlNode, pMetaInfo->pTableName, &msgBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } + + taosArraySort(pMetaInfo->pTableName, tnameComparFn); + taosArrayRemoveDuplicate(pMetaInfo->pTableName, tnameComparFn, NULL); + + size_t funcSize = 0; + if (pSqlInfo->funcs) { + funcSize = taosArrayGetSize(pSqlInfo->funcs); + } + + if (funcSize > 0) { + for (size_t i = 0; i < funcSize; ++i) { + SToken* t = taosArrayGet(pSqlInfo->funcs, i); + assert(t != NULL); + + if (t->n >= TSDB_FUNC_NAME_LEN) { + return buildSyntaxErrMsg(msg, msgBufLen, "too long function name", t->z); + } + + // Let's assume that it is an UDF/UDAF, if it is not a built-in function. 
+ if (!qIsBuiltinFunction(t->z, t->n)) { + char* fname = strndup(t->z, t->n); + taosArrayPush(pMetaInfo->pUdf, &fname); + } + } + } + + return code; } \ No newline at end of file diff --git a/source/libs/parser/src/parserUtil.c b/source/libs/parser/src/parserUtil.c new file mode 100644 index 0000000000..bb473e9a73 --- /dev/null +++ b/source/libs/parser/src/parserUtil.c @@ -0,0 +1,1962 @@ +#include "taosmsg.h" +#include "parser.h" +#include "parserUtil.h" +#include "taoserror.h" +#include "tutil.h" +#include "ttypes.h" +#include "thash.h" +#include "tbuffer.h" +#include "parserInt.h" +#include "queryInfoUtil.h" +#include "function.h" + +typedef struct STableFilterCond { + uint64_t uid; + int16_t idx; //table index + int32_t len; // length of tag query condition data + char * cond; +} STableFilterCond; + +static STableMetaInfo* addTableMetaInfo(SQueryStmtInfo* pQueryInfo, SName* name, STableMeta* pTableMeta, + SVgroupsInfo* vgroupList, SArray* pTagCols, SArray* pVgroupTables); +STableMeta* tableMetaDup(STableMeta* pTableMeta); + +int32_t parserValidateIdToken(SToken* pToken) { + if (pToken == NULL || pToken->z == NULL || pToken->type != TK_ID) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // it is a token quoted with escape char '`' + if (pToken->z[0] == TS_ESCAPE_CHAR && pToken->z[pToken->n - 1] == TS_ESCAPE_CHAR) { + return TSDB_CODE_SUCCESS; + } + + char* sep = strnchr(pToken->z, TS_PATH_DELIMITER[0], pToken->n, true); + if (sep == NULL) { // It is a single part token, not a complex type + if (isNumber(pToken)) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + strntolower(pToken->z, pToken->z, pToken->n); + } else { // two part + int32_t oldLen = pToken->n; + char* pStr = pToken->z; + + if (pToken->type == TK_SPACE) { + pToken->n = (uint32_t)strtrim(pToken->z); + } + + pToken->n = tGetToken(pToken->z, &pToken->type); + if (pToken->z[pToken->n] != TS_PATH_DELIMITER[0]) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (pToken->type != TK_ID) { + return 
TSDB_CODE_TSC_INVALID_OPERATION; + } + + int32_t firstPartLen = pToken->n; + + pToken->z = sep + 1; + pToken->n = (uint32_t)(oldLen - (sep - pStr) - 1); + int32_t len = tGetToken(pToken->z, &pToken->type); + if (len != pToken->n || pToken->type != TK_ID) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + // re-build the whole name string + if (pStr[firstPartLen] == TS_PATH_DELIMITER[0]) { + // first part do not have quote do nothing + } else { + pStr[firstPartLen] = TS_PATH_DELIMITER[0]; + memmove(&pStr[firstPartLen + 1], pToken->z, pToken->n); + uint32_t offset = (uint32_t)(pToken->z - (pStr + firstPartLen + 1)); + memset(pToken->z + pToken->n - offset, ' ', offset); + } + + pToken->n += (firstPartLen + sizeof(TS_PATH_DELIMITER[0])); + pToken->z = pStr; + + strntolower(pToken->z, pToken->z, pToken->n); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t buildInvalidOperationMsg(SMsgBuf* pBuf, const char* msg) { + strncpy(pBuf->buf, msg, pBuf->len); + return TSDB_CODE_TSC_INVALID_OPERATION; +} + +int32_t buildSyntaxErrMsg(char* dst, int32_t dstBufLen, const char* additionalInfo, const char* sourceStr) { + const char* msgFormat1 = "syntax error near \'%s\'"; + const char* msgFormat2 = "syntax error near \'%s\' (%s)"; + const char* msgFormat3 = "%s"; + + const char* prefix = "syntax error"; + if (sourceStr == NULL) { + assert(additionalInfo != NULL); + snprintf(dst, dstBufLen, msgFormat1, additionalInfo); + return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; + } + + char buf[64] = {0}; // only extract part of sql string + strncpy(buf, sourceStr, tListLen(buf) - 1); + + if (additionalInfo != NULL) { + snprintf(dst, dstBufLen, msgFormat2, buf, additionalInfo); + } else { + const char* msgFormat = (0 == strncmp(sourceStr, prefix, strlen(prefix))) ? 
msgFormat3 : msgFormat1; + snprintf(dst, dstBufLen, msgFormat, buf); + } + + return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; +} + +SCond* getSTableQueryCond(STagCond* pTagCond, uint64_t uid) { + if (pTagCond->pCond == NULL) { + return NULL; + } + + size_t size = taosArrayGetSize(pTagCond->pCond); + for (int32_t i = 0; i < size; ++i) { + SCond* pCond = taosArrayGet(pTagCond->pCond, i); + + if (uid == pCond->uid) { + return pCond; + } + } + + return NULL; +} + +STableFilterCond* tsGetTableFilter(SArray* filters, uint64_t uid, int16_t idx) { + if (filters == NULL) { + return NULL; + } + + size_t size = taosArrayGetSize(filters); + for (int32_t i = 0; i < size; ++i) { + STableFilterCond* cond = taosArrayGet(filters, i); + + if (uid == cond->uid && (idx >= 0 && cond->idx == idx)) { + return cond; + } + } + + return NULL; +} + +void setSTableQueryCond(STagCond* pTagCond, uint64_t uid, SBufferWriter* bw) { + if (tbufTell(bw) == 0) { + return; + } + + SCond cond = { + .uid = uid, + .len = (int32_t)(tbufTell(bw)), + .cond = NULL, + }; + + cond.cond = tbufGetData(bw, true); + + if (pTagCond->pCond == NULL) { + pTagCond->pCond = taosArrayInit(3, sizeof(SCond)); + } + + taosArrayPush(pTagCond->pCond, &cond); +} + +//typedef struct SJoinStatus { +// SSDataBlock* pBlock; // point to the upstream block +// int32_t index; +// bool completed;// current upstream is completed or not +//} SJoinStatus; + +/* +static void createInputDataFilterInfo(SQueryStmtInfo* px, int32_t numOfCol1, int32_t* numOfFilterCols, SSingleColumnFilterInfo** pFilterInfo) { + SColumnInfo* tableCols = calloc(numOfCol1, sizeof(SColumnInfo)); + for(int32_t i = 0; i < numOfCol1; ++i) { + SColumn* pCol = taosArrayGetP(px->colList, i); + if (pCol->info.flist.numOfFilters > 0) { + (*numOfFilterCols) += 1; + } + + tableCols[i] = pCol->info; + } + + if ((*numOfFilterCols) > 0) { + doCreateFilterInfo(tableCols, numOfCol1, (*numOfFilterCols), pFilterInfo, 0); + } + + tfree(tableCols); +} +*/ + +//void 
destroyTableNameList(SInsertStatementParam* pInsertParam) { +// if (pInsertParam->numOfTables == 0) { +// assert(pInsertParam->pTableNameList == NULL); +// return; +// } +// +// for(int32_t i = 0; i < pInsertParam->numOfTables; ++i) { +// tfree(pInsertParam->pTableNameList[i]); +// } +// +// pInsertParam->numOfTables = 0; +// tfree(pInsertParam->pTableNameList); +//} + +//void tscDestroyBoundColumnInfo(SParsedDataColInfo* pColInfo) { +// tfree(pColInfo->boundedColumns); +// tfree(pColInfo->cols); +// tfree(pColInfo->colIdxInfo); +//} +// +//void tscDestroyDataBlock(STableDataBlocks* pDataBlock, bool removeMeta) { +// if (pDataBlock == NULL) { +// return; +// } +// +// tfree(pDataBlock->pData); +// +// if (removeMeta) { +// char name[TSDB_TABLE_FNAME_LEN] = {0}; +// tNameExtractFullName(&pDataBlock->tableName, name); +// +// taosHashRemove(tscTableMetaMap, name, strnlen(name, TSDB_TABLE_FNAME_LEN)); +// } +// +// if (!pDataBlock->cloned) { +// tfree(pDataBlock->params); +// +// // free the refcount for metermeta +// if (pDataBlock->pTableMeta != NULL) { +// tfree(pDataBlock->pTableMeta); +// } +// +// tscDestroyBoundColumnInfo(&pDataBlock->boundColumnInfo); +// } +// +// tfree(pDataBlock); +//} + +//SParamInfo* tscAddParamToDataBlock(STableDataBlocks* pDataBlock, char type, uint8_t timePrec, int16_t bytes, +// uint32_t offset) { +// uint32_t needed = pDataBlock->numOfParams + 1; +// if (needed > pDataBlock->numOfAllocedParams) { +// needed *= 2; +// void* tmp = realloc(pDataBlock->params, needed * sizeof(SParamInfo)); +// if (tmp == NULL) { +// return NULL; +// } +// pDataBlock->params = (SParamInfo*)tmp; +// pDataBlock->numOfAllocedParams = needed; +// } +// +// SParamInfo* param = pDataBlock->params + pDataBlock->numOfParams; +// param->idx = -1; +// param->type = type; +// param->timePrec = timePrec; +// param->bytes = bytes; +// param->offset = offset; +// +// ++pDataBlock->numOfParams; +// return param; +//} + +//void* tscDestroyBlockArrayList(SArray* 
pDataBlockList) { +// if (pDataBlockList == NULL) { +// return NULL; +// } +// +// size_t size = taosArrayGetSize(pDataBlockList); +// for (int32_t i = 0; i < size; i++) { +// void* d = taosArrayGetP(pDataBlockList, i); +// tscDestroyDataBlock(d, false); +// } +// +// taosArrayDestroy(pDataBlockList); +// return NULL; +//} + + +//void freeUdfInfo(SUdfInfo* pUdfInfo) { +// if (pUdfInfo == NULL) { +// return; +// } +// +// if (pUdfInfo->funcs[TSDB_UDF_FUNC_DESTROY]) { +// (*(udfDestroyFunc)pUdfInfo->funcs[TSDB_UDF_FUNC_DESTROY])(&pUdfInfo->init); +// } +// +// tfree(pUdfInfo->name); +// +// if (pUdfInfo->path) { +// unlink(pUdfInfo->path); +// } +// +// tfree(pUdfInfo->path); +// +// tfree(pUdfInfo->content); +// +// taosCloseDll(pUdfInfo->handle); +//} + +//void* tscDestroyUdfArrayList(SArray* pUdfList) { +// if (pUdfList == NULL) { +// return NULL; +// } +// +// size_t size = taosArrayGetSize(pUdfList); +// for (int32_t i = 0; i < size; i++) { +// SUdfInfo* udf = taosArrayGet(pUdfList, i); +// freeUdfInfo(udf); +// } +// +// taosArrayDestroy(pUdfList); +// return NULL; +//} + +//void* tscDestroyBlockHashTable(SHashObj* pBlockHashTable, bool removeMeta) { +// if (pBlockHashTable == NULL) { +// return NULL; +// } +// +// STableDataBlocks** p = taosHashIterate(pBlockHashTable, NULL); +// while(p) { +// tscDestroyDataBlock(*p, removeMeta); +// p = taosHashIterate(pBlockHashTable, p); +// } +// +// taosHashCleanup(pBlockHashTable); +// return NULL; +//} + +/** + * create the in-memory buffer for each table to keep the submitted data block + * @param initialSize + * @param rowSize + * @param startOffset + * @param name + * @param dataBlocks + * @return + */ +//int32_t tscCreateDataBlock(size_t defaultSize, int32_t rowSize, int32_t startOffset, SName* name, +// STableMeta* pTableMeta, STableDataBlocks** dataBlocks) { +// STableDataBlocks* dataBuf = (STableDataBlocks*)calloc(1, sizeof(STableDataBlocks)); +// if (dataBuf == NULL) { +// tscError("failed to allocated memory, 
reason:%s", strerror(errno)); +// return TSDB_CODE_TSC_OUT_OF_MEMORY; +// } +// +// dataBuf->nAllocSize = (uint32_t)defaultSize; +// dataBuf->headerSize = startOffset; +// +// // the header size will always be the startOffset value, reserved for the subumit block header +// if (dataBuf->nAllocSize <= dataBuf->headerSize) { +// dataBuf->nAllocSize = dataBuf->headerSize * 2; +// } +// +// //dataBuf->pData = calloc(1, dataBuf->nAllocSize); +// dataBuf->pData = malloc(dataBuf->nAllocSize); +// if (dataBuf->pData == NULL) { +// tscError("failed to allocated memory, reason:%s", strerror(errno)); +// tfree(dataBuf); +// return TSDB_CODE_TSC_OUT_OF_MEMORY; +// } +// memset(dataBuf->pData, 0, sizeof(SSubmitBlk)); +// +// //Here we keep the tableMeta to avoid it to be remove by other threads. +// dataBuf->pTableMeta = tscTableMetaDup(pTableMeta); +// +// SParsedDataColInfo* pColInfo = &dataBuf->boundColumnInfo; +// SSchema* pSchema = getTableColumnSchema(dataBuf->pTableMeta); +// tscSetBoundColumnInfo(pColInfo, pSchema, dataBuf->pTableMeta->tableInfo.numOfColumns); +// +// dataBuf->ordered = true; +// dataBuf->prevTS = INT64_MIN; +// dataBuf->rowSize = rowSize; +// dataBuf->size = startOffset; +// dataBuf->tsSource = -1; +// dataBuf->vgId = dataBuf->pTableMeta->vgId; +// +// tNameAssign(&dataBuf->tableName, name); +// +// assert(defaultSize > 0 && pTableMeta != NULL && dataBuf->pTableMeta != NULL); +// +// *dataBlocks = dataBuf; +// return TSDB_CODE_SUCCESS; +//} +// +//int32_t tscGetDataBlockFromList(SHashObj* pHashList, int64_t id, int32_t size, int32_t startOffset, int32_t rowSize, +// SName* name, STableMeta* pTableMeta, STableDataBlocks** dataBlocks, +// SArray* pBlockList) { +// *dataBlocks = NULL; +// STableDataBlocks** t1 = (STableDataBlocks**)taosHashGet(pHashList, (const char*)&id, sizeof(id)); +// if (t1 != NULL) { +// *dataBlocks = *t1; +// } +// +// if (*dataBlocks == NULL) { +// int32_t ret = tscCreateDataBlock((size_t)size, rowSize, startOffset, name, 
pTableMeta, dataBlocks); +// if (ret != TSDB_CODE_SUCCESS) { +// return ret; +// } +// +// taosHashPut(pHashList, (const char*)&id, sizeof(int64_t), (char*)dataBlocks, POINTER_BYTES); +// if (pBlockList) { +// taosArrayPush(pBlockList, dataBlocks); +// } +// } +// +// return TSDB_CODE_SUCCESS; +//} +// +//// Erase the empty space reserved for binary data +//static int trimDataBlock(void* pDataBlock, STableDataBlocks* pTableDataBlock, SInsertStatementParam* insertParam, +// SBlockKeyTuple* blkKeyTuple) { +// // TODO: optimize this function, handle the case while binary is not presented +// STableMeta* pTableMeta = pTableDataBlock->pTableMeta; +// STableComInfo tinfo = tscGetTableInfo(pTableMeta); +// SSchema* pSchema = getTableColumnSchema(pTableMeta); +// +// SSubmitBlk* pBlock = pDataBlock; +// memcpy(pDataBlock, pTableDataBlock->pData, sizeof(SSubmitBlk)); +// pDataBlock = (char*)pDataBlock + sizeof(SSubmitBlk); +// +// int32_t flen = 0; // original total length of row +// +// // schema needs to be included into the submit data block +// if (insertParam->schemaAttached) { +// int32_t numOfCols = tscGetNumOfColumns(pTableDataBlock->pTableMeta); +// for(int32_t j = 0; j < numOfCols; ++j) { +// STColumn* pCol = (STColumn*) pDataBlock; +// pCol->colId = htons(pSchema[j].colId); +// pCol->type = pSchema[j].type; +// pCol->bytes = htons(pSchema[j].bytes); +// pCol->offset = 0; +// +// pDataBlock = (char*)pDataBlock + sizeof(STColumn); +// flen += TYPE_BYTES[pSchema[j].type]; +// } +// +// int32_t schemaSize = sizeof(STColumn) * numOfCols; +// pBlock->schemaLen = schemaSize; +// } else { +// if (IS_RAW_PAYLOAD(insertParam->payloadType)) { +// for (int32_t j = 0; j < tinfo.numOfColumns; ++j) { +// flen += TYPE_BYTES[pSchema[j].type]; +// } +// } +// pBlock->schemaLen = 0; +// } +// +// char* p = pTableDataBlock->pData + sizeof(SSubmitBlk); +// pBlock->dataLen = 0; +// int32_t numOfRows = htons(pBlock->numOfRows); +// +// if (IS_RAW_PAYLOAD(insertParam->payloadType)) { 
+// for (int32_t i = 0; i < numOfRows; ++i) { +// SMemRow memRow = (SMemRow)pDataBlock; +// memRowSetType(memRow, SMEM_ROW_DATA); +// SDataRow trow = memRowDataBody(memRow); +// dataRowSetLen(trow, (uint16_t)(TD_DATA_ROW_HEAD_SIZE + flen)); +// dataRowSetVersion(trow, pTableMeta->sversion); +// +// int toffset = 0; +// for (int32_t j = 0; j < tinfo.numOfColumns; j++) { +// tdAppendColVal(trow, p, pSchema[j].type, toffset); +// toffset += TYPE_BYTES[pSchema[j].type]; +// p += pSchema[j].bytes; +// } +// +// pDataBlock = (char*)pDataBlock + memRowTLen(memRow); +// pBlock->dataLen += memRowTLen(memRow); +// } +// } else { +// for (int32_t i = 0; i < numOfRows; ++i) { +// char* payload = (blkKeyTuple + i)->payloadAddr; +// if (isNeedConvertRow(payload)) { +// convertSMemRow(pDataBlock, payload, pTableDataBlock); +// TDRowTLenT rowTLen = memRowTLen(pDataBlock); +// pDataBlock = POINTER_SHIFT(pDataBlock, rowTLen); +// pBlock->dataLen += rowTLen; +// } else { +// TDRowTLenT rowTLen = memRowTLen(payload); +// memcpy(pDataBlock, payload, rowTLen); +// pDataBlock = POINTER_SHIFT(pDataBlock, rowTLen); +// pBlock->dataLen += rowTLen; +// } +// } +// } +// +// int32_t len = pBlock->dataLen + pBlock->schemaLen; +// pBlock->dataLen = htonl(pBlock->dataLen); +// pBlock->schemaLen = htonl(pBlock->schemaLen); +// +// return len; +//} + +TAOS_FIELD createField(const SSchema* pSchema) { + TAOS_FIELD f = { .type = pSchema->type, .bytes = pSchema->bytes, }; + tstrncpy(f.name, pSchema->name, sizeof(f.name)); + return f; +} + +SSchema createSchema(uint8_t type, int16_t bytes, int16_t colId, const char* name) { + SSchema s = {0}; + s.type = type; + s.bytes = bytes; + s.colId = colId; + + tstrncpy(s.name, name, tListLen(s.name)); + return s; +} + +int32_t getNumOfFields(SFieldInfo* pFieldInfo) { + return pFieldInfo->numOfOutput; +} + +int32_t getFirstInvisibleFieldPos(SQueryStmtInfo* pQueryInfo) { + if (pQueryInfo->fieldsInfo.numOfOutput <= 0 || pQueryInfo->fieldsInfo.internalField == NULL) 
{ + return 0; + } + + for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutput; ++i) { + SInternalField* pField = taosArrayGet(pQueryInfo->fieldsInfo.internalField, i); + if (!pField->visible) { + return i; + } + } + + return pQueryInfo->fieldsInfo.numOfOutput; +} + +SInternalField* appendFieldInfo(SFieldInfo* pFieldInfo, TAOS_FIELD* pField) { + assert(pFieldInfo != NULL); + pFieldInfo->numOfOutput++; + + struct SInternalField info = { .pExpr = NULL, .visible = true }; + + info.field = *pField; + return taosArrayPush(pFieldInfo->internalField, &info); +} + +SInternalField* insertFieldInfo(SFieldInfo* pFieldInfo, int32_t index, SSchema* pSchema) { + pFieldInfo->numOfOutput++; + struct SInternalField info = { .pExpr = NULL, .visible = true }; + + info.field.type = pSchema->type; + info.field.bytes = pSchema->bytes; + tstrncpy(info.field.name, pSchema->name, tListLen(pSchema->name)); + + return taosArrayInsert(pFieldInfo->internalField, index, &info); +} + +void fieldInfoUpdateOffset(SQueryStmtInfo* pQueryInfo) { + int32_t offset = 0; + size_t numOfExprs = getNumOfExprs(pQueryInfo); + + for (int32_t i = 0; i < numOfExprs; ++i) { + SExprInfo* p = taosArrayGetP(pQueryInfo->exprList, i); + +// p->base.offset = offset; + offset += p->base.resSchema.bytes; + } +} + +SInternalField* getInternalField(SFieldInfo* pFieldInfo, int32_t index) { + assert(index < pFieldInfo->numOfOutput); + return TARRAY_GET_ELEM(pFieldInfo->internalField, index); +} + +TAOS_FIELD* getFieldInfo(SFieldInfo* pFieldInfo, int32_t index) { + assert(index < pFieldInfo->numOfOutput); + return &((SInternalField*)TARRAY_GET_ELEM(pFieldInfo->internalField, index))->field; +} + +int16_t getFieldInfoOffset(SQueryStmtInfo* pQueryInfo, int32_t index) { + SInternalField* pInfo = getInternalField(&pQueryInfo->fieldsInfo, index); + assert(pInfo != NULL && pInfo->pExpr->pExpr == NULL); + return 0; +// return pInfo->pExpr->base.offset; +} + +int32_t fieldInfoCompare(const SFieldInfo* pFieldInfo1, const SFieldInfo* 
pFieldInfo2, int32_t *diffSize) { + assert(pFieldInfo1 != NULL && pFieldInfo2 != NULL); + + if (pFieldInfo1->numOfOutput != pFieldInfo2->numOfOutput) { + return pFieldInfo1->numOfOutput - pFieldInfo2->numOfOutput; + } + + for (int32_t i = 0; i < pFieldInfo1->numOfOutput; ++i) { + TAOS_FIELD* pField1 = getFieldInfo((SFieldInfo*) pFieldInfo1, i); + TAOS_FIELD* pField2 = getFieldInfo((SFieldInfo*) pFieldInfo2, i); + + if (pField1->type != pField2->type || + strcasecmp(pField1->name, pField2->name) != 0) { + return 1; + } + + if (pField1->bytes != pField2->bytes) { + *diffSize = 1; + + if (pField2->bytes > pField1->bytes) { + assert(IS_VAR_DATA_TYPE(pField1->type)); + pField1->bytes = pField2->bytes; + } + } + } + + return 0; +} + +int32_t getFieldInfoSize(const SFieldInfo* pFieldInfo1, const SFieldInfo* pFieldInfo2) { + assert(pFieldInfo1 != NULL && pFieldInfo2 != NULL); + + for (int32_t i = 0; i < pFieldInfo1->numOfOutput; ++i) { + TAOS_FIELD* pField1 = getFieldInfo((SFieldInfo*) pFieldInfo1, i); + TAOS_FIELD* pField2 = getFieldInfo((SFieldInfo*) pFieldInfo2, i); + + pField2->bytes = pField1->bytes; + } + + return 0; +} + +static void destroyFilterInfo(SColumnFilterList* pFilterList) { + if (pFilterList->filterInfo == NULL) { + pFilterList->numOfFilters = 0; + return; + } + + for(int32_t i = 0; i < pFilterList->numOfFilters; ++i) { + if (pFilterList->filterInfo[i].filterstr) { + tfree(pFilterList->filterInfo[i].pz); + } + } + + tfree(pFilterList->filterInfo); + pFilterList->numOfFilters = 0; +} + +void cleanupFieldInfo(SFieldInfo* pFieldInfo) { + if (pFieldInfo == NULL) { + return; + } + + if (pFieldInfo->internalField != NULL) { + size_t num = taosArrayGetSize(pFieldInfo->internalField); + for (int32_t i = 0; i < num; ++i) { +// SInternalField* pfield = taosArrayGet(pFieldInfo->internalField, i); +// if (pfield->pExpr != NULL && pfield->pExpr->pExpr != NULL) { +// sqlExprDestroy(pfield->pExpr); +// } + } + } + + taosArrayDestroy(pFieldInfo->internalField); +// 
tfree(pFieldInfo->final); + + memset(pFieldInfo, 0, sizeof(SFieldInfo)); +} + +void copyFieldInfo(SFieldInfo* pFieldInfo, const SFieldInfo* pSrc, const SArray* pExprList) { + assert(pFieldInfo != NULL && pSrc != NULL && pExprList != NULL); + pFieldInfo->numOfOutput = pSrc->numOfOutput; + + if (pSrc->final != NULL) { + pFieldInfo->final = calloc(pSrc->numOfOutput, sizeof(TAOS_FIELD)); + memcpy(pFieldInfo->final, pSrc->final, sizeof(TAOS_FIELD) * pSrc->numOfOutput); + } + + if (pSrc->internalField != NULL) { + size_t num = taosArrayGetSize(pSrc->internalField); + size_t numOfExpr = taosArrayGetSize(pExprList); + + for (int32_t i = 0; i < num; ++i) { + SInternalField* pfield = taosArrayGet(pSrc->internalField, i); + + SInternalField p = {.visible = pfield->visible, .field = pfield->field}; + + bool found = false; + int32_t resColId = pfield->pExpr->base.resSchema.colId; + for(int32_t j = 0; j < numOfExpr; ++j) { + SExprInfo* pExpr = taosArrayGetP(pExprList, j); + if (pExpr->base.resSchema.colId == resColId) { + p.pExpr = pExpr; + found = true; + break; + } + } + + if (!found) { + assert(pfield->pExpr->pExpr != NULL); + p.pExpr = calloc(1, sizeof(SExprInfo)); + assignExprInfo(p.pExpr, pfield->pExpr); + } + + taosArrayPush(pFieldInfo->internalField, &p); + } + } +} + +// ignore the tbname columnIndex to be inserted into source list +int32_t columnExists(SArray* pColumnList, int32_t columnId, uint64_t uid) { + size_t numOfCols = taosArrayGetSize(pColumnList); + + int32_t i = 0; + while (i < numOfCols) { + SColumn* pCol = taosArrayGetP(pColumnList, i); + if ((pCol->info.colId != columnId) || (pCol->tableUid != uid)) { + ++i; + continue; + } else { + break; + } + } + + if (i >= numOfCols || numOfCols == 0) { + return -1; + } + + return i; +} + +SColumn* columnListInsert(SArray* pColumnList, int32_t columnIndex, uint64_t uid, SSchema* pSchema) { + // ignore the tbname columnIndex to be inserted into source list + if (columnIndex < 0) { + return NULL; + } + + size_t 
numOfCols = taosArrayGetSize(pColumnList); + + int32_t i = 0; + while (i < numOfCols) { + SColumn* pCol = taosArrayGetP(pColumnList, i); + if (pCol->columnIndex < columnIndex) { + i++; + } else if (pCol->tableUid < uid) { + i++; + } else { + break; + } + } + + if (i >= numOfCols || numOfCols == 0) { + SColumn* b = calloc(1, sizeof(SColumn)); + if (b == NULL) { + return NULL; + } + + b->columnIndex = columnIndex; + b->tableUid = uid; + b->info.colId = pSchema->colId; + b->info.bytes = pSchema->bytes; + b->info.type = pSchema->type; + + taosArrayInsert(pColumnList, i, &b); + } else { + SColumn* pCol = taosArrayGetP(pColumnList, i); + + if (i < numOfCols && (pCol->columnIndex > columnIndex || pCol->tableUid != uid)) { + SColumn* b = calloc(1, sizeof(SColumn)); + if (b == NULL) { + return NULL; + } + + b->columnIndex = columnIndex; + b->tableUid = uid; + b->info.colId = pSchema->colId; + b->info.bytes = pSchema->bytes; + b->info.type = pSchema->type; + + taosArrayInsert(pColumnList, i, &b); + } + } + + return taosArrayGetP(pColumnList, i); +} + +SColumn* insertPrimaryTsColumn(SArray* pColumnList, uint64_t tableUid) { + SSchema s = {.type = TSDB_DATA_TYPE_TIMESTAMP, .bytes = TSDB_KEYSIZE, .colId = PRIMARYKEY_TIMESTAMP_COL_INDEX}; + return columnListInsert(pColumnList, PRIMARYKEY_TIMESTAMP_COL_INDEX, tableUid, &s); +} + +void columnCopy(SColumn* pDest, const SColumn* pSrc); + +SColumn* columnClone(const SColumn* src) { + assert(src != NULL); + + SColumn* dst = calloc(1, sizeof(SColumn)); + if (dst == NULL) { + return NULL; + } + + columnCopy(dst, src); + return dst; +} + +SColumnFilterInfo* tFilterInfoDup(const SColumnFilterInfo* src, int32_t numOfFilters) { + if (numOfFilters == 0 || src == NULL) { + assert(src == NULL); + return NULL; + } + + SColumnFilterInfo* pFilter = calloc(1, numOfFilters * sizeof(SColumnFilterInfo)); + + memcpy(pFilter, src, sizeof(SColumnFilterInfo) * numOfFilters); + for (int32_t j = 0; j < numOfFilters; ++j) { + if (pFilter[j].filterstr) { + 
size_t len = (size_t) pFilter[j].len + 1 * TSDB_NCHAR_SIZE; + pFilter[j].pz = (int64_t) calloc(1, len); + + memcpy((char*)pFilter[j].pz, (char*)src[j].pz, (size_t) pFilter[j].len); + } + } + + assert(src->filterstr == 0 || src->filterstr == 1); + assert(!(src->lowerRelOptr == TSDB_RELATION_INVALID && src->upperRelOptr == TSDB_RELATION_INVALID)); + + return pFilter; +} + +void columnCopy(SColumn* pDest, const SColumn* pSrc) { + destroyFilterInfo(&pDest->info.flist); + + pDest->columnIndex = pSrc->columnIndex; + pDest->tableUid = pSrc->tableUid; + pDest->info.flist.numOfFilters = pSrc->info.flist.numOfFilters; + pDest->info.flist.filterInfo = tFilterInfoDup(pSrc->info.flist.filterInfo, pSrc->info.flist.numOfFilters); + pDest->info.type = pSrc->info.type; + pDest->info.colId = pSrc->info.colId; + pDest->info.bytes = pSrc->info.bytes; +} + +void columnListCopyAll(SArray* dst, const SArray* src) { + assert(src != NULL && dst != NULL); + + size_t num = taosArrayGetSize(src); + for (int32_t i = 0; i < num; ++i) { + SColumn* pCol = taosArrayGetP(src, i); + SColumn* p = columnClone(pCol); + taosArrayPush(dst, &p); + } +} + +void columnListCopy(SArray* dst, const SArray* src, uint64_t uid) { + assert(src != NULL && dst != NULL); + + size_t num = taosArrayGetSize(src); + for (int32_t i = 0; i < num; ++i) { + SColumn* pCol = taosArrayGetP(src, i); + + if (pCol->tableUid == uid) { + SColumn* p = columnClone(pCol); + taosArrayPush(dst, &p); + } + } +} + +static void columnDestroy(SColumn* pCol) { + destroyFilterInfo(&pCol->info.flist); + free(pCol); +} + +void columnListDestroy(SArray* pColumnList) { + if (pColumnList == NULL) { + return; + } + + size_t num = taosArrayGetSize(pColumnList); + for (int32_t i = 0; i < num; ++i) { + SColumn* pCol = taosArrayGetP(pColumnList, i); + columnDestroy(pCol); + } + + taosArrayDestroy(pColumnList); +} + +bool validateColumnId(STableMetaInfo* pTableMetaInfo, int32_t colId, int32_t numOfParams) { + if (pTableMetaInfo->pTableMeta == NULL) { + 
return false; + } + + if (colId == TSDB_TBNAME_COLUMN_INDEX || (colId <= TSDB_UD_COLUMN_INDEX && numOfParams == 2)) { + return true; + } + + SSchema* pSchema = getTableColumnSchema(pTableMetaInfo->pTableMeta); + STableComInfo tinfo = getTableInfo(pTableMetaInfo->pTableMeta); + + int32_t numOfTotal = tinfo.numOfTags + tinfo.numOfColumns; + + for (int32_t i = 0; i < numOfTotal; ++i) { + if (pSchema[i].colId == colId) { + return true; + } + } + + return false; +} + +int32_t tscTagCondCopy(STagCond* dest, const STagCond* src) { + memset(dest, 0, sizeof(STagCond)); + + if (src->tbnameCond.cond != NULL) { + dest->tbnameCond.cond = strdup(src->tbnameCond.cond); + if (dest->tbnameCond.cond == NULL) { + return -1; + } + } + + dest->tbnameCond.uid = src->tbnameCond.uid; + dest->tbnameCond.len = src->tbnameCond.len; + + dest->joinInfo.hasJoin = src->joinInfo.hasJoin; + + for (int32_t i = 0; i < TSDB_MAX_JOIN_TABLE_NUM; ++i) { + if (src->joinInfo.joinTables[i]) { + dest->joinInfo.joinTables[i] = calloc(1, sizeof(SJoinNode)); + + memcpy(dest->joinInfo.joinTables[i], src->joinInfo.joinTables[i], sizeof(SJoinNode)); + + if (src->joinInfo.joinTables[i]->tsJoin) { + dest->joinInfo.joinTables[i]->tsJoin = taosArrayDup(src->joinInfo.joinTables[i]->tsJoin); + } + + if (src->joinInfo.joinTables[i]->tagJoin) { + dest->joinInfo.joinTables[i]->tagJoin = taosArrayDup(src->joinInfo.joinTables[i]->tagJoin); + } + } + } + + + dest->relType = src->relType; + + if (src->pCond == NULL) { + return 0; + } + + size_t s = taosArrayGetSize(src->pCond); + dest->pCond = taosArrayInit(s, sizeof(SCond)); + + for (int32_t i = 0; i < s; ++i) { + SCond* pCond = taosArrayGet(src->pCond, i); + + SCond c = {0}; + c.len = pCond->len; + c.uid = pCond->uid; + + if (pCond->len > 0) { + assert(pCond->cond != NULL); + c.cond = malloc(c.len); + if (c.cond == NULL) { + return -1; + } + + memcpy(c.cond, pCond->cond, c.len); + } + + taosArrayPush(dest->pCond, &c); + } + + return 0; +} + +int32_t tscColCondCopy(SArray** 
dest, const SArray* src, uint64_t uid, int16_t tidx) { + if (src == NULL) { + return 0; + } + + size_t s = taosArrayGetSize(src); + *dest = taosArrayInit(s, sizeof(SCond)); + + for (int32_t i = 0; i < s; ++i) { + STableFilterCond* pCond = taosArrayGet(src, i); + STableFilterCond c = {0}; + + if (tidx > 0) { + if (!(pCond->uid == uid && pCond->idx == tidx)) { + continue; + } + + c.idx = 0; + } else { + c.idx = pCond->idx; + } + + c.len = pCond->len; + c.uid = pCond->uid; + + if (pCond->len > 0) { + assert(pCond->cond != NULL); + c.cond = malloc(c.len); + if (c.cond == NULL) { + return -1; + } + + memcpy(c.cond, pCond->cond, c.len); + } + + taosArrayPush(*dest, &c); + } + + return 0; +} + +void cleanupColumnCond(SArray** pCond) { + if (*pCond == NULL) { + return; + } + + size_t s = taosArrayGetSize(*pCond); + for (int32_t i = 0; i < s; ++i) { + STableFilterCond* p = taosArrayGet(*pCond, i); + tfree(p->cond); + } + + taosArrayDestroy(*pCond); + + *pCond = NULL; +} + +void cleanupTagCond(STagCond* pTagCond) { + free(pTagCond->tbnameCond.cond); + + if (pTagCond->pCond != NULL) { + size_t s = taosArrayGetSize(pTagCond->pCond); + for (int32_t i = 0; i < s; ++i) { + SCond* p = taosArrayGet(pTagCond->pCond, i); + tfree(p->cond); + } + + taosArrayDestroy(pTagCond->pCond); + } + + for (int32_t i = 0; i < TSDB_MAX_JOIN_TABLE_NUM; ++i) { + SJoinNode *node = pTagCond->joinInfo.joinTables[i]; + if (node == NULL) { + continue; + } + + if (node->tsJoin != NULL) { + taosArrayDestroy(node->tsJoin); + } + + if (node->tagJoin != NULL) { + taosArrayDestroy(node->tagJoin); + } + + tfree(node); + } + + memset(pTagCond, 0, sizeof(STagCond)); +} + +//void tscGetSrcColumnInfo(SSrcColumnInfo* pColInfo, SQueryStmtInfo* pQueryInfo) { +// STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); +// SSchema* pSchema = getTableColumnSchema(pTableMetaInfo->pTableMeta); +// +// size_t numOfExprs = getNumOfExprs(pQueryInfo); +// for (int32_t i = 0; i < numOfExprs; ++i) { +// SExprInfo* pExpr = 
getExprInfo(pQueryInfo, i); +// pColInfo[i].functionId = pExpr->base.functionId; +// +// if (TSDB_COL_IS_TAG(pExpr->base.colInfo.flag)) { +// SSchema* pTagSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta); +// +// int16_t index = pExpr->base.colInfo.colIndex; +// pColInfo[i].type = (index != -1) ? pTagSchema[index].type : TSDB_DATA_TYPE_BINARY; +// } else { +// pColInfo[i].type = pSchema[pExpr->base.colInfo.colIndex].type; +// } +// } +//} + +/** + * + * @param clauseIndex denote the index of the union sub clause, usually are 0, if no union query exists. + * @param tableIndex denote the table index for join query, where more than one table exists + * @return + */ +STableMetaInfo* getMetaInfo(SQueryStmtInfo* pQueryInfo, int32_t tableIndex) { + assert(pQueryInfo != NULL); + if (pQueryInfo->pTableMetaInfo == NULL) { + assert(pQueryInfo->numOfTables == 0); + return NULL; + } + + assert(tableIndex >= 0 && tableIndex <= pQueryInfo->numOfTables && pQueryInfo->pTableMetaInfo != NULL); + return pQueryInfo->pTableMetaInfo[tableIndex]; +} + +STableMetaInfo* getTableMetaInfoByUid(SQueryStmtInfo* pQueryInfo, uint64_t uid, int32_t* index) { + int32_t k = -1; + + for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + if (pQueryInfo->pTableMetaInfo[i]->pTableMeta->uid == uid) { + k = i; + break; + } + } + + if (index != NULL) { + *index = k; + } + + assert(k != -1); + return getMetaInfo(pQueryInfo, k); +} + +int32_t queryInfoCopy(SQueryStmtInfo* pQueryInfo, const SQueryStmtInfo* pSrc) { + assert(pQueryInfo != NULL && pSrc != NULL); + int32_t code = TSDB_CODE_SUCCESS; + + memcpy(&pQueryInfo->interval, &pSrc->interval, sizeof(pQueryInfo->interval)); + + pQueryInfo->command = pSrc->command; + pQueryInfo->type = pSrc->type; + pQueryInfo->window = pSrc->window; + pQueryInfo->limit = pSrc->limit; + pQueryInfo->slimit = pSrc->slimit; + pQueryInfo->order = pSrc->order; + pQueryInfo->vgroupLimit = pSrc->vgroupLimit; + pQueryInfo->tsBuf = NULL; + pQueryInfo->fillType = 
pSrc->fillType; + pQueryInfo->fillVal = NULL; + pQueryInfo->numOfFillVal = 0;; + pQueryInfo->clauseLimit = pSrc->clauseLimit; + pQueryInfo->prjOffset = pSrc->prjOffset; + pQueryInfo->numOfTables = 0; + pQueryInfo->window = pSrc->window; + pQueryInfo->sessionWindow = pSrc->sessionWindow; + pQueryInfo->pTableMetaInfo = NULL; + + pQueryInfo->bufLen = pSrc->bufLen; +// pQueryInfo->orderProjectQuery = pSrc->orderProjectQuery; +// pQueryInfo->arithmeticOnAgg = pSrc->arithmeticOnAgg; + pQueryInfo->buf = malloc(pSrc->bufLen); + if (pQueryInfo->buf == NULL) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + + if (pSrc->bufLen > 0) { + memcpy(pQueryInfo->buf, pSrc->buf, pSrc->bufLen); + } + + pQueryInfo->groupbyExpr = pSrc->groupbyExpr; + if (pSrc->groupbyExpr.columnInfo != NULL) { + pQueryInfo->groupbyExpr.columnInfo = taosArrayDup(pSrc->groupbyExpr.columnInfo); + if (pQueryInfo->groupbyExpr.columnInfo == NULL) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + } + + if (tscTagCondCopy(&pQueryInfo->tagCond, &pSrc->tagCond) != 0) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + + if (tscColCondCopy(&pQueryInfo->colCond, pSrc->colCond, 0, -1) != 0) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + + if (pSrc->fillType != TSDB_FILL_NONE) { + pQueryInfo->fillVal = calloc(1, pSrc->fieldsInfo.numOfOutput * sizeof(int64_t)); + if (pQueryInfo->fillVal == NULL) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + pQueryInfo->numOfFillVal = pSrc->fieldsInfo.numOfOutput; + + memcpy(pQueryInfo->fillVal, pSrc->fillVal, pSrc->fieldsInfo.numOfOutput * sizeof(int64_t)); + } + + if (copyAllExprInfo(pQueryInfo->exprList, pSrc->exprList, true) != 0) { + code = TSDB_CODE_TSC_OUT_OF_MEMORY; + goto _error; + } + +// if (pQueryInfo->arithmeticOnAgg) { +// pQueryInfo->exprList1 = taosArrayInit(4, POINTER_BYTES); +// if (copyAllExprInfo(pQueryInfo->exprList1, pSrc->exprList1, true) != 0) { +// code = TSDB_CODE_TSC_OUT_OF_MEMORY; +// goto _error; 
+// } +// } + + columnListCopyAll(pQueryInfo->colList, pSrc->colList); + copyFieldInfo(&pQueryInfo->fieldsInfo, &pSrc->fieldsInfo, pQueryInfo->exprList); + + for(int32_t i = 0; i < pSrc->numOfTables; ++i) { + STableMetaInfo* p1 = getMetaInfo((SQueryStmtInfo*) pSrc, i); + + STableMeta* pMeta = tableMetaDup(p1->pTableMeta); + if (pMeta == NULL) { + // todo handle the error + } + + addTableMetaInfo(pQueryInfo, &p1->name, pMeta, p1->vgroupList, p1->tagColList, NULL); + } + + SArray *pUdfInfo = NULL; + if (pSrc->pUdfInfo) { + pUdfInfo = taosArrayDup(pSrc->pUdfInfo); + } + + pQueryInfo->pUdfInfo = pUdfInfo; + + _error: + return code; +} + +void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t id) { + for(int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + STableMetaInfo* pTableMetaInfo = getMetaInfo(pQueryInfo, i); + clearTableMetaInfo(pTableMetaInfo); + } + + tfree(pQueryInfo->pTableMetaInfo); +} + +STableMetaInfo* addTableMetaInfo(SQueryStmtInfo* pQueryInfo, SName* name, STableMeta* pTableMeta, + SVgroupsInfo* vgroupList, SArray* pTagCols, SArray* pVgroupTables) { + void* tmp = realloc(pQueryInfo->pTableMetaInfo, (pQueryInfo->numOfTables + 1) * POINTER_BYTES); + if (tmp == NULL) { + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; + return NULL; + } + + pQueryInfo->pTableMetaInfo = tmp; + STableMetaInfo* pTableMetaInfo = calloc(1, sizeof(STableMetaInfo)); + + if (pTableMetaInfo == NULL) { + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; + return NULL; + } + + pQueryInfo->pTableMetaInfo[pQueryInfo->numOfTables] = pTableMetaInfo; + + if (name != NULL) { + tNameAssign(&pTableMetaInfo->name, name); + } + + pTableMetaInfo->pTableMeta = pTableMeta; + + if (vgroupList != NULL) { +// pTableMetaInfo->vgroupList = vgroupInfoClone(vgroupList); + } + + // TODO handle malloc failure + pTableMetaInfo->tagColList = taosArrayInit(4, POINTER_BYTES); + if (pTableMetaInfo->tagColList == NULL) { + return NULL; + } + + if (pTagCols != NULL && pTableMetaInfo->pTableMeta != NULL) { + 
columnListCopy(pTableMetaInfo->tagColList, pTagCols, pTableMetaInfo->pTableMeta->uid); + } + + pQueryInfo->numOfTables += 1; + return pTableMetaInfo; +} + +STableMetaInfo* addEmptyMetaInfo(SQueryStmtInfo* pQueryInfo) { + return addTableMetaInfo(pQueryInfo, NULL, NULL, NULL, NULL, NULL); +} + +SInternalField* getInternalFieldInfo(SFieldInfo* pFieldInfo, int32_t index) { + assert(index < pFieldInfo->numOfOutput); + return TARRAY_GET_ELEM(pFieldInfo->internalField, index); +} + +int32_t getNumOfInternalField(SFieldInfo* pFieldInfo) { + return (int32_t) taosArrayGetSize(pFieldInfo->internalField); +} + +static void doSetSqlExprAndResultFieldInfo(SQueryStmtInfo* pNewQueryInfo, int64_t uid) { + int32_t numOfOutput = (int32_t)getNumOfExprs(pNewQueryInfo); + if (numOfOutput == 0) { + return; + } + + // set the field info in pNewQueryInfo object according to sqlExpr information + for (int32_t i = 0; i < numOfOutput; ++i) { + SExprInfo* pExpr = getExprInfo(pNewQueryInfo, i); + + TAOS_FIELD f = createField(&pExpr->base.resSchema); + SInternalField* pInfo1 = appendFieldInfo(&pNewQueryInfo->fieldsInfo, &f); + pInfo1->pExpr = pExpr; + } + + // update the pSqlExpr pointer in SInternalField according the field name + // make sure the pSqlExpr point to the correct SqlExpr in pNewQueryInfo, not SqlExpr in pQueryInfo + for (int32_t f = 0; f < pNewQueryInfo->fieldsInfo.numOfOutput; ++f) { + TAOS_FIELD* field = getFieldInfo(&pNewQueryInfo->fieldsInfo, f); + + bool matched = false; + for (int32_t k1 = 0; k1 < numOfOutput; ++k1) { + SExprInfo* pExpr1 = getExprInfo(pNewQueryInfo, k1); + + if (strcmp(field->name, pExpr1->base.resSchema.name) == 0) { // establish link according to the result field name + SInternalField* pInfo = getInternalFieldInfo(&pNewQueryInfo->fieldsInfo, f); + pInfo->pExpr = pExpr1; + + matched = true; + break; + } + } + + assert(matched); + (void)matched; + } + +// updateFieldInfoOffset(pNewQueryInfo); +} + +int16_t getJoinTagColIdByUid(STagCond* pTagCond, uint64_t 
uid) { + int32_t i = 0; + while (i < TSDB_MAX_JOIN_TABLE_NUM) { + SJoinNode* node = pTagCond->joinInfo.joinTables[i]; + if (node && node->uid == uid) { + return node->tagColId; + } + + i++; + } + + assert(0); + return -1; +} + +int16_t getTagColIndexById(STableMeta* pTableMeta, int16_t colId) { + int32_t numOfTags = getNumOfTags(pTableMeta); + + SSchema* pSchema = getTableTagSchema(pTableMeta); + for(int32_t i = 0; i < numOfTags; ++i) { + if (pSchema[i].colId == colId) { + return i; + } + } + + // can not reach here + assert(0); + return INT16_MIN; +} + +bool isQueryWithLimit(SQueryStmtInfo* pQueryInfo) { + while(pQueryInfo != NULL) { + if (pQueryInfo->limit.limit > 0) { + return true; + } + + pQueryInfo = pQueryInfo->sibling; + } + + return false; +} + +SVgroupsInfo* vgroupInfoClone(SVgroupsInfo *vgroupList) { + if (vgroupList == NULL) { + return NULL; + } + + size_t size = sizeof(SVgroupsInfo) + sizeof(SVgroupMsg) * vgroupList->numOfVgroups; + SVgroupsInfo* pNew = malloc(size); + if (pNew == NULL) { + return NULL; + } + + pNew->numOfVgroups = vgroupList->numOfVgroups; + + for(int32_t i = 0; i < vgroupList->numOfVgroups; ++i) { + SVgroupMsg* pNewVInfo = &pNew->vgroups[i]; + + SVgroupMsg* pvInfo = &vgroupList->vgroups[i]; + pNewVInfo->vgId = pvInfo->vgId; + pNewVInfo->numOfEps = pvInfo->numOfEps; + + for(int32_t j = 0; j < pvInfo->numOfEps; ++j) { + pNewVInfo->epAddr[j].port = pvInfo->epAddr[j].port; + tstrncpy(pNewVInfo->epAddr[j].fqdn, pvInfo->epAddr[j].fqdn, TSDB_FQDN_LEN); + } + } + + return pNew; +} + +void* vgroupInfoClear(SVgroupsInfo *vgroupList) { + if (vgroupList == NULL) { + return NULL; + } + + tfree(vgroupList); + return NULL; +} + +char* serializeTagData(STagData* pTagData, char* pMsg) { + int32_t n = (int32_t) strlen(pTagData->name); + *(int32_t*) pMsg = htonl(n); + pMsg += sizeof(n); + + memcpy(pMsg, pTagData->name, n); + pMsg += n; + + *(int32_t*)pMsg = htonl(pTagData->dataLen); + pMsg += sizeof(int32_t); + + memcpy(pMsg, pTagData->data, 
pTagData->dataLen); + pMsg += pTagData->dataLen; + + return pMsg; +} + +int32_t copyTagData(STagData* dst, const STagData* src) { + dst->dataLen = src->dataLen; + tstrncpy(dst->name, src->name, tListLen(dst->name)); + + if (dst->dataLen > 0) { + dst->data = malloc(dst->dataLen); + if (dst->data == NULL) { + return -1; + } + + memcpy(dst->data, src->data, dst->dataLen); + } + + return 0; +} + +STableMeta* createSuperTableMeta(STableMetaMsg* pChild) { + assert(pChild != NULL); + int32_t total = pChild->numOfColumns + pChild->numOfTags; + + STableMeta* pTableMeta = calloc(1, sizeof(STableMeta) + sizeof(SSchema) * total); + pTableMeta->tableType = TSDB_SUPER_TABLE; + pTableMeta->tableInfo.numOfTags = pChild->numOfTags; + pTableMeta->tableInfo.numOfColumns = pChild->numOfColumns; + pTableMeta->tableInfo.precision = pChild->precision; + + pTableMeta->uid = pChild->suid; + pTableMeta->tversion = pChild->tversion; + pTableMeta->sversion = pChild->sversion; + + memcpy(pTableMeta->schema, pChild->schema, sizeof(SSchema) * total); + + int32_t num = pTableMeta->tableInfo.numOfColumns; + for(int32_t i = 0; i < num; ++i) { + pTableMeta->tableInfo.rowSize += pTableMeta->schema[i].bytes; + } + + return pTableMeta; +} + +uint32_t getTableMetaSize(STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + + int32_t totalCols = 0; + if (pTableMeta->tableInfo.numOfColumns >= 0) { + totalCols = pTableMeta->tableInfo.numOfColumns + pTableMeta->tableInfo.numOfTags; + } + + return sizeof(STableMeta) + totalCols * sizeof(SSchema); +} + +uint32_t getTableMetaMaxSize() { + return sizeof(STableMeta) + TSDB_MAX_COLUMNS * sizeof(SSchema); +} + +STableMeta* tableMetaDup(STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + size_t size = getTableMetaSize(pTableMeta); + + STableMeta* p = malloc(size); + memcpy(p, pTableMeta, size); + return p; +} + +SVgroupsInfo* vgroupsInfoDup(SVgroupsInfo* pVgroupsInfo) { + assert(pVgroupsInfo != NULL); + + size_t size = sizeof(SVgroupMsg) * 
pVgroupsInfo->numOfVgroups + sizeof(SVgroupsInfo); + SVgroupsInfo* pInfo = calloc(1, size); + pInfo->numOfVgroups = pVgroupsInfo->numOfVgroups; + for (int32_t m = 0; m < pVgroupsInfo->numOfVgroups; ++m) { + memcpy(&pInfo->vgroups[m], &pVgroupsInfo->vgroups[m], sizeof(SVgroupMsg)); + } + + return pInfo; +} + +int32_t getNumOfOutput(SFieldInfo* pFieldInfo) { + return pFieldInfo->numOfOutput; +} + +// todo move to planner module +int32_t createProjectionExpr(SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, SExprInfo*** pExpr, int32_t* num) { +// if (!pQueryInfo->arithmeticOnAgg) { +// return TSDB_CODE_SUCCESS; +// } +#if 0 + *num = getNumOfOutput(pQueryInfo); + *pExpr = calloc(*(num), POINTER_BYTES); + if ((*pExpr) == NULL) { + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + + for (int32_t i = 0; i < (*num); ++i) { + SInternalField* pField = getInternalFieldInfo(&pQueryInfo->fieldsInfo, i); + SExprInfo* pSource = pField->pExpr; + + SExprInfo* px = calloc(1, sizeof(SExprInfo)); + (*pExpr)[i] = px; + + SSqlExpr *pse = &px->base; + pse->uid = pTableMetaInfo->pTableMeta->uid; + memcpy(&pse->resSchema, &pSource->base.resSchema, sizeof(SSchema)); + + if (pSource->base.functionId != FUNCTION_ARITHM) { // this should be switched to projection query + pse->numOfParams = 0; // no params for projection query + pse->functionId = FUNCTION_PRJ; + pse->colInfo.colId = pSource->base.resSchema.colId; + + int32_t numOfOutput = (int32_t) taosArrayGetSize(pQueryInfo->exprList); + for (int32_t j = 0; j < numOfOutput; ++j) { + SExprInfo* p = taosArrayGetP(pQueryInfo->exprList, j); + if (p->base.resSchema.colId == pse->colInfo.colId) { + pse->colInfo.colIndex = j; + break; + } + } + + pse->colInfo.flag = TSDB_COL_NORMAL; + strncpy(pse->colInfo.name, pSource->base.resSchema.name, tListLen(pse->colInfo.name)); + + // TODO restore refactor + int32_t functionId = pSource->base.functionId; + if (pSource->base.functionId == FUNCTION_FIRST_DST) { + functionId = FUNCTION_FIRST; + } else if 
(pSource->base.functionId == FUNCTION_LAST_DST) { + functionId = FUNCTION_LAST; + } else if (pSource->base.functionId == FUNCTION_STDDEV_DST) { + functionId = FUNCTION_STDDEV; + } + + int32_t inter = 0; + getResultDataInfo(pSource->base.colType, pSource->base.colBytes, functionId, 0, &pse->resSchema.type, + &pse->resSchema.bytes, &inter, 0, false/*, NULL*/); + pse->colType = pse->resSchema.type; + pse->colBytes = pse->resSchema.bytes; + + } else { // arithmetic expression + pse->colInfo.colId = pSource->base.colInfo.colId; + pse->colType = pSource->base.colType; + pse->colBytes = pSource->base.colBytes; + pse->resSchema.bytes = sizeof(double); + pse->resSchema.type = TSDB_DATA_TYPE_DOUBLE; + + pse->functionId = pSource->base.functionId; + pse->numOfParams = pSource->base.numOfParams; + + for (int32_t j = 0; j < pSource->base.numOfParams; ++j) { + taosVariantAssign(&pse->param[j], &pSource->base.param[j]); +// buildArithmeticExprFromMsg(px, NULL); + } + } + } +#endif + return TSDB_CODE_SUCCESS; +} + +int32_t getColFilterSerializeLen(SQueryStmtInfo* pQueryInfo) { + int16_t numOfCols = (int16_t)taosArrayGetSize(pQueryInfo->colList); + int32_t len = 0; + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumn* pCol = taosArrayGetP(pQueryInfo->colList, i); + for (int32_t j = 0; j < pCol->info.flist.numOfFilters; ++j) { + len += sizeof(SColumnFilterInfo); + if (pCol->info.flist.filterInfo[j].filterstr) { + len += (int32_t)pCol->info.flist.filterInfo[j].len + 1 * TSDB_NCHAR_SIZE; + } + } + } + return len; +} + +int32_t getTagFilterSerializeLen(SQueryStmtInfo* pQueryInfo) { + // serialize tag column query condition + if (pQueryInfo->tagCond.pCond != NULL && taosArrayGetSize(pQueryInfo->tagCond.pCond) > 0) { + STagCond* pTagCond = &pQueryInfo->tagCond; + + STableMetaInfo *pTableMetaInfo = getMetaInfo(pQueryInfo, 0); + STableMeta * pTableMeta = pTableMetaInfo->pTableMeta; + SCond *pCond = getSTableQueryCond(pTagCond, pTableMeta->uid); + if (pCond != NULL && pCond->cond != NULL) 
{ + return pCond->len; + } + } + return 0; +} + +uint32_t convertRelationalOperator(SToken *pToken) { + switch (pToken->type) { + case TK_LT: + return TSDB_RELATION_LESS; + case TK_LE: + return TSDB_RELATION_LESS_EQUAL; + case TK_GT: + return TSDB_RELATION_GREATER; + case TK_GE: + return TSDB_RELATION_GREATER_EQUAL; + case TK_NE: + return TSDB_RELATION_NOT_EQUAL; + case TK_AND: + return TSDB_RELATION_AND; + case TK_OR: + return TSDB_RELATION_OR; + case TK_EQ: + return TSDB_RELATION_EQUAL; + case TK_PLUS: + return TSDB_BINARY_OP_ADD; + + case TK_MINUS: + return TSDB_BINARY_OP_SUBTRACT; + case TK_STAR: + return TSDB_BINARY_OP_MULTIPLY; + case TK_SLASH: + case TK_DIVIDE: + return TSDB_BINARY_OP_DIVIDE; + case TK_REM: + return TSDB_BINARY_OP_REMAINDER; + case TK_LIKE: + return TSDB_RELATION_LIKE; + case TK_MATCH: + return TSDB_RELATION_MATCH; + case TK_NMATCH: + return TSDB_RELATION_NMATCH; + case TK_ISNULL: + return TSDB_RELATION_ISNULL; + case TK_NOTNULL: + return TSDB_RELATION_NOTNULL; + case TK_IN: + return TSDB_RELATION_IN; + default: { return 0; } + } +} + +#if 0 +int32_t tscCreateQueryFromQueryInfo(SQueryStmtInfo* pQueryInfo, SQueryAttr* pQueryAttr, void* addr) { + memset(pQueryAttr, 0, sizeof(SQueryAttr)); + + int16_t numOfCols = (int16_t) taosArrayGetSize(pQueryInfo->colList); + int16_t numOfOutput = (int16_t) getNumOfExprs(pQueryInfo); + + pQueryAttr->topBotQuery = tscIsTopBotQuery(pQueryInfo); + pQueryAttr->hasTagResults = hasTagValOutput(pQueryInfo); + pQueryAttr->stabledev = isStabledev(pQueryInfo); + pQueryAttr->tsCompQuery = isTsCompQuery(pQueryInfo); + pQueryAttr->diffQuery = tscIsDiffDerivQuery(pQueryInfo); + pQueryAttr->simpleAgg = isSimpleAggregateRv(pQueryInfo); + pQueryAttr->needReverseScan = tscNeedReverseScan(pQueryInfo); + pQueryAttr->stableQuery = QUERY_IS_STABLE_QUERY(pQueryInfo->type); + pQueryAttr->groupbyColumn = (!pQueryInfo->stateWindow) && tscGroupbyColumn(pQueryInfo); + pQueryAttr->queryBlockDist = isBlockDistQuery(pQueryInfo); + 
pQueryAttr->pointInterpQuery = tscIsPointInterpQuery(pQueryInfo); + pQueryAttr->timeWindowInterpo = timeWindowInterpoRequired(pQueryInfo); + pQueryAttr->distinct = pQueryInfo->distinct; + pQueryAttr->sw = pQueryInfo->sessionWindow; + pQueryAttr->stateWindow = pQueryInfo->stateWindow; + pQueryAttr->multigroupResult = pQueryInfo->multigroupResult; + + pQueryAttr->numOfCols = numOfCols; + pQueryAttr->numOfOutput = numOfOutput; + pQueryAttr->limit = pQueryInfo->limit; + pQueryAttr->slimit = pQueryInfo->slimit; + pQueryAttr->order = pQueryInfo->order; + pQueryAttr->fillType = pQueryInfo->fillType; + pQueryAttr->havingNum = pQueryInfo->havingFieldNum; + pQueryAttr->pUdfInfo = pQueryInfo->pUdfInfo; + + if (pQueryInfo->order.order == TSDB_ORDER_ASC) { // TODO refactor + pQueryAttr->window = pQueryInfo->window; + } else { + pQueryAttr->window.skey = pQueryInfo->window.ekey; + pQueryAttr->window.ekey = pQueryInfo->window.skey; + } + + memcpy(&pQueryAttr->interval, &pQueryInfo->interval, sizeof(pQueryAttr->interval)); + + STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[0]; + + if (pQueryInfo->groupbyExpr.numOfGroupCols > 0) { + pQueryAttr->pGroupbyExpr = calloc(1, sizeof(SGroupbyExpr)); + *(pQueryAttr->pGroupbyExpr) = pQueryInfo->groupbyExpr; + pQueryAttr->pGroupbyExpr->columnInfo = taosArrayDup(pQueryInfo->groupbyExpr.columnInfo); + } else { + assert(pQueryInfo->groupbyExpr.columnInfo == NULL); + } + + pQueryAttr->pExpr1 = calloc(pQueryAttr->numOfOutput, sizeof(SExprInfo)); + for(int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { + SExprInfo* pExpr = getExprInfo(pQueryInfo, i); + ExprInfoCopy(&pQueryAttr->pExpr1[i], pExpr); + + if (pQueryAttr->pExpr1[i].base.functionId == FUNCTION_ARITHM) { + for (int32_t j = 0; j < pQueryAttr->pExpr1[i].base.numOfParams; ++j) { + buildArithmeticExprFromMsg(&pQueryAttr->pExpr1[i], NULL); + } + } + } + + pQueryAttr->tableCols = calloc(numOfCols, sizeof(SColumnInfo)); + for(int32_t i = 0; i < numOfCols; ++i) { + SColumn* pCol = 
taosArrayGetP(pQueryInfo->colList, i); + if (!isValidDataType(pCol->info.type) || pCol->info.type == TSDB_DATA_TYPE_NULL) { + assert(0); + } + + pQueryAttr->tableCols[i] = pCol->info; + pQueryAttr->tableCols[i].flist.filterInfo = tFilterInfoDup(pCol->info.flist.filterInfo, pQueryAttr->tableCols[i].flist.numOfFilters); + } + + // global aggregate query + if (pQueryAttr->stableQuery && (pQueryAttr->simpleAgg || pQueryAttr->interval.interval > 0) && tscIsTwoStageSTableQuery(pQueryInfo, 0)) { + createGlobalAggregateExpr(pQueryAttr, pQueryInfo); + } + + // for simple table, not for super table + if (pQueryInfo->arithmeticOnAgg) { + pQueryAttr->numOfExpr2 = (int32_t) taosArrayGetSize(pQueryInfo->exprList1); + pQueryAttr->pExpr2 = calloc(pQueryAttr->numOfExpr2, sizeof(SExprInfo)); + for(int32_t i = 0; i < pQueryAttr->numOfExpr2; ++i) { + SExprInfo* p = taosArrayGetP(pQueryInfo->exprList1, i); + ExprInfoCopy(&pQueryAttr->pExpr2[i], p); + } + } + + // tag column info + int32_t code = createTagColumnInfo(pQueryAttr, pQueryInfo, pTableMetaInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pQueryAttr->fillType != TSDB_FILL_NONE) { + pQueryAttr->fillVal = calloc(pQueryAttr->numOfOutput, sizeof(int64_t)); + memcpy(pQueryAttr->fillVal, pQueryInfo->fillVal, pQueryInfo->numOfFillVal * sizeof(int64_t)); + } + + pQueryAttr->srcRowSize = 0; + pQueryAttr->maxTableColumnWidth = 0; + for (int16_t i = 0; i < numOfCols; ++i) { + pQueryAttr->srcRowSize += pQueryAttr->tableCols[i].bytes; + if (pQueryAttr->maxTableColumnWidth < pQueryAttr->tableCols[i].bytes) { + pQueryAttr->maxTableColumnWidth = pQueryAttr->tableCols[i].bytes; + } + } + + pQueryAttr->interBufSize = getOutputInterResultBufSize(pQueryAttr); + + if (pQueryAttr->numOfCols <= 0 && !tscQueryTags(pQueryInfo) && !pQueryAttr->queryBlockDist) { + tscError("%p illegal value of numOfCols in query msg: %" PRIu64 ", table cols:%d", addr, + (uint64_t)pQueryAttr->numOfCols, numOfCols); + + return 
TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (pQueryAttr->interval.interval < 0) { + tscError("%p illegal value of aggregation time interval in query msg: %" PRId64, addr, + (int64_t)pQueryInfo->interval.interval); + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + if (pQueryAttr->pGroupbyExpr != NULL && pQueryAttr->pGroupbyExpr->numOfGroupCols < 0) { + tscError("%p illegal value of numOfGroupCols in query msg: %d", addr, pQueryInfo->groupbyExpr.numOfGroupCols); + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doAddTableName(char* nextStr, char** str, SArray* pNameArray, SSqlObj* pSql) { + int32_t code = TSDB_CODE_SUCCESS; + SSqlCmd* pCmd = &pSql->cmd; + + char tablename[TSDB_TABLE_FNAME_LEN] = {0}; + int32_t len = 0; + + if (nextStr == NULL) { + tstrncpy(tablename, *str, TSDB_TABLE_FNAME_LEN); + len = (int32_t) strlen(tablename); + } else { + len = (int32_t)(nextStr - (*str)); + if (len >= TSDB_TABLE_NAME_LEN) { + sprintf(pCmd->payload, "table name too long"); + return TSDB_CODE_TSC_INVALID_OPERATION; + } + + memcpy(tablename, *str, nextStr - (*str)); + tablename[len] = '\0'; + } + + (*str) = nextStr + 1; + len = (int32_t)strtrim(tablename); + + SToken sToken = {.n = len, .type = TK_ID, .z = tablename}; + tGetToken(tablename, &sToken.type); + + // Check if the table name available or not + if (tscValidateName(&sToken) != TSDB_CODE_SUCCESS) { + sprintf(pCmd->payload, "table name is invalid"); + return TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH; + } + + SName name = {0}; + if ((code = tscSetTableFullName(&name, &sToken, pSql)) != TSDB_CODE_SUCCESS) { + return code; + } + + memset(tablename, 0, tListLen(tablename)); + tNameExtractFullName(&name, tablename); + + char* p = strdup(tablename); + taosArrayPush(pNameArray, &p); + return TSDB_CODE_SUCCESS; +} + +int32_t nameComparFn(const void* n1, const void* n2) { + int32_t ret = strcmp(*(char**)n1, *(char**)n2); + if (ret == 0) { + return 0; + } else { + return ret > 0? 
1:-1; + } +} + +static void freeContent(void* p) { + char* ptr = *(char**)p; + tfree(ptr); +} + + +int tscTransferTableNameList(SSqlObj *pSql, const char *pNameList, int32_t length, SArray* pNameArray) { + SSqlCmd *pCmd = &pSql->cmd; + + pCmd->command = TSDB_SQL_MULTI_META; + pCmd->msgType = TSDB_MSG_TYPE_CM_TABLES_META; + + int code = TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH; + char *str = (char *)pNameList; + + SQueryStmtInfo *pQueryInfo = tscGetQueryInfoS(pCmd); + if (pQueryInfo == NULL) { + pSql->res.code = terrno; + return terrno; + } + + char *nextStr; + while (1) { + nextStr = strchr(str, ','); + if (nextStr == NULL) { + code = doAddTableName(nextStr, &str, pNameArray, pSql); + break; + } + + code = doAddTableName(nextStr, &str, pNameArray, pSql); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (taosArrayGetSize(pNameArray) > TSDB_MULTI_TABLEMETA_MAX_NUM) { + code = TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH; + sprintf(pCmd->payload, "tables over the max number"); + return code; + } + } + + size_t len = taosArrayGetSize(pNameArray); + if (len == 1) { + return TSDB_CODE_SUCCESS; + } + + if (len > TSDB_MULTI_TABLEMETA_MAX_NUM) { + code = TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH; + sprintf(pCmd->payload, "tables over the max number"); + return code; + } + + taosArraySort(pNameArray, nameComparFn); + taosArrayRemoveDuplicate(pNameArray, nameComparFn, freeContent); + return TSDB_CODE_SUCCESS; +} + +bool vgroupInfoIdentical(SNewVgroupInfo *pExisted, SVgroupMsg* src) { + assert(pExisted != NULL && src != NULL); + if (pExisted->numOfEps != src->numOfEps) { + return false; + } + + for(int32_t i = 0; i < pExisted->numOfEps; ++i) { + if (pExisted->ep[i].port != src->epAddr[i].port) { + return false; + } + + if (strncmp(pExisted->ep[i].fqdn, src->epAddr[i].fqdn, tListLen(pExisted->ep[i].fqdn)) != 0) { + return false; + } + } + + return true; +} + +SNewVgroupInfo createNewVgroupInfo(SVgroupMsg *pVgroupMsg) { + assert(pVgroupMsg != NULL); + + SNewVgroupInfo info = {0}; 
+ info.numOfEps = pVgroupMsg->numOfEps; + info.vgId = pVgroupMsg->vgId; + info.inUse = 0; // 0 is the default value of inUse in case of multiple replica + + assert(info.numOfEps >= 1 && info.vgId >= 1); + for(int32_t i = 0; i < pVgroupMsg->numOfEps; ++i) { + tstrncpy(info.ep[i].fqdn, pVgroupMsg->epAddr[i].fqdn, TSDB_FQDN_LEN); + info.ep[i].port = pVgroupMsg->epAddr[i].port; + } + + return info; +} + +char* cloneCurrentDBName(SSqlObj* pSql) { + char *p = NULL; + HttpContext *pCtx = NULL; + + pthread_mutex_lock(&pSql->pTscObj->mutex); + STscObj *pTscObj = pSql->pTscObj; + switch (pTscObj->from) { + case TAOS_REQ_FROM_HTTP: + pCtx = pSql->param; + if (pCtx && pCtx->db[0] != '\0') { + char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN] = {0}; + int32_t len = sprintf(db, "%s%s%s", pTscObj->acctId, TS_PATH_DELIMITER, pCtx->db); + assert(len <= sizeof(db)); + + p = strdup(db); + } + break; + default: + break; + } + if (p == NULL) { + p = strdup(pSql->pTscObj->db); + } + pthread_mutex_unlock(&pSql->pTscObj->mutex); + + return p; +} + +#endif \ No newline at end of file diff --git a/source/libs/parser/src/queryInfoUtil.c b/source/libs/parser/src/queryInfoUtil.c new file mode 100644 index 0000000000..4dd45fd54c --- /dev/null +++ b/source/libs/parser/src/queryInfoUtil.c @@ -0,0 +1,375 @@ +#include "queryInfoUtil.h" +#include "astGenerator.h" +#include "function.h" +#include "os.h" +#include "parser.h" +#include "parserInt.h" +#include "parserUtil.h" + +static struct SSchema _s = { + .colId = TSDB_TBNAME_COLUMN_INDEX, + .type = TSDB_DATA_TYPE_BINARY, + .bytes = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE, + .name = "tbname", +}; + +SSchema* getTbnameColumnSchema() { + return &_s; +} + +size_t getNumOfExprs(SQueryStmtInfo* pQueryInfo) { + return taosArrayGetSize(pQueryInfo->exprList); +} + +SSchema* getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex) { + assert(pTableMeta != NULL && pTableMeta->schema != NULL && colIndex >= 0 && colIndex < getNumOfColumns(pTableMeta)); + + 
SSchema* pSchema = (SSchema*) pTableMeta->schema; + return &pSchema[colIndex]; +} + +STableComInfo getTableInfo(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + return pTableMeta->tableInfo; +} + +int32_t getNumOfColumns(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + // table created according to super table, use data from super table + return getTableInfo(pTableMeta).numOfColumns; +} + +int32_t getNumOfTags(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + return getTableInfo(pTableMeta).numOfTags; +} + +SSchema *getTableColumnSchema(const STableMeta *pTableMeta) { + assert(pTableMeta != NULL); + return (SSchema*) pTableMeta->schema; +} + +SSchema* getTableTagSchema(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL && (pTableMeta->tableType == TSDB_SUPER_TABLE || pTableMeta->tableType == TSDB_CHILD_TABLE)); + return getOneColumnSchema(pTableMeta, getTableInfo(pTableMeta).numOfColumns); +} + +static tExprNode* createUnaryFunctionExprNode(int32_t functionId, SSchema* pSchema, tExprNode* pColumnNode) { + + if (pColumnNode == NULL) { + pColumnNode = calloc(1, sizeof(tExprNode)); + pColumnNode->nodeType = TEXPR_COL_NODE; + pColumnNode->pSchema = calloc(1, sizeof(SSchema)); + memcpy(pColumnNode->pSchema, pSchema, sizeof(SSchema)); + } else { + assert(pSchema == NULL); + } + + tExprNode* pNode = calloc(1, sizeof(tExprNode)); + pNode->nodeType = TEXPR_UNARYEXPR_NODE; + pNode->_node.functionId = functionId; + pNode->_node.pLeft = pColumnNode; + + return pNode; +} + +SExprInfo* createBinaryExprInfo(tExprNode* pNode, SSchema* pResSchema) { + assert(pNode != NULL && pResSchema != NULL); + + SExprInfo* pExpr = calloc(1, sizeof(SExprInfo)); + if (pExpr == NULL) { + return NULL; + } + + pExpr->pExpr = pNode; + memcpy(&pExpr->base.resSchema, pResSchema, sizeof(SSchema)); + return pExpr; +} + +SExprInfo* createExprInfo(STableMetaInfo* pTableMetaInfo, int16_t functionId, SColumnIndex* pColIndex, tExprNode* pParamExpr, SSchema* 
pResSchema, int16_t interSize) { + SExprInfo* pExpr = calloc(1, sizeof(SExprInfo)); + if (pExpr == NULL) { + return NULL; + } + + SSqlExpr* p = &pExpr->base; + + if (pParamExpr != NULL) { + pExpr->pExpr = createUnaryFunctionExprNode(functionId, NULL, pParamExpr); + } else if (pColIndex->columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + assert(pParamExpr == NULL); + + SSchema* s = getTbnameColumnSchema(); + p->colInfo.colId = TSDB_TBNAME_COLUMN_INDEX; + pExpr->pExpr = createUnaryFunctionExprNode(functionId, s, pParamExpr); + } else if (pColIndex->columnIndex <= TSDB_UD_COLUMN_INDEX || functionId == FUNCTION_BLKINFO) { + assert(pParamExpr == NULL); + + p->colInfo.colId = pColIndex->columnIndex; + SSchema s = createSchema(pResSchema->type, pResSchema->bytes, pColIndex->columnIndex, pResSchema->name); + pExpr->pExpr = createUnaryFunctionExprNode(functionId, &s, pParamExpr); + } else { + int32_t len = tListLen(p->colInfo.name); + if (TSDB_COL_IS_TAG(pColIndex->type)) { + SSchema* pSchema = getTableTagSchema(pTableMetaInfo->pTableMeta); + p->colInfo.colId = pSchema[pColIndex->columnIndex].colId; + pExpr->pExpr = createUnaryFunctionExprNode(functionId, &pSchema[pColIndex->columnIndex], pParamExpr); + snprintf(p->colInfo.name, len, "%s.%s", pTableMetaInfo->aliasName, pSchema[pColIndex->columnIndex].name); + } else if (pTableMetaInfo->pTableMeta != NULL) { + // in handling select database/version/server_status(), the pTableMeta is NULL + SSchema* pSchema = getOneColumnSchema(pTableMetaInfo->pTableMeta, pColIndex->columnIndex); + p->colInfo.colId = pSchema->colId; + snprintf(p->colInfo.name, len, "%s.%s", pTableMetaInfo->aliasName, pSchema->name); + + pExpr->pExpr = createUnaryFunctionExprNode(functionId, pSchema, pParamExpr); + } + } + + p->colInfo.flag = pColIndex->type; + p->colInfo.colIndex = pColIndex->columnIndex; + p->interBytes = interSize; + memcpy(&p->resSchema, pResSchema, sizeof(SSchema)); + + if (pTableMetaInfo->pTableMeta) { + p->uid = 
pTableMetaInfo->pTableMeta->uid; + } + + return pExpr; +} + +void addExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index, SExprInfo* pExprInfo) { + assert(pQueryInfo != NULL && pQueryInfo->exprList != NULL); + + int32_t num = (int32_t) taosArrayGetSize(pQueryInfo->exprList); + if (index == num) { + taosArrayPush(pQueryInfo->exprList, &pExprInfo); + } else { + taosArrayInsert(pQueryInfo->exprList, index, &pExprInfo); + } +} + +void updateExprInfo(SExprInfo* pExprInfo, int16_t functionId, int32_t colId, int16_t srcColumnIndex, int16_t resType, int16_t resSize) { + assert(pExprInfo != NULL); + + SSqlExpr* pse = &pExprInfo->base; + pExprInfo->pExpr->_node.functionId = functionId; + + pse->colInfo.colIndex = srcColumnIndex; + pse->colInfo.colId = colId; + pse->resSchema.type = resType; + pse->resSchema.bytes = resSize; +} + +SExprInfo* getExprInfo(SQueryStmtInfo* pQueryInfo, int32_t index) { + assert(pQueryInfo != NULL && pQueryInfo->exprList && index >= 0); + return taosArrayGetP(pQueryInfo->exprList, index); +} + +void destroyExprInfo(SExprInfo* pExprInfo) { + tExprTreeDestroy(pExprInfo->pExpr, NULL); + tfree(pExprInfo); +} + +void dropAllExprInfo(SArray* pExprInfo) { + size_t size = taosArrayGetSize(pExprInfo); + + for(int32_t i = 0; i < size; ++i) { + SExprInfo* pExpr = taosArrayGetP(pExprInfo, i); + destroyExprInfo(pExpr); + } + + taosArrayDestroy(pExprInfo); +} + +void addExprInfoParam(SSqlExpr* pExpr, char* argument, int32_t type, int32_t bytes) { + assert (pExpr != NULL || argument != NULL || bytes != 0); + + // set parameter value + // transfer to tVariant from byte data/no ascii data + taosVariantCreateFromBinary(&pExpr->param[pExpr->numOfParams], argument, bytes, type); + pExpr->numOfParams += 1; + + assert(pExpr->numOfParams <= 3); +} + +void assignExprInfo(SExprInfo* dst, const SExprInfo* src) { + assert(dst != NULL && src != NULL); + + *dst = *src; +#if 0 + if (src->base.flist.numOfFilters > 0) { + dst->base.flist.filterInfo = 
calloc(src->base.flist.numOfFilters, sizeof(SColumnFilterInfo)); + memcpy(dst->base.flist.filterInfo, src->base.flist.filterInfo, sizeof(SColumnFilterInfo) * src->base.flist.numOfFilters); + } +#endif + +// dst->pExpr = exprdup(src->pExpr); + memset(dst->base.param, 0, sizeof(SVariant) * tListLen(dst->base.param)); + for (int32_t j = 0; j < src->base.numOfParams; ++j) { + taosVariantAssign(&dst->base.param[j], &src->base.param[j]); + } +} + +int32_t copyExprInfoList(SArray* dst, const SArray* src, uint64_t uid, bool deepcopy) { + assert(src != NULL && dst != NULL); + + size_t size = taosArrayGetSize(src); + for (int32_t i = 0; i < size; ++i) { + SExprInfo* pExpr = taosArrayGetP(src, i); + + if (pExpr->base.uid == uid) { + if (deepcopy) { + SExprInfo* p1 = calloc(1, sizeof(SExprInfo)); + assignExprInfo(p1, pExpr); + + taosArrayPush(dst, &p1); + } else { + taosArrayPush(dst, &pExpr); + } + } + } + + return 0; +} + +int32_t copyAllExprInfo(SArray* dst, const SArray* src, bool deepcopy) { + assert(src != NULL && dst != NULL); + + size_t size = taosArrayGetSize(src); + for (int32_t i = 0; i < size; ++i) { + SExprInfo* pExpr = taosArrayGetP(src, i); + + SExprInfo* p1 = calloc(1, sizeof(SExprInfo)); + assignExprInfo(p1, pExpr); + taosArrayPush(dst, &p1); + } + + return 0; +} + +//void* tSqlExprDestroy(SExprInfo* pExpr) { +// if (pExpr == NULL) { +// return NULL; +// } +// +// SSqlExpr* p = &pExpr->base; +// for(int32_t i = 0; i < tListLen(p->param); ++i) { +// taosVariantDestroy(&p->param[i]); +// } +// +// if (p->flist.numOfFilters > 0) { +// tfree(p->flist.filterInfo); +// } +// +// if (pExpr->pExpr != NULL) { +// tExprTreeDestroy(pExpr->pExpr, NULL); +// } +// +// tfree(pExpr); +// return NULL; +//} + +int32_t getResRowLength(SArray* pExprList) { + size_t num = taosArrayGetSize(pExprList); + if (num == 0) { + return 0; + } + + int32_t size = 0; + for(int32_t i = 0; i < num; ++i) { + SExprInfo* pExpr = taosArrayGetP(pExprList, i); + size += pExpr->base.resSchema.bytes; 
+ } + + return size; +} + +static void freeQueryInfoImpl(SQueryStmtInfo* pQueryInfo) { + cleanupTagCond(&pQueryInfo->tagCond); + cleanupColumnCond(&pQueryInfo->colCond); + cleanupFieldInfo(&pQueryInfo->fieldsInfo); + + dropAllExprInfo(pQueryInfo->exprList); + pQueryInfo->exprList = NULL; + + if (pQueryInfo->exprList1 != NULL) { + dropAllExprInfo(pQueryInfo->exprList1); + pQueryInfo->exprList1 = NULL; + } + + columnListDestroy(pQueryInfo->colList); + pQueryInfo->colList = NULL; + + if (pQueryInfo->groupbyExpr.columnInfo != NULL) { + taosArrayDestroy(pQueryInfo->groupbyExpr.columnInfo); + pQueryInfo->groupbyExpr.columnInfo = NULL; + } + + pQueryInfo->fillType = 0; + + tfree(pQueryInfo->fillVal); + tfree(pQueryInfo->buf); + + taosArrayDestroy(pQueryInfo->pUpstream); + pQueryInfo->pUpstream = NULL; + pQueryInfo->bufLen = 0; +} + +void freeQueryInfo(SQueryStmtInfo* pQueryInfo, bool removeCachedMeta, uint64_t id) { + while(pQueryInfo != NULL) { + SQueryStmtInfo* p = pQueryInfo->sibling; + + size_t numOfUpstream = taosArrayGetSize(pQueryInfo->pUpstream); + for(int32_t i = 0; i < numOfUpstream; ++i) { + SQueryStmtInfo* pUpQueryInfo = taosArrayGetP(pQueryInfo->pUpstream, i); + freeQueryInfoImpl(pUpQueryInfo); + clearAllTableMetaInfo(pUpQueryInfo, removeCachedMeta, id); + tfree(pUpQueryInfo); + } + + freeQueryInfoImpl(pQueryInfo); + clearAllTableMetaInfo(pQueryInfo, removeCachedMeta, id); + + tfree(pQueryInfo); + pQueryInfo = p; + } +} + +SArray* extractFunctionIdList(SArray* pExprInfoList) { + assert(pExprInfoList != NULL); + + size_t len = taosArrayGetSize(pExprInfoList); + SArray* p = taosArrayInit(len, sizeof(int16_t)); + for(int32_t i = 0; i < len; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pExprInfoList, i); + taosArrayPush(p, &pExprInfo->pExpr->_node.functionId); + } + + return p; +} + +bool tscHasColumnFilter(SQueryStmtInfo* pQueryInfo) { + // filter on primary timestamp column + if (pQueryInfo->window.skey != INT64_MIN || pQueryInfo->window.ekey != INT64_MAX) { + 
return true; + } + + size_t size = taosArrayGetSize(pQueryInfo->colList); + for (int32_t i = 0; i < size; ++i) { + SColumn* pCol = taosArrayGetP(pQueryInfo->colList, i); + if (pCol->info.flist.numOfFilters > 0) { + return true; + } + } + + return false; +} + +//void tscClearInterpInfo(SQueryStmtInfo* pQueryInfo) { +// if (!tscIsPointInterpQuery(pQueryInfo)) { +// return; +// } +// +// pQueryInfo->fillType = TSDB_FILL_NONE; +// tfree(pQueryInfo->fillVal); +//} \ No newline at end of file diff --git a/source/libs/parser/src/sql.c b/source/libs/parser/src/sql.c index 11f1f7ad04..2b12be2a17 100644 --- a/source/libs/parser/src/sql.c +++ b/source/libs/parser/src/sql.c @@ -108,13 +108,13 @@ typedef union { int yy130; SArray* yy135; SIntervalVal yy160; - TAOS_FIELD yy181; SVariant yy191; SLimit yy247; SCreateDbInfo yy256; SWindowStateVal yy258; int32_t yy262; SCreateAcctInfo yy277; + SField yy304; SRelationInfo* yy460; SSqlNode* yy488; SSessionWindowVal yy511; @@ -2431,10 +2431,10 @@ static void yy_reduce( { setCreateDbInfo(pInfo, TSDB_SQL_CREATE_DB, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy256, &yymsp[-2].minor.yy0);} break; case 62: /* cmd ::= CREATE FUNCTION ids AS ids OUTPUTTYPE typename bufsize */ -{ setCreateFuncInfo(pInfo, TSDB_SQL_CREATE_FUNCTION, &yymsp[-5].minor.yy0, &yymsp[-3].minor.yy0, &yymsp[-1].minor.yy181, &yymsp[0].minor.yy0, 1);} +{ setCreateFuncInfo(pInfo, TSDB_SQL_CREATE_FUNCTION, &yymsp[-5].minor.yy0, &yymsp[-3].minor.yy0, &yymsp[-1].minor.yy304, &yymsp[0].minor.yy0, 1);} break; case 63: /* cmd ::= CREATE AGGREGATE FUNCTION ids AS ids OUTPUTTYPE typename bufsize */ -{ setCreateFuncInfo(pInfo, TSDB_SQL_CREATE_FUNCTION, &yymsp[-5].minor.yy0, &yymsp[-3].minor.yy0, &yymsp[-1].minor.yy181, &yymsp[0].minor.yy0, 2);} +{ setCreateFuncInfo(pInfo, TSDB_SQL_CREATE_FUNCTION, &yymsp[-5].minor.yy0, &yymsp[-3].minor.yy0, &yymsp[-1].minor.yy304, &yymsp[0].minor.yy0, 2);} break; case 64: /* cmd ::= CREATE USER ids PASS ids */ { setCreateUserSql(pInfo, 
&yymsp[-2].minor.yy0, &yymsp[0].minor.yy0);} @@ -2601,29 +2601,29 @@ static void yy_reduce( case 133: /* typename ::= ids */ { yymsp[0].minor.yy0.type = 0; - tSetColumnType (&yylhsminor.yy181, &yymsp[0].minor.yy0); + tSetColumnType (&yylhsminor.yy304, &yymsp[0].minor.yy0); } - yymsp[0].minor.yy181 = yylhsminor.yy181; + yymsp[0].minor.yy304 = yylhsminor.yy304; break; case 134: /* typename ::= ids LP signed RP */ { if (yymsp[-1].minor.yy531 <= 0) { yymsp[-3].minor.yy0.type = 0; - tSetColumnType(&yylhsminor.yy181, &yymsp[-3].minor.yy0); + tSetColumnType(&yylhsminor.yy304, &yymsp[-3].minor.yy0); } else { yymsp[-3].minor.yy0.type = -yymsp[-1].minor.yy531; // negative value of name length - tSetColumnType(&yylhsminor.yy181, &yymsp[-3].minor.yy0); + tSetColumnType(&yylhsminor.yy304, &yymsp[-3].minor.yy0); } } - yymsp[-3].minor.yy181 = yylhsminor.yy181; + yymsp[-3].minor.yy304 = yylhsminor.yy304; break; case 135: /* typename ::= ids UNSIGNED */ { yymsp[-1].minor.yy0.type = 0; yymsp[-1].minor.yy0.n = ((yymsp[0].minor.yy0.z + yymsp[0].minor.yy0.n) - yymsp[-1].minor.yy0.z); - tSetColumnType (&yylhsminor.yy181, &yymsp[-1].minor.yy0); + tSetColumnType (&yylhsminor.yy304, &yymsp[-1].minor.yy0); } - yymsp[-1].minor.yy181 = yylhsminor.yy181; + yymsp[-1].minor.yy304 = yylhsminor.yy304; break; case 136: /* signed ::= INTEGER */ { yylhsminor.yy531 = strtol(yymsp[0].minor.yy0.z, NULL, 10); } @@ -2711,18 +2711,18 @@ static void yy_reduce( yymsp[-4].minor.yy110 = yylhsminor.yy110; break; case 152: /* columnlist ::= columnlist COMMA column */ -{taosArrayPush(yymsp[-2].minor.yy135, &yymsp[0].minor.yy181); yylhsminor.yy135 = yymsp[-2].minor.yy135; } +{taosArrayPush(yymsp[-2].minor.yy135, &yymsp[0].minor.yy304); yylhsminor.yy135 = yymsp[-2].minor.yy135; } yymsp[-2].minor.yy135 = yylhsminor.yy135; break; case 153: /* columnlist ::= column */ -{yylhsminor.yy135 = taosArrayInit(4, sizeof(TAOS_FIELD)); taosArrayPush(yylhsminor.yy135, &yymsp[0].minor.yy181);} +{yylhsminor.yy135 = 
taosArrayInit(4, sizeof(SField)); taosArrayPush(yylhsminor.yy135, &yymsp[0].minor.yy304);} yymsp[0].minor.yy135 = yylhsminor.yy135; break; case 154: /* column ::= ids typename */ { - tSetColumnInfo(&yylhsminor.yy181, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy181); + tSetColumnInfo(&yylhsminor.yy304, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy304); } - yymsp[-1].minor.yy181 = yylhsminor.yy181; + yymsp[-1].minor.yy304 = yylhsminor.yy304; break; case 161: /* tagitem ::= NULL */ { yymsp[0].minor.yy0.type = 0; taosVariantCreate(&yylhsminor.yy191, yymsp[0].minor.yy0.z, yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.type); } @@ -2893,7 +2893,7 @@ static void yy_reduce( toTSDBType(yymsp[-3].minor.yy0.type); taosVariantCreate(&A, yymsp[-3].minor.yy0.z, yymsp[-3].minor.yy0.n, yymsp[-3].minor.yy0.type); - tVariantListInsert(yymsp[-1].minor.yy135, &A, -1, 0); + tListItemInsert(yymsp[-1].minor.yy135, &A, -1, 0); yymsp[-5].minor.yy135 = yymsp[-1].minor.yy135; } break; @@ -3049,11 +3049,11 @@ static void yy_reduce( yymsp[0].minor.yy526 = yylhsminor.yy526; break; case 247: /* expr ::= ID LP exprlist RP */ -{ tAppendFuncName(pInfo->funcs, &yymsp[-3].minor.yy0); yylhsminor.yy526 = tSqlExprCreateFunction(yymsp[-1].minor.yy135, &yymsp[-3].minor.yy0, &yymsp[0].minor.yy0, yymsp[-3].minor.yy0.type); } +{ tRecordFuncName(pInfo->funcs, &yymsp[-3].minor.yy0); yylhsminor.yy526 = tSqlExprCreateFunction(yymsp[-1].minor.yy135, &yymsp[-3].minor.yy0, &yymsp[0].minor.yy0, yymsp[-3].minor.yy0.type); } yymsp[-3].minor.yy526 = yylhsminor.yy526; break; case 248: /* expr ::= ID LP STAR RP */ -{ tAppendFuncName(pInfo->funcs, &yymsp[-3].minor.yy0); yylhsminor.yy526 = tSqlExprCreateFunction(NULL, &yymsp[-3].minor.yy0, &yymsp[0].minor.yy0, yymsp[-3].minor.yy0.type); } +{ tRecordFuncName(pInfo->funcs, &yymsp[-3].minor.yy0); yylhsminor.yy526 = tSqlExprCreateFunction(NULL, &yymsp[-3].minor.yy0, &yymsp[0].minor.yy0, yymsp[-3].minor.yy0.type); } yymsp[-3].minor.yy526 = yylhsminor.yy526; break; case 249: /* expr ::= expr 
IS NULL */ diff --git a/source/libs/parser/src/ttokenizer.c b/source/libs/parser/src/ttokenizer.c index 5e3cefea10..b71fb4538e 100644 --- a/source/libs/parser/src/ttokenizer.c +++ b/source/libs/parser/src/ttokenizer.c @@ -411,6 +411,7 @@ uint32_t tGetToken(char* z, uint32_t* tokenId) { *tokenId = TK_QUESTION; return 1; } + case '`': case '\'': case '"': { int delim = z[0]; @@ -434,7 +435,7 @@ uint32_t tGetToken(char* z, uint32_t* tokenId) { if (z[i]) i++; if (strEnd) { - *tokenId = TK_STRING; + *tokenId = (delim == '`')? TK_ID:TK_STRING; return i; } diff --git a/source/libs/parser/test/CMakeLists.txt b/source/libs/parser/test/CMakeLists.txt new file mode 100644 index 0000000000..184d44a53b --- /dev/null +++ b/source/libs/parser/test/CMakeLists.txt @@ -0,0 +1,18 @@ + +MESSAGE(STATUS "build parser unit test") + +# GoogleTest requires at least C++11 +SET(CMAKE_CXX_STANDARD 11) +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) + +ADD_EXECUTABLE(astTest ${SOURCE_LIST}) +TARGET_LINK_LIBRARIES( + astTest + PUBLIC os util common parser catalog transport gtest +) + +TARGET_INCLUDE_DIRECTORIES( + astTest + PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/parser/" + PRIVATE "${CMAKE_SOURCE_DIR}/source/libs/parser/inc" +) diff --git a/source/libs/parser/test/parserTests.cpp b/source/libs/parser/test/parserTests.cpp index 6dea4a4e57..1f4203e9bf 100644 --- a/source/libs/parser/test/parserTests.cpp +++ b/source/libs/parser/test/parserTests.cpp @@ -11,4 +11,368 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
- */ \ No newline at end of file + */ + +#include +#include +#pragma GCC diagnostic ignored "-Wwrite-strings" + +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" +#include "os.h" + +#include "astGenerator.h" +#include "parserInt.h" +#include "taos.h" +#include "tdef.h" +#include "tvariant.h" + +namespace { +void setSchema(SSchema* p, int32_t type, int32_t bytes, const char* name, int32_t colId) { + p->colId = colId; + p->bytes = bytes; + p->type = type; + strcpy(p->name, name); +} + +void setTableMetaInfo(SQueryStmtInfo* pQueryInfo, SMetaReq *req) { + pQueryInfo->numOfTables = 1; + + pQueryInfo->pTableMetaInfo = (STableMetaInfo**)calloc(1, POINTER_BYTES); + STableMetaInfo* pTableMetaInfo = (STableMetaInfo*)calloc(1, sizeof(STableMetaInfo)); + pQueryInfo->pTableMetaInfo[0] = pTableMetaInfo; + + SName* name = (SName*)taosArrayGet(req->pTableName, 0); + + memcpy(&pTableMetaInfo->name, taosArrayGet(req->pTableName, 0), sizeof(SName)); + pTableMetaInfo->pTableMeta = (STableMeta*)calloc(1, sizeof(STableMeta) + 4 * sizeof(SSchema)); + strcpy(pTableMetaInfo->aliasName, name->tname); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + pTableMeta->tableType = TSDB_NORMAL_TABLE; + pTableMeta->tableInfo.numOfColumns = 4; + pTableMeta->tableInfo.rowSize = 28; + pTableMeta->uid = 110; + + pTableMetaInfo->tagColList = (SArray*) taosArrayInit(4, POINTER_BYTES); + + SSchema* pSchema = pTableMetaInfo->pTableMeta->schema; + setSchema(&pSchema[0], TSDB_DATA_TYPE_TIMESTAMP, 8, "ts", 0); + setSchema(&pSchema[1], TSDB_DATA_TYPE_INT, 4, "a", 1); + setSchema(&pSchema[2], TSDB_DATA_TYPE_DOUBLE, 8, "b", 2); + setSchema(&pSchema[3], TSDB_DATA_TYPE_DOUBLE, 8, "col", 3); + +} +} + +//TEST(testCase, validateAST_test) { +// SSqlInfo info1 = doGenerateAST("select a a1111, a+b + 22, tbname from `t.1abc` where tsexprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 3); +// +// SExprInfo* p1 = 
(SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 110); +// ASSERT_EQ(p1->base.numOfParams, 0); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_INT); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "a1111"); +// ASSERT_STRCASEEQ(p1->base.colInfo.name, "t.1abc.a"); +// ASSERT_EQ(p1->base.colInfo.colId, 1); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "a"); +// +// ASSERT_EQ(taosArrayGetSize(pExprList), 3); +// +// SExprInfo* p2 = (SExprInfo*) taosArrayGetP(pExprList, 1); +// ASSERT_EQ(p2->base.uid, 0); +// ASSERT_EQ(p2->base.numOfParams, 1); // it is the serialized binary string of expression. +// ASSERT_EQ(p2->base.resSchema.type, TSDB_DATA_TYPE_DOUBLE); +// ASSERT_STRCASEEQ(p2->base.resSchema.name, "a+b + 22"); +// +//// ASSERT_STRCASEEQ(p2->base.colInfo.name, "t.1abc.a"); +//// ASSERT_EQ(p1->base.colInfo.colId, 1); +//// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p2->base.token, "a+b + 22"); +// +// ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 3); +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 3); +//} +// +//TEST(testCase, function_Test) { +// SSqlInfo info1 = doGenerateAST("select count(a) from `t.1abc`"); +// ASSERT_EQ(info1.valid, true); +// +// char msg[128] = {0}; +// SMsgBuf buf; +// buf.len = 128; +// buf.buf = msg; +// +// SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); +// int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); +// ASSERT_EQ(code, 0); +// +// SMetaReq req = {0}; +// int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); +// ASSERT_EQ(ret, 0); +// ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); +// +// SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); +// initQueryInfo(pQueryInfo); +// setTableMetaInfo(pQueryInfo, &req); +// +// SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); +// ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); +// 
+// SArray* pExprList = pQueryInfo->exprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 1); +// +// SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 110); +// ASSERT_EQ(p1->base.numOfParams, 0); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_BIGINT); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "count(a)"); +// ASSERT_STRCASEEQ(p1->base.colInfo.name, "t.1abc.a"); +// ASSERT_EQ(p1->base.colInfo.colId, 1); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "count(a)"); +// ASSERT_EQ(p1->base.interBytes, 8); +// +// ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 2); +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 1); +//} +// +//TEST(testCase, function_Test2) { +// SSqlInfo info1 = doGenerateAST("select count(a) abc from `t.1abc`"); +// ASSERT_EQ(info1.valid, true); +// +// char msg[128] = {0}; +// SMsgBuf buf; +// buf.len = 128; +// buf.buf = msg; +// +// SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); +// int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); +// ASSERT_EQ(code, 0); +// +// SMetaReq req = {0}; +// int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); +// ASSERT_EQ(ret, 0); +// ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); +// +// SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); +// initQueryInfo(pQueryInfo); +// setTableMetaInfo(pQueryInfo, &req); +// +// SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); +// ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); +// +// SArray* pExprList = pQueryInfo->exprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 1); +// +// SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 110); +// ASSERT_EQ(p1->base.numOfParams, 0); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_BIGINT); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "abc"); +// ASSERT_STRCASEEQ(p1->base.colInfo.name, 
"t.1abc.a"); +// ASSERT_EQ(p1->base.colInfo.colId, 1); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "count(a)"); +// ASSERT_EQ(p1->base.interBytes, 8); +// +// ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 2); +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 1); +//} +// +//TEST(testCase, function_Test3) { +// SSqlInfo info1 = doGenerateAST("select first(*) from `t.1abc`"); +// ASSERT_EQ(info1.valid, true); +// +// char msg[128] = {0}; +// SMsgBuf buf; +// buf.len = 128; +// buf.buf = msg; +// +// SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); +// int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); +// ASSERT_EQ(code, 0); +// +// SMetaReq req = {0}; +// int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); +// ASSERT_EQ(ret, 0); +// ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); +// +// SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); +// initQueryInfo(pQueryInfo); +// setTableMetaInfo(pQueryInfo, &req); +// +// SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); +// ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); +// +// SArray* pExprList = pQueryInfo->exprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 4); +// +// SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 110); +// ASSERT_EQ(p1->base.numOfParams, 0); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_TIMESTAMP); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "first(ts)"); +// ASSERT_STRCASEEQ(p1->base.colInfo.name, "t.1abc.ts"); +// ASSERT_EQ(p1->base.colInfo.colId, 0); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "first(ts)"); +// ASSERT_EQ(p1->base.interBytes, 24); +// +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 4); +//} +// +//TEST(testCase, function_Test4) { +// SSqlInfo info1 = doGenerateAST("select _block_dist() as a1 from `t.1abc`"); +// 
ASSERT_EQ(info1.valid, true); +// +// char msg[128] = {0}; +// SMsgBuf buf; +// buf.len = 128; +// buf.buf = msg; +// +// SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); +// int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); +// ASSERT_EQ(code, 0); +// +// SMetaReq req = {0}; +// int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); +// ASSERT_EQ(ret, 0); +// ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); +// +// SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); +// initQueryInfo(pQueryInfo); +// setTableMetaInfo(pQueryInfo, &req); +// +// SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); +// ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); +// +// SArray* pExprList = pQueryInfo->exprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 1); +// +// SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 110); +// ASSERT_EQ(p1->base.numOfParams, 1); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_BINARY); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "a1"); +//// ASSERT_STRCASEEQ(p1->base.colInfo.name, "t.1abc.ts"); +//// ASSERT_EQ(p1->base.colInfo.colId, 0); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "_block_dist()"); +// ASSERT_EQ(p1->base.interBytes, 0); +// +// ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 1); +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 1); +//} +// +//TEST(testCase, function_Test5) { +// SSqlInfo info1 = doGenerateAST("select sum(a) + avg(b) as a1 from `t.1abc`"); +// ASSERT_EQ(info1.valid, true); +// +// char msg[128] = {0}; +// SMsgBuf buf; +// buf.len = 128; +// buf.buf = msg; +// +// SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); +// int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); +// ASSERT_EQ(code, 0); +// +// SMetaReq req = {0}; +// int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 
128); +// ASSERT_EQ(ret, 0); +// ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); +// +// SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); +// initQueryInfo(pQueryInfo); +// setTableMetaInfo(pQueryInfo, &req); +// +// SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); +// ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); +// ASSERT_EQ(ret, 0); +// +// SArray* pExprList = pQueryInfo->exprList; +// ASSERT_EQ(taosArrayGetSize(pExprList), 3); +// +// SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); +// ASSERT_EQ(p1->base.uid, 0); +// ASSERT_EQ(p1->base.numOfParams, 1); +// ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_DOUBLE); +// ASSERT_STRCASEEQ(p1->base.resSchema.name, "a1"); +//// ASSERT_STRCASEEQ(p1->base.colInfo.name, "t.1abc.ts"); +//// ASSERT_EQ(p1->base.colInfo.colId, 0); +// ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); +// ASSERT_STRCASEEQ(p1->base.token, "sum(a) + avg(b)"); +// ASSERT_EQ(p1->base.interBytes, 0); +// +// ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 3); +// ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 1); +//} + +TEST(testCase, function_Test6) { + SSqlInfo info1 = doGenerateAST("select sum(a+b) as a1, first(b*a) from `t.1abc`"); + ASSERT_EQ(info1.valid, true); + + char msg[128] = {0}; + SMsgBuf buf; + buf.len = 128; + buf.buf = msg; + + SSqlNode* pNode = (SSqlNode*) taosArrayGetP(((SArray*)info1.list), 0); + int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); + ASSERT_EQ(code, 0); + + SMetaReq req = {0}; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + ASSERT_EQ(ret, 0); + ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); + + SQueryStmtInfo* pQueryInfo = (SQueryStmtInfo*)calloc(1, sizeof(SQueryStmtInfo)); + initQueryInfo(pQueryInfo); + setTableMetaInfo(pQueryInfo, &req); + + SSqlNode* pSqlNode = (SSqlNode*)taosArrayGetP(info1.list, 0); + ret = validateSqlNode(pSqlNode, pQueryInfo, &buf); + ASSERT_EQ(ret, 0); + + SArray* pExprList = 
pQueryInfo->exprList; + ASSERT_EQ(taosArrayGetSize(pExprList), 2); + + SExprInfo* p1 = (SExprInfo*) taosArrayGetP(pExprList, 0); + ASSERT_EQ(p1->base.uid, 110); + ASSERT_EQ(p1->base.numOfParams, 0); + ASSERT_EQ(p1->base.resSchema.type, TSDB_DATA_TYPE_DOUBLE); + ASSERT_STRCASEEQ(p1->base.resSchema.name, "a1"); + ASSERT_EQ(p1->base.colInfo.flag, TSDB_COL_NORMAL); + ASSERT_STRCASEEQ(p1->base.token, "sum(a+b)"); + ASSERT_EQ(p1->base.interBytes, 16); + + ASSERT_EQ(taosArrayGetSize(pQueryInfo->colList), 3); + ASSERT_EQ(pQueryInfo->fieldsInfo.numOfOutput, 2); +} \ No newline at end of file diff --git a/source/libs/parser/test/tokenizerTest.cpp b/source/libs/parser/test/tokenizerTest.cpp new file mode 100644 index 0000000000..3527e27eb4 --- /dev/null +++ b/source/libs/parser/test/tokenizerTest.cpp @@ -0,0 +1,717 @@ +#include +#include +#pragma GCC diagnostic ignored "-Wwrite-strings" + +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" +#include "os.h" + +#include "taos.h" +#include "tvariant.h" +#include "tdef.h" +#include "ttoken.h" +#include "astGenerator.h" +#include "parserUtil.h" +#include "parserInt.h" + +namespace { +int32_t testValidateName(char* name) { + SToken token = {0}; + token.z = name; + token.n = strlen(name); + token.type = 0; + + tGetToken(name, &token.type); + return parserValidateIdToken(&token); +} + +SToken createToken(char* s) { + SToken t = {0}; + + t.type = TK_STRING; + t.z = s; + t.n = strlen(s); + return t; +} +} // namespace + +static void _init_tvariant_bool(SVariant* t) { + t->i64 = TSDB_FALSE; + t->nType = TSDB_DATA_TYPE_BOOL; +} + +static void _init_tvariant_tinyint(SVariant* t) { + t->i64 = -27; + t->nType = TSDB_DATA_TYPE_TINYINT; +} + +static void _init_tvariant_int(SVariant* t) { + t->i64 = -23997659; + t->nType = TSDB_DATA_TYPE_INT; +} + +static void _init_tvariant_bigint(SVariant* t) { + t->i64 = -3333333333333; + t->nType = 
TSDB_DATA_TYPE_BIGINT; +} + +static void _init_tvariant_float(SVariant* t) { + t->d = -8991212199.8987878776; + t->nType = TSDB_DATA_TYPE_FLOAT; +} + +static void _init_tvariant_binary(SVariant* t) { + taosVariantDestroy(t); + + t->pz = (char*)calloc(1, 20); //"2e3"); + t->nType = TSDB_DATA_TYPE_BINARY; + strcpy(t->pz, "2e5"); + t->nLen = strlen(t->pz); +} + +static void _init_tvariant_nchar(SVariant* t) { + taosVariantDestroy(t); + + t->wpz = (wchar_t*)calloc(1, 20 * TSDB_NCHAR_SIZE); + t->nType = TSDB_DATA_TYPE_NCHAR; + wcscpy(t->wpz, L"-2000000.8765"); + t->nLen = twcslen(t->wpz); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +TEST(testCase, validateToken_test) { + char t01[] = "abc"; + EXPECT_EQ(testValidateName(t01), TSDB_CODE_SUCCESS); + + char t110[] = "`1233abc.911`"; + EXPECT_EQ(testValidateName(t110), TSDB_CODE_SUCCESS); + + char t02[] = "'abc'"; + EXPECT_EQ(testValidateName(t02), TSDB_CODE_TSC_INVALID_OPERATION); + + char t1[] = "abc.def"; + EXPECT_EQ(testValidateName(t1), TSDB_CODE_SUCCESS); + printf("%s\n", t1); + + char t98[] = "abc.DeF"; + EXPECT_EQ(testValidateName(t98), TSDB_CODE_SUCCESS); + EXPECT_STREQ(t98, "abc.def"); + printf("%s\n", t98); + + char t97[] = "257.abc"; + EXPECT_EQ(testValidateName(t97), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t97); + + char t96[] = "_257.aBc"; + EXPECT_EQ(testValidateName(t96), TSDB_CODE_SUCCESS); + EXPECT_STREQ(t96, "_257.abc"); + printf("%s\n", t96); + + char t99[] = "abc . 
def"; + EXPECT_EQ(testValidateName(t99), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t99); + + char t2[] = "'abc.def'"; + EXPECT_EQ(testValidateName(t2), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t2); + + char t3[] = "'abc'.def"; + EXPECT_EQ(testValidateName(t3), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t3); + + char t4[] = "'abc'.'def'"; + EXPECT_EQ(testValidateName(t4), TSDB_CODE_TSC_INVALID_OPERATION); + + char t5[] = "table.'def'"; + EXPECT_EQ(testValidateName(t5), TSDB_CODE_TSC_INVALID_OPERATION); + + char t6[] = "'table'.'def'"; + EXPECT_EQ(testValidateName(t6), TSDB_CODE_TSC_INVALID_OPERATION); + + char t7[] = "'_ab1234'.'def'"; + EXPECT_EQ(testValidateName(t7), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t7); + + char t8[] = "'_ab&^%1234'.'def'"; + EXPECT_EQ(testValidateName(t8), TSDB_CODE_TSC_INVALID_OPERATION); + + char t9[] = "'_123'.'gtest中文'"; + EXPECT_EQ(testValidateName(t9), TSDB_CODE_TSC_INVALID_OPERATION); + + char t10[] = "abc.'gtest中文'"; + EXPECT_EQ(testValidateName(t10), TSDB_CODE_TSC_INVALID_OPERATION); + + char t10_1[] = "abc.'中文gtest'"; + EXPECT_EQ(testValidateName(t10_1), TSDB_CODE_TSC_INVALID_OPERATION); + + char t11[] = "'192.168.0.1'.abc"; + EXPECT_EQ(testValidateName(t11), TSDB_CODE_TSC_INVALID_OPERATION); + + char t12[] = "192.168.0.1.abc"; + EXPECT_EQ(testValidateName(t12), TSDB_CODE_TSC_INVALID_OPERATION); + + char t13[] = "abc."; + EXPECT_EQ(testValidateName(t13), TSDB_CODE_TSC_INVALID_OPERATION); + + char t14[] = ".abc"; + EXPECT_EQ(testValidateName(t14), TSDB_CODE_TSC_INVALID_OPERATION); + + char t15[] = ".'abc'"; + EXPECT_EQ(testValidateName(t15), TSDB_CODE_TSC_INVALID_OPERATION); + + char t16[] = ".abc'"; + EXPECT_EQ(testValidateName(t16), TSDB_CODE_TSC_INVALID_OPERATION); + + char t17[] = "123a.\"abc\""; + EXPECT_EQ(testValidateName(t17), TSDB_CODE_TSC_INVALID_OPERATION); + printf("%s\n", t17); + + char t18[] = "a.\"abc\""; + EXPECT_EQ(testValidateName(t18), TSDB_CODE_TSC_INVALID_OPERATION); + 
printf("%s\n", t18); + + char t19[] = "'_ab1234'.'def'.'ab123'"; + EXPECT_EQ(testValidateName(t19), TSDB_CODE_TSC_INVALID_OPERATION); + + char t20[] = "'_ab1234*&^'"; + EXPECT_EQ(testValidateName(t20), TSDB_CODE_TSC_INVALID_OPERATION); + + char t21[] = "'1234_abc'"; + EXPECT_EQ(testValidateName(t21), TSDB_CODE_TSC_INVALID_OPERATION); + + // =======Containing capital letters================= + char t30[] = "ABC"; + EXPECT_EQ(testValidateName(t30), TSDB_CODE_SUCCESS); + + char t31[] = "'ABC'"; + EXPECT_EQ(testValidateName(t31), TSDB_CODE_TSC_INVALID_OPERATION); + + char t32[] = "ABC.def"; + EXPECT_EQ(testValidateName(t32), TSDB_CODE_SUCCESS); + + char t33[] = "'ABC.def"; + EXPECT_EQ(testValidateName(t33), TSDB_CODE_TSC_INVALID_OPERATION); + + char t33_0[] = "abc.DEF'"; + EXPECT_EQ(testValidateName(t33_0), TSDB_CODE_TSC_INVALID_OPERATION); + + char t34[] = "'ABC.def'"; + // int32_t tmp0 = testValidateName(t34); + EXPECT_EQ(testValidateName(t34), TSDB_CODE_TSC_INVALID_OPERATION); + + char t35[] = "'ABC'.def"; + EXPECT_EQ(testValidateName(t35), TSDB_CODE_TSC_INVALID_OPERATION); + + char t36[] = "ABC.DEF"; + EXPECT_EQ(testValidateName(t36), TSDB_CODE_SUCCESS); + + char t37[] = "abc.DEF"; + EXPECT_EQ(testValidateName(t37), TSDB_CODE_SUCCESS); + + char t37_1[] = "abc._123DEF"; + EXPECT_EQ(testValidateName(t37_1), TSDB_CODE_SUCCESS); + + char t38[] = "'abc'.\"DEF\""; + EXPECT_EQ(testValidateName(t38), TSDB_CODE_TSC_INVALID_OPERATION); + + // do not use key words + char t39[] = "table.'DEF'"; + EXPECT_EQ(testValidateName(t39), TSDB_CODE_TSC_INVALID_OPERATION); + + char t40[] = "'table'.'DEF'"; + EXPECT_EQ(testValidateName(t40), TSDB_CODE_TSC_INVALID_OPERATION); + + char t41[] = "'_abXYZ1234'.'deFF'"; + EXPECT_EQ(testValidateName(t41), TSDB_CODE_TSC_INVALID_OPERATION); + + char t42[] = "'_abDEF&^%1234'.'DIef'"; + EXPECT_EQ(testValidateName(t42), TSDB_CODE_TSC_INVALID_OPERATION); + + char t43[] = "'_123'.'Gtest中文'"; + EXPECT_EQ(testValidateName(t43), 
TSDB_CODE_TSC_INVALID_OPERATION); + + char t44[] = "'aABC'.'Gtest中文'"; + EXPECT_EQ(testValidateName(t44), TSDB_CODE_TSC_INVALID_OPERATION); + + char t45[] = "'ABC'."; + EXPECT_EQ(testValidateName(t45), TSDB_CODE_TSC_INVALID_OPERATION); + + char t46[] = ".'ABC'"; + EXPECT_EQ(testValidateName(t46), TSDB_CODE_TSC_INVALID_OPERATION); + + char t47[] = "a.\"aTWc\""; + EXPECT_EQ(testValidateName(t47), TSDB_CODE_TSC_INVALID_OPERATION); + + // ================has space ================= + char t60[] = " ABC "; + EXPECT_EQ(testValidateName(t60), TSDB_CODE_TSC_INVALID_OPERATION); + + char t60_1[] = " ABC "; + EXPECT_EQ(testValidateName(t60_1), TSDB_CODE_TSC_INVALID_OPERATION); + + char t61[] = "' ABC '"; + EXPECT_EQ(testValidateName(t61), TSDB_CODE_TSC_INVALID_OPERATION); + + char t61_1[] = "' ABC '"; + EXPECT_EQ(testValidateName(t61_1), TSDB_CODE_TSC_INVALID_OPERATION); + + char t62[] = " ABC . def "; + EXPECT_EQ(testValidateName(t62), TSDB_CODE_TSC_INVALID_OPERATION); + + char t63[] = "' ABC . def "; + EXPECT_EQ(testValidateName(t63), TSDB_CODE_TSC_INVALID_OPERATION); + + char t63_0[] = " abc . DEF ' "; + EXPECT_EQ(testValidateName(t63_0), TSDB_CODE_TSC_INVALID_OPERATION); + + char t64[] = " ' ABC . def ' "; + // int32_t tmp1 = testValidateName(t64); + EXPECT_EQ(testValidateName(t64), TSDB_CODE_TSC_INVALID_OPERATION); + + char t65[] = " ' ABC '. def "; + EXPECT_EQ(testValidateName(t65), TSDB_CODE_TSC_INVALID_OPERATION); + + char t66[] = "' ABC '.' DEF '"; + EXPECT_EQ(testValidateName(t66), TSDB_CODE_TSC_INVALID_OPERATION); + + char t67[] = "abc . ' DEF '"; + EXPECT_EQ(testValidateName(t67), TSDB_CODE_TSC_INVALID_OPERATION); + + char t68[] = "' abc '.' 
DEF '"; + EXPECT_EQ(testValidateName(t68), TSDB_CODE_TSC_INVALID_OPERATION); + + // do not use key words + char t69[] = "table.'DEF'"; + EXPECT_EQ(testValidateName(t69), TSDB_CODE_TSC_INVALID_OPERATION); + + char t70[] = "'table'.'DEF'"; + EXPECT_EQ(testValidateName(t70), TSDB_CODE_TSC_INVALID_OPERATION); + + char t71[] = "'_abXYZ1234 '.' deFF '"; + EXPECT_EQ(testValidateName(t71), TSDB_CODE_TSC_INVALID_OPERATION); + + char t72[] = "'_abDEF&^%1234'.' DIef'"; + EXPECT_EQ(testValidateName(t72), TSDB_CODE_TSC_INVALID_OPERATION); + + char t73[] = "'_123'.' Gtest中文'"; + EXPECT_EQ(testValidateName(t73), TSDB_CODE_TSC_INVALID_OPERATION); + + char t74[] = "' aABC'.'Gtest中文'"; + EXPECT_EQ(testValidateName(t74), TSDB_CODE_TSC_INVALID_OPERATION); + + char t75[] = "' ABC '."; + EXPECT_EQ(testValidateName(t75), TSDB_CODE_TSC_INVALID_OPERATION); + + char t76[] = ".' ABC'"; + EXPECT_EQ(testValidateName(t76), TSDB_CODE_TSC_INVALID_OPERATION); + + char t77[] = " a . \"aTWc\" "; + EXPECT_EQ(testValidateName(t77), TSDB_CODE_TSC_INVALID_OPERATION); + + char t78[] = " a.\"aTWc \""; + EXPECT_EQ(testValidateName(t78), TSDB_CODE_TSC_INVALID_OPERATION); + + // ===============muti string by space =================== + // There's no such case. + // char t160[] = "A BC"; + // EXPECT_EQ(testValidateName(t160), TSDB_CODE_TSC_INVALID_OPERATION); + // printf("end:%s\n", t160); + + // There's no such case. + // char t161[] = "' A BC '"; + // EXPECT_EQ(testValidateName(t161), TSDB_CODE_TSC_INVALID_OPERATION); + + char t162[] = " AB C . de f "; + EXPECT_EQ(testValidateName(t162), TSDB_CODE_TSC_INVALID_OPERATION); + + char t163[] = "' AB C . de f "; + EXPECT_EQ(testValidateName(t163), TSDB_CODE_TSC_INVALID_OPERATION); + + char t163_0[] = " ab c . DE F ' "; + EXPECT_EQ(testValidateName(t163_0), TSDB_CODE_TSC_INVALID_OPERATION); + + char t164[] = " ' AB C . 
de f ' "; + // int32_t tmp2 = testValidateName(t164); + EXPECT_EQ(testValidateName(t164), TSDB_CODE_TSC_INVALID_OPERATION); + + char t165[] = " ' A BC '. de f "; + EXPECT_EQ(testValidateName(t165), TSDB_CODE_TSC_INVALID_OPERATION); + + char t166[] = "' AB C '.' DE F '"; + EXPECT_EQ(testValidateName(t166), TSDB_CODE_TSC_INVALID_OPERATION); + + char t167[] = "ab c . ' D EF '"; + EXPECT_EQ(testValidateName(t167), TSDB_CODE_TSC_INVALID_OPERATION); + + char t168[] = "' a bc '.' DE F '"; + EXPECT_EQ(testValidateName(t168), TSDB_CODE_TSC_INVALID_OPERATION); +} + +#if 0 +TEST(testCase, tvariant_convert) { + // 1. bool data to all other data types + SVariant t = {0}; + _init_tvariant_bool(&t); + + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_TINYINT), 0); + EXPECT_EQ(t.i64, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_SMALLINT), 0); + EXPECT_EQ(t.i64, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_EQ(t.d, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_EQ(t.d, 0); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "FALSE"); + taosVariantDestroy(&t); + + _init_tvariant_bool(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"FALSE"); + taosVariantDestroy(&t); + + // 2. 
tinyint to other data types + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 1); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_TINYINT), 0); + EXPECT_EQ(t.i64, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_SMALLINT), 0); + EXPECT_EQ(t.i64, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_INT), 0); + EXPECT_EQ(t.i64, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_EQ(t.d, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_EQ(t.d, -27); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "-27"); + taosVariantDestroy(&t); + + _init_tvariant_tinyint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"-27"); + taosVariantDestroy(&t); + + // 3. 
int to other data + // types////////////////////////////////////////////////////////////////// + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 1); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_TINYINT), 0); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_SMALLINT), 0); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_INT), 0); + EXPECT_EQ(t.i64, -23997659); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, -23997659); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_EQ(t.d, -23997659); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_EQ(t.d, -23997659); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "-23997659"); + taosVariantDestroy(&t); + + _init_tvariant_int(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"-23997659"); + taosVariantDestroy(&t); + + // 4. 
bigint to other data + // type////////////////////////////////////////////////////////////////////////////// + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 1); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_TINYINT), 0); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_SMALLINT), 0); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_INT), 0); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, -3333333333333); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_EQ(t.d, -3333333333333); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_EQ(t.d, -3333333333333); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "-3333333333333"); + taosVariantDestroy(&t); + + _init_tvariant_bigint(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"-3333333333333"); + taosVariantDestroy(&t); + + // 5. 
float to other data + // types//////////////////////////////////////////////////////////////////////// + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 1); + + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, -8991212199); + + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_DOUBLE_EQ(t.d, -8991212199.8987885); + + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_DOUBLE_EQ(t.d, -8991212199.8987885); + + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "-8991212199.898788"); + taosVariantDestroy(&t); + + _init_tvariant_float(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"-8991212199.898788"); + taosVariantDestroy(&t); + + // 6. binary to other data types + // ////////////////////////////////////////////////////////////////// + t.pz = "true"; + t.nLen = strlen(t.pz); + t.nType = TSDB_DATA_TYPE_BINARY; + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 1); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), -1); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, 200000); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_DOUBLE_EQ(t.d, 200000); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_DOUBLE_EQ(t.d, 200000); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "2e5"); + taosVariantDestroy(&t); + + _init_tvariant_binary(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, 
L"2e5"); + taosVariantDestroy(&t); + + // 7. nchar to other data types + // ////////////////////////////////////////////////////////////////// + t.wpz = L"FALSE"; + t.nLen = wcslen(t.wpz); + t.nType = TSDB_DATA_TYPE_NCHAR; + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + EXPECT_EQ(t.i64, 0); + + _init_tvariant_nchar(&t); + EXPECT_LE(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BOOL), 0); + + _init_tvariant_nchar(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BIGINT), 0); + EXPECT_EQ(t.i64, -2000000); + + _init_tvariant_nchar(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_FLOAT), 0); + EXPECT_DOUBLE_EQ(t.d, -2000000.8765); + + _init_tvariant_nchar(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_DOUBLE), 0); + EXPECT_DOUBLE_EQ(t.d, -2000000.8765); + + _init_tvariant_nchar(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_BINARY), 0); + EXPECT_STREQ(t.pz, "-2000000.8765"); + taosVariantDestroy(&t); + + _init_tvariant_nchar(&t); + EXPECT_EQ(taosVariantTypeSetType(&t, TSDB_DATA_TYPE_NCHAR), 0); + EXPECT_STREQ(t.wpz, L"-2000000.8765"); + taosVariantDestroy(&t); +} +#endif + +TEST(testCase, tGetToken_Test) { + char* s = ".123 "; + uint32_t type = 0; + + int32_t len = tGetToken(s, &type); + EXPECT_EQ(type, TK_FLOAT); + EXPECT_EQ(len, strlen(s) - 1); + + char s1[] = "1.123e10 "; + len = tGetToken(s1, &type); + EXPECT_EQ(type, TK_FLOAT); + EXPECT_EQ(len, strlen(s1) - 1); + + char s4[] = "0xff "; + len = tGetToken(s4, &type); + EXPECT_EQ(type, TK_HEX); + EXPECT_EQ(len, strlen(s4) - 1); + + // invalid data type + char s2[] = "e10 "; + len = tGetToken(s2, &type); + EXPECT_FALSE(type == TK_FLOAT); + + char s3[] = "1.1.1.1"; + len = tGetToken(s3, &type); + EXPECT_EQ(type, TK_IPTOKEN); + EXPECT_EQ(len, strlen(s3)); + + char s5[] = "0x "; + len = tGetToken(s5, &type); + EXPECT_FALSE(type == TK_HEX); +} + +TEST(testCase, isValidNumber_test) { + SToken t1 = createToken("123abc"); + + EXPECT_EQ(tGetNumericStringType(&t1), 
TK_ILLEGAL); + + t1 = createToken("0xabc"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_HEX); + + t1 = createToken("0b11101"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_BIN); + + t1 = createToken(".134abc"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_ILLEGAL); + + t1 = createToken("1e1 "); + EXPECT_EQ(tGetNumericStringType(&t1), TK_ILLEGAL); + + t1 = createToken("1+2"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_ILLEGAL); + + t1 = createToken("-0x123"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_HEX); + + t1 = createToken("-1"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_INTEGER); + + t1 = createToken("-0b1110"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_BIN); + + t1 = createToken("-.234"); + EXPECT_EQ(tGetNumericStringType(&t1), TK_FLOAT); +} + +TEST(testCase, generateAST_test) { + SSqlInfo info = doGenerateAST("select * from t1 where ts < now"); + ASSERT_EQ(info.valid, true); + + SSqlInfo info1 = doGenerateAST("select * from `t.1abc` where tsinfo.type = type; + pNode->info.name = strdup(name); + + if (pTableInfo->uid != 0 && pTableInfo->tableName) { // it is a true table + pNode->tableInfo.uid = pTableInfo->uid; + pNode->tableInfo.tableName = strdup(pTableInfo->tableName); + } + + pNode->numOfOutput = numOfOutput; + pNode->pExpr = calloc(numOfOutput, sizeof(SExprInfo)); + for(int32_t i = 0; i < numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGet(pNode->pExpr, i); + assignExprInfo(pExprInfo, pExpr[i]); + } + + pNode->pPrevNodes = taosArrayInit(4, POINTER_BYTES); + for(int32_t i = 0; i < numOfPrev; ++i) { + taosArrayPush(pNode->pPrevNodes, &prev[i]); + } + + switch(type) { + case QNODE_TABLESCAN: { + STimeWindow* window = calloc(1, sizeof(STimeWindow)); + memcpy(window, pExtInfo, sizeof(STimeWindow)); + pNode->pExtInfo = window; + break; + } + + case QNODE_TIMEWINDOW: { + SInterval* pInterval = calloc(1, sizeof(SInterval)); + pNode->pExtInfo = pInterval; + memcpy(pInterval, pExtInfo, sizeof(SInterval)); + break; + } + + case QNODE_GROUPBY: { + SGroupbyExpr* 
p = (SGroupbyExpr*) pExtInfo; + SGroupbyExpr* pGroupbyExpr = calloc(1, sizeof(SGroupbyExpr)); + + pGroupbyExpr->tableIndex = p->tableIndex; + pGroupbyExpr->orderType = p->orderType; + pGroupbyExpr->orderIndex = p->orderIndex; + pGroupbyExpr->columnInfo = taosArrayDup(p->columnInfo); + pNode->pExtInfo = pGroupbyExpr; + break; + } + + case QNODE_FILL: { // todo !! + pNode->pExtInfo = pExtInfo; + break; + } + + case QNODE_LIMIT: { + pNode->pExtInfo = calloc(1, sizeof(SLimit)); + memcpy(pNode->pExtInfo, pExtInfo, sizeof(SLimit)); + break; + } + } + + return pNode; +} + +static SQueryPlanNode* doAddTableColumnNode(SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, SQueryTableInfo* info, + SArray* pExprs, SArray* tableCols) { + if (pQueryInfo->info.onlyTagQuery) { + int32_t num = (int32_t) taosArrayGetSize(pExprs); + SQueryPlanNode* pNode = createQueryNode(QNODE_TAGSCAN, "TableTagScan", NULL, 0, pExprs->pData, num, info, NULL); + + if (pQueryInfo->distinct) { + pNode = createQueryNode(QNODE_DISTINCT, "Distinct", &pNode, 1, pExprs->pData, num, info, NULL); + } + + return pNode; + } + + STimeWindow* window = &pQueryInfo->window; + SQueryPlanNode* pNode = createQueryNode(QNODE_TABLESCAN, "TableScan", NULL, 0, NULL, 0, info, window); + + if (pQueryInfo->info.projectionQuery) { + int32_t numOfOutput = (int32_t) taosArrayGetSize(pExprs); + pNode = createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, pExprs->pData, numOfOutput, info, NULL); + } else { + // table source column projection, generate the projection expr + int32_t numOfCols = (int32_t) taosArrayGetSize(tableCols); + SExprInfo** pExpr = calloc(numOfCols, POINTER_BYTES); + SSchema* pSchema = pTableMetaInfo->pTableMeta->schema; + + STableMetaInfo* pTableMetaInfo1 = getMetaInfo(pQueryInfo, 0); + SSchema resultSchema = *pSchema; + resultSchema.colId = getNewResColId(); + + for (int32_t i = 0; i < numOfCols; ++i) { + SColumn* pCol = taosArrayGetP(tableCols, i); + SColumnIndex index = {.tableIndex = 0, 
.columnIndex = pCol->columnIndex}; + + SExprInfo* p = createExprInfo(pTableMetaInfo1, FUNCTION_PRJ, &index, NULL, &resultSchema, 0); + pExpr[i] = p; + } + + pNode = createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, pExpr, numOfCols, info, NULL); +// dropAllExprInfo(pExpr); + tfree(pExpr); + } + + return pNode; +} + +static SQueryPlanNode* doCreateQueryPlanForOneTableImpl(SQueryStmtInfo* pQueryInfo, SQueryPlanNode* pNode, SQueryTableInfo* info, + SArray* pExprs) { + // check for aggregation + size_t numOfGroupCols = taosArrayGetSize(pQueryInfo->groupbyExpr.columnInfo); + + if (pQueryInfo->interval.interval > 0) { + int32_t numOfOutput = (int32_t)taosArrayGetSize(pExprs); + + pNode = createQueryNode(QNODE_TIMEWINDOW, "TimeWindowAgg", &pNode, 1, pExprs->pData, numOfOutput, info, &pQueryInfo->interval); + if (numOfGroupCols != 0) { + pNode = createQueryNode(QNODE_GROUPBY, "Groupby", &pNode, 1, pExprs->pData, numOfOutput, info, &pQueryInfo->groupbyExpr); + } + } else if (numOfGroupCols > 0) { + int32_t numOfOutput = (int32_t)taosArrayGetSize(pExprs); + pNode = createQueryNode(QNODE_GROUPBY, "Groupby", &pNode, 1, pExprs->pData, numOfOutput, info, + &pQueryInfo->groupbyExpr); + } else if (pQueryInfo->sessionWindow.gap > 0) { + pNode = createQueryNode(QNODE_SESSIONWINDOW, "SessionWindowAgg", &pNode, 1, NULL, 0, info, NULL); + } else if (pQueryInfo->info.simpleAgg) { + int32_t numOfOutput = (int32_t)taosArrayGetSize(pExprs); + pNode = createQueryNode(QNODE_AGGREGATE, "Aggregate", &pNode, 1, pExprs->pData, numOfOutput, info, NULL); + } + + if (pQueryInfo->havingFieldNum > 0 || pQueryInfo->info.arithmeticOnAgg) { + int32_t numOfExpr = (int32_t)taosArrayGetSize(pQueryInfo->exprList1); + pNode = + createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, pQueryInfo->exprList1->pData, numOfExpr, info, NULL); + } + + if (pQueryInfo->fillType != TSDB_FILL_NONE) { + SFillEssInfo* pInfo = calloc(1, sizeof(SFillEssInfo)); + pInfo->fillType = pQueryInfo->fillType; + pInfo->val = 
calloc(pNode->numOfOutput, sizeof(int64_t)); + memcpy(pInfo->val, pQueryInfo->fillVal, pNode->numOfOutput); + + pNode = createQueryNode(QNODE_FILL, "Fill", &pNode, 1, NULL, 0, info, pInfo); + } + + if (pQueryInfo->limit.limit != -1 || pQueryInfo->limit.offset != 0) { + pNode = createQueryNode(QNODE_LIMIT, "Limit", &pNode, 1, NULL, 0, info, &pQueryInfo->limit); + } + + return pNode; +} + +static SQueryPlanNode* doCreateQueryPlanForOneTable(SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, SArray* pExprs, + SArray* tableCols) { + char name[TSDB_TABLE_FNAME_LEN] = {0}; + tNameExtractFullName(&pTableMetaInfo->name, name); + + SQueryTableInfo info = {.tableName = strdup(name), .uid = pTableMetaInfo->pTableMeta->uid,}; + + // handle the only tag query + SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, pTableMetaInfo, &info, pExprs, tableCols); + if (pQueryInfo->info.onlyTagQuery) { + tfree(info.tableName); + return pNode; + } + + SQueryPlanNode* pNode1 = doCreateQueryPlanForOneTableImpl(pQueryInfo, pNode, &info, pExprs); + tfree(info.tableName); + return pNode1; +} + +SArray* createQueryPlanImpl(SQueryStmtInfo* pQueryInfo) { + SArray* upstream = NULL; + + if (pQueryInfo->pUpstream != NULL && taosArrayGetSize(pQueryInfo->pUpstream) > 0) { // subquery in the from clause + upstream = taosArrayInit(4, POINTER_BYTES); + + size_t size = taosArrayGetSize(pQueryInfo->pUpstream); + for(int32_t i = 0; i < size; ++i) { + SQueryStmtInfo* pq = taosArrayGet(pQueryInfo->pUpstream, i); + SArray* p = createQueryPlanImpl(pq); + taosArrayAddBatch(upstream, p->pData, (int32_t) taosArrayGetSize(p)); + } + } + + if (pQueryInfo->numOfTables > 1) { // it is a join query + // 1. 
separate the select clause according to table + taosArrayDestroy(upstream); + upstream = taosArrayInit(5, POINTER_BYTES); + + for(int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[i]; + uint64_t uid = pTableMetaInfo->pTableMeta->uid; + + SArray* exprList = taosArrayInit(4, POINTER_BYTES); + if (copyExprInfoList(exprList, pQueryInfo->exprList, uid, true) != 0) { + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; + dropAllExprInfo(exprList); + exit(-1); + } + + // 2. create the query execution node + char name[TSDB_TABLE_FNAME_LEN] = {0}; + tNameExtractFullName(&pTableMetaInfo->name, name); + SQueryTableInfo info = {.tableName = strdup(name), .uid = pTableMetaInfo->pTableMeta->uid,}; + + // 3. get the required table column list + SArray* tableColumnList = taosArrayInit(4, sizeof(SColumn)); + columnListCopy(tableColumnList, pQueryInfo->colList, uid); + + // 4. add the projection query node + SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, pTableMetaInfo, &info, exprList, tableColumnList); + columnListDestroy(tableColumnList); + dropAllExprInfo(exprList); + taosArrayPush(upstream, &pNode); + } + + // 3. add the join node here + SQueryTableInfo info = {0}; + int32_t num = (int32_t) taosArrayGetSize(pQueryInfo->exprList); + SQueryPlanNode* pNode = createQueryNode(QNODE_JOIN, "Join", upstream->pData, pQueryInfo->numOfTables, + pQueryInfo->exprList->pData, num, &info, NULL); + + // 4. 
add the aggregation or projection execution node + pNode = doCreateQueryPlanForOneTableImpl(pQueryInfo, pNode, &info, pQueryInfo->exprList); + upstream = taosArrayInit(5, POINTER_BYTES); + taosArrayPush(upstream, &pNode); + } else { // only one table, normal query process + STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[0]; + SQueryPlanNode* pNode = doCreateQueryPlanForOneTable(pQueryInfo, pTableMetaInfo, pQueryInfo->exprList, pQueryInfo->colList); + upstream = taosArrayInit(5, POINTER_BYTES); + taosArrayPush(upstream, &pNode); + } + + return upstream; +} + +static void doDestroyQueryNode(SQueryPlanNode* pQueryNode) { + tfree(pQueryNode->pExtInfo); + tfree(pQueryNode->pSchema); + tfree(pQueryNode->info.name); + + tfree(pQueryNode->tableInfo.tableName); + dropAllExprInfo(pQueryNode->pExpr); + + if (pQueryNode->pPrevNodes != NULL) { + int32_t size = (int32_t) taosArrayGetSize(pQueryNode->pPrevNodes); + for(int32_t i = 0; i < size; ++i) { + SQueryPlanNode* p = taosArrayGetP(pQueryNode->pPrevNodes, i); + doDestroyQueryNode(p); + } + + taosArrayDestroy(pQueryNode->pPrevNodes); + } + + tfree(pQueryNode); +} + +bool hasAliasName(SExprInfo* pExpr) { + assert(pExpr != NULL); + return true; +// return strncmp(pExpr->base.token, pExpr->base., tListLen(pExpr->base.aliasName)) != 0; +} + +static int32_t doPrintPlan(char* buf, SQueryPlanNode* pQueryNode, int32_t level, int32_t totalLen) { + if (level > 0) { + sprintf(buf + totalLen, "%*c", level, ' '); + totalLen += level; + } + + int32_t len1 = sprintf(buf + totalLen, "%s(", pQueryNode->info.name); + int32_t len = len1 + totalLen; + + switch(pQueryNode->info.type) { + case QNODE_TABLESCAN: { + STimeWindow* win = (STimeWindow*)pQueryNode->pExtInfo; + len1 = sprintf(buf + len, "%s #%" PRIu64 ") time_range: %" PRId64 " - %" PRId64 "\n", + pQueryNode->tableInfo.tableName, pQueryNode->tableInfo.uid, win->skey, win->ekey); + len += len1; + break; + } + + case QNODE_PROJECT: { + len1 = sprintf(buf + len, "cols: "); + len 
+= len1; + + for(int32_t i = 0; i < pQueryNode->numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pQueryNode->pExpr, i); + + SSqlExpr* p = &pExprInfo->base; + len1 = sprintf(buf + len, "[%s #%d]", p->resSchema.name, p->resSchema.colId); + len += len1; + + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len, ", "); + len += len1; + } + } + + len1 = sprintf(buf + len, ")"); + len += len1; + + //todo print filter info + len1 = sprintf(buf + len, " filters:(nil)\n"); + len += len1; + break; + } + + case QNODE_AGGREGATE: { + for(int32_t i = 0; i < pQueryNode->numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pQueryNode->pExpr, i); + + SSqlExpr* pExpr = &pExprInfo->base; +// if (hasAliasName(&pQueryNode->pExpr[i])) { + len1 = sprintf(buf + len,"[%s #%s]", pExpr->token, pExpr->resSchema.name); +// } else { +// len1 = sprintf(buf + len,"[%s]", pExpr->token); +// } + + len += len1; + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len, ", "); + len += len1; + } + } + + len1 = sprintf(buf + len, ")\n"); + len += len1; + break; + } + + case QNODE_TIMEWINDOW: { + for(int32_t i = 0; i < pQueryNode->numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pQueryNode->pExpr, i); + + SSqlExpr* pExpr = &pExprInfo->base; + if (hasAliasName(pExprInfo)) { + len1 = sprintf(buf + len,"[%s #%s]", pExpr->token, pExpr->resSchema.name); + } else { + len1 = sprintf(buf + len,"[%s]", pExpr->token); + } + + len += len1; + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len,", "); + len += len1; + } + } + + len1 = sprintf(buf + len,") "); + len += len1; + + SInterval* pInterval = pQueryNode->pExtInfo; + len1 = sprintf(buf + len, "interval:%" PRId64 "(%s), sliding:%" PRId64 "(%s), offset:%" PRId64 "\n", + pInterval->interval, TSDB_TIME_PRECISION_MILLI_STR, pInterval->sliding, TSDB_TIME_PRECISION_MILLI_STR, + pInterval->offset); + len += len1; + + break; + } + + case QNODE_GROUPBY: { // todo hide the invisible column + for(int32_t i = 
0; i < pQueryNode->numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pQueryNode->pExpr, i); + + SSqlExpr* pExpr = &pExprInfo->base; + + if (hasAliasName(pExprInfo)) { + len1 = sprintf(buf + len,"[%s #%s]", pExpr->token, pExpr->resSchema.name); + } else { + len1 = sprintf(buf + len,"[%s]", pExpr->token); + } + + len += len1; + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len,", "); + len += len1; + } + } + + SGroupbyExpr* pGroupbyExpr = pQueryNode->pExtInfo; + SColIndex* pIndex = taosArrayGet(pGroupbyExpr->columnInfo, 0); + + len1 = sprintf(buf + len,") groupby_col: [%s #%d]\n", pIndex->name, pIndex->colId); + len += len1; + + break; + } + + case QNODE_FILL: { + SFillEssInfo* pEssInfo = pQueryNode->pExtInfo; + len1 = sprintf(buf + len,"%d", pEssInfo->fillType); + len += len1; + + if (pEssInfo->fillType == TSDB_FILL_SET_VALUE) { + len1 = sprintf(buf + len,", val:"); + len += len1; + + // todo get the correct fill data type + for(int32_t i = 0; i < pQueryNode->numOfOutput; ++i) { + len1 = sprintf(buf + len,"%"PRId64, pEssInfo->val[i]); + len += len1; + + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len,", "); + len += len1; + } + } + } + + len1 = sprintf(buf + len,")\n"); + len += len1; + break; + } + + case QNODE_LIMIT: { + SLimit* pVal = pQueryNode->pExtInfo; + len1 = sprintf(buf + len,"limit: %"PRId64", offset: %"PRId64")\n", pVal->limit, pVal->offset); + len += len1; + break; + } + + case QNODE_DISTINCT: + case QNODE_TAGSCAN: { + len1 = sprintf(buf + len,"cols: "); + len += len1; + + for(int32_t i = 0; i < pQueryNode->numOfOutput; ++i) { + SExprInfo* pExprInfo = taosArrayGetP(pQueryNode->pExpr, i); + SSchema* resSchema = &pExprInfo->base.resSchema; + + len1 = sprintf(buf + len,"[%s #%d]", resSchema->name, resSchema->colId); + len += len1; + + if (i < pQueryNode->numOfOutput - 1) { + len1 = sprintf(buf + len,", "); + len += len1; + } + } + + len1 = sprintf(buf + len,")\n"); + len += len1; + + break; + } + + case 
QNODE_JOIN: { + // print join condition + len1 = sprintf(buf + len, ")\n"); + len += len1; + break; + } + } + + return len; +} + +int32_t queryPlanToStringImpl(char* buf, SQueryPlanNode* pQueryNode, int32_t level, int32_t totalLen) { + int32_t len = doPrintPlan(buf, pQueryNode, level, totalLen); + + for(int32_t i = 0; i < taosArrayGetSize(pQueryNode->pPrevNodes); ++i) { + SQueryPlanNode* p1 = taosArrayGetP(pQueryNode->pPrevNodes, i); + int32_t len1 = queryPlanToStringImpl(buf, p1, level + 1, len); + len = len1; + } + + return len; +} + +char* queryPlanToString(SQueryPlanNode* pQueryNode) { + assert(pQueryNode); + + char* buf = calloc(1, 4096); + + int32_t len = sprintf(buf, "===== logic plan =====\n"); + queryPlanToStringImpl(buf, pQueryNode, 0, len); + return buf; +} + +SQueryPlanNode* queryPlanFromString() { + return NULL; +} diff --git a/source/util/src/tpagedfile.c b/source/util/src/tpagedfile.c new file mode 100644 index 0000000000..fcd4f2b155 --- /dev/null +++ b/source/util/src/tpagedfile.c @@ -0,0 +1,451 @@ +#include "tpagedfile.h" +#include "thash.h" +#include "stddef.h" +#include "taoserror.h" +#include "tcompression.h" + +#define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES) +#define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) + +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir) { + *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf)); + + SDiskbasedResultBuf* pResBuf = *pResultBuf; + if (pResBuf == NULL) { + return TSDB_CODE_COM_OUT_OF_MEMORY; + } + + pResBuf->pageSize = pagesize; + pResBuf->numOfPages = 0; // all pages are in buffer in the first place + pResBuf->totalBufSize = 0; + pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit. 
+ pResBuf->allocateId = -1; + pResBuf->comp = true; + pResBuf->file = NULL; + pResBuf->qId = qId; + pResBuf->fileSize = 0; + + // at least more than 2 pages must be in memory + assert(inMemBufSize >= pagesize * 2); + + pResBuf->lruList = tdListNew(POINTER_BYTES); + + // init id hash table + pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); + pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES + pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); + + char path[PATH_MAX] = {0}; + taosGetTmpfilePath(dir, "qbuf", path); + pResBuf->path = strdup(path); + + pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); + +// qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, pResBuf->pageSize, +// pResBuf->inMemPages, pResBuf->path); + + return TSDB_CODE_SUCCESS; +} + +static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) { + pResultBuf->file = fopen(pResultBuf->path, "wb+"); + if (pResultBuf->file == NULL) { +// qError("failed to create tmp file: %s on disk. 
%s", pResultBuf->path, strerror(errno)); + return TAOS_SYSTEM_ERROR(errno); + } + + return TSDB_CODE_SUCCESS; +} + +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { + *dst = srcSize; + return data; + } + + *dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, pResultBuf->assistBuf, *dst); + return data; +} + +static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { + *dst = srcSize; + return data; + } + + *dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0); + if (*dst > 0) { + memcpy(data, pResultBuf->assistBuf, *dst); + } + return data; +} + +static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) { + if (pResultBuf->pFree == NULL) { + return pResultBuf->nextPos; + } else { + int32_t offset = -1; + + size_t num = taosArrayGetSize(pResultBuf->pFree); + for(int32_t i = 0; i < num; ++i) { + SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i); + if (pi->len >= size) { + offset = pi->offset; + pi->offset += (int32_t)size; + pi->len -= (int32_t)size; + + return offset; + } + } + + // no available recycle space, allocate new area in file + return pResultBuf->nextPos; + } +} + +static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + assert(!pg->used && pg->pData != NULL); + + int32_t size = -1; + char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf); + + // this page is flushed to disk for the first time + if (pg->info.offset == -1) { + pg->info.offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; + + int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + assert(ret == 0); + + ret = (int32_t) fwrite(t, 1, size, pResultBuf->file); + 
assert(ret == size); + + if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { + pResultBuf->fileSize = pg->info.offset + pg->info.length; + } + } else { + // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing + if (pg->info.length < size) { + // 1. add current space to free list + taosArrayPush(pResultBuf->pFree, &pg->info); + + // 2. allocate new position, and update the info + pg->info.offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; + } + + //3. write to disk. + int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + if (ret != 0) { // todo handle the error case + + } + + ret = (int32_t)fwrite(t, size, 1, pResultBuf->file); + if (ret != size) { // todo handle the error case + + } + + if (pResultBuf->fileSize < pg->info.offset + pg->info.length) { + pResultBuf->fileSize = pg->info.offset + pg->info.length; + } + } + + char* ret = pg->pData; + memset(ret, 0, pResultBuf->pageSize); + + pg->pData = NULL; + pg->info.length = size; + + pResultBuf->statis.flushBytes += pg->info.length; + + return ret; +} + +static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + int32_t ret = TSDB_CODE_SUCCESS; + assert(((int64_t) pResultBuf->numOfPages * pResultBuf->pageSize) == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); + + if (pResultBuf->file == NULL) { + if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) { + terrno = ret; + return NULL; + } + } + + return doFlushPageToDisk(pResultBuf, pg); +} + +// load file block data in disk +static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + ret = (int32_t)fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file); + if (ret != pg->info.length) { + terrno = errno; + return NULL; + } + + pResultBuf->statis.loadBytes += pg->info.length; + + int32_t fullSize = 0; + 
doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf); + + return (char*)GET_DATA_PAYLOAD(pg); +} + +static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { + assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); + + SArray* pa = taosArrayInit(1, POINTER_BYTES); + int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); + assert(ret == 0); + + return pa; +} + +static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { + SIDList list = NULL; + + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id + list = addNewGroup(pResultBuf, groupId); + } else { + list = (SIDList) (*p); + } + + pResultBuf->numOfPages += 1; + + SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL}; + + ppi->pageId = pageId; + ppi->pData = NULL; + ppi->info = PAGE_INFO_INITIALIZER; + ppi->used = true; + ppi->pn = NULL; + + return *(SPageInfo**) taosArrayPush(list, &ppi); +} + +static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) { + SListIter iter = {0}; + tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter)) != NULL) { + assert(pn != NULL); + + SPageInfo* pageInfo = *(SPageInfo**) pn->data; + assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); + + if (!pageInfo->used) { + break; + } + } + + return pn; +} + +static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) { + char* bufPage = NULL; + SListNode* pn = getEldestUnrefedPage(pResultBuf); + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + int32_t prev = pResultBuf->inMemPages; + + // increase by 50% of previous mem pages + pResultBuf->inMemPages = (int32_t)(pResultBuf->inMemPages * 1.5f); + +// qWarn("%p in memory 
buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, +// pResultBuf->inMemPages, pResultBuf->pageSize); + } else { + pResultBuf->statis.flushPages += 1; + tdListPopNode(pResultBuf->lruList, pn); + + SPageInfo* d = *(SPageInfo**) pn->data; + assert(d->pn == pn); + + d->pn = NULL; + tfree(pn); + + bufPage = flushPageToDisk(pResultBuf, d); + } + + return bufPage; +} + +static void lruListPushFront(SList *pList, SPageInfo* pi) { + tdListPrepend(pList, &pi); + SListNode* front = tdListGetHead(pList); + pi->pn = front; +} + +static void lruListMoveToFront(SList *pList, SPageInfo* pi) { + tdListPopNode(pList, pi->pn); + tdListPrependNode(pList, pi->pn); +} + +static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { + return pageSize + POINTER_BYTES + 2 + sizeof(SFilePage); +} + +SFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { + pResultBuf->statis.getPages += 1; + + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) { + availablePage = evicOneDataPage(pResultBuf); + } + + // register new id in this group + *pageId = (++pResultBuf->allocateId); + + // register page id info + SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); + + // add to LRU list + assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages && pResultBuf->inMemPages > 0); + + lruListPushFront(pResultBuf->lruList, pi); + + // add to hash map + taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); + + // allocate buf + if (availablePage == NULL) { + pi->pData = calloc(1, getAllocPageSize(pResultBuf->pageSize)); // add extract bytes in case of zipped buffer increased. 
+ } else { + pi->pData = availablePage; + } + + pResultBuf->totalBufSize += pResultBuf->pageSize; + + ((void**)pi->pData)[0] = pi; + pi->used = true; + + return (void *)(GET_DATA_PAYLOAD(pi)); +} + +SFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { + assert(pResultBuf != NULL && id >= 0); + pResultBuf->statis.getPages += 1; + + SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t)); + assert(pi != NULL && *pi != NULL); + + if ((*pi)->pData != NULL) { // it is in memory + // no need to update the LRU list if only one page exists + if (pResultBuf->numOfPages == 1) { + (*pi)->used = true; + return (void *)(GET_DATA_PAYLOAD(*pi)); + } + + SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data); + assert(*pInfo == *pi); + + lruListMoveToFront(pResultBuf->lruList, (*pi)); + (*pi)->used = true; + + return (void *)(GET_DATA_PAYLOAD(*pi)); + + } else { // not in memory + assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); + + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pResultBuf)) { + availablePage = evicOneDataPage(pResultBuf); + } + + if (availablePage == NULL) { + (*pi)->pData = calloc(1, getAllocPageSize(pResultBuf->pageSize)); + } else { + (*pi)->pData = availablePage; + } + + ((void**)((*pi)->pData))[0] = (*pi); + + lruListPushFront(pResultBuf->lruList, *pi); + (*pi)->used = true; + + loadPageFromDisk(pResultBuf, *pi); + return (void *)(GET_DATA_PAYLOAD(*pi)); + } +} + +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { + assert(pResultBuf != NULL && page != NULL); + char* p = (char*) page - POINTER_BYTES; + + SPageInfo* ppi = ((SPageInfo**) p)[0]; + releaseResBufPageInfo(pResultBuf, ppi); +} + +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) { + assert(pi->pData != NULL && pi->used); + + pi->used = false; + pResultBuf->statis.releasePages += 1; +} + +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { 
return taosHashGetSize(pResultBuf->groupSet); } + +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return (size_t)pResultBuf->totalBufSize; } + +SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { + assert(pResultBuf != NULL); + + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id + return pResultBuf->emptyDummyIdList; + } else { + return (SArray*) (*p); + } +} + +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { + if (pResultBuf == NULL) { + return; + } + + if (pResultBuf->file != NULL) { +// qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, inmem size:%.2f Kb, file size:%.2f Kb", +// pResultBuf->qId, pResultBuf->totalBufSize/1024.0, listNEles(pResultBuf->lruList) * pResultBuf->pageSize / 1024.0, +// pResultBuf->fileSize/1024.0); + + fclose(pResultBuf->file); + } else { +// qDebug("QInfo:0x%"PRIx64" res output buffer closed, total:%.2f Kb, no file created", pResultBuf->qId, +// pResultBuf->totalBufSize/1024.0); + } + + remove(pResultBuf->path); + tfree(pResultBuf->path); + + SArray** p = taosHashIterate(pResultBuf->groupSet, NULL); + while(p) { + size_t n = taosArrayGetSize(*p); + for(int32_t i = 0; i < n; ++i) { + SPageInfo* pi = taosArrayGetP(*p, i); + tfree(pi->pData); + tfree(pi); + } + + taosArrayDestroy(*p); + p = taosHashIterate(pResultBuf->groupSet, p); + } + + tdListFree(pResultBuf->lruList); + taosArrayDestroy(pResultBuf->emptyDummyIdList); + taosHashCleanup(pResultBuf->groupSet); + taosHashCleanup(pResultBuf->all); + + tfree(pResultBuf->assistBuf); + tfree(pResultBuf); +} + +SPageInfo* getLastPageInfo(SIDList pList) { + size_t size = taosArrayGetSize(pList); + return (SPageInfo*) taosArrayGetP(pList, size - 1); +} + diff --git a/source/util/src/tutil.c b/source/util/src/tutil.c index 22e378d067..ee524e4448 100644 --- a/source/util/src/tutil.c +++ b/source/util/src/tutil.c @@ -14,11 +14,7 @@ */ #include "os.h" 
-#include "tcrc32c.h" #include "tdef.h" -#include "tutil.h" -#include "ulog.h" -#include "taoserror.h" int32_t strdequote(char *z) { if (z == NULL) { @@ -26,7 +22,7 @@ int32_t strdequote(char *z) { } int32_t quote = z[0]; - if (quote != '\'' && quote != '"') { + if (quote != '\'' && quote != '"' && quote != '`') { return (int32_t)strlen(z); } @@ -51,7 +47,6 @@ int32_t strdequote(char *z) { return j + 1; // only one quote, do nothing } - int32_t strRmquote(char *z, int32_t len){ // delete escape character: \\, \', \" char delim = z[0]; @@ -83,6 +78,33 @@ int32_t strRmquote(char *z, int32_t len){ return len - 2 - cnt; } +int32_t strndequote(char *dst, const char* z, int32_t len) { + assert(dst != NULL); + if (z == NULL || len == 0) { + return 0; + } + + int32_t quote = z[0]; + int32_t i = 1, j = 0; + + while (z[i] != 0) { + if (z[i] == quote) { + if (z[i + 1] == quote) { + dst[j++] = (char) quote; + i++; + } else { + dst[j++] = 0; + return (j - 1); + } + } else { + dst[j++] = z[i]; + } + + i++; + } + + return j + 1; // only one quote, do nothing +} size_t strtrim(char *z) { int32_t i = 0; @@ -164,8 +186,6 @@ char *strnchr(char *haystack, char needle, int32_t len, bool skipquote) { return NULL; } - - char* strtolower(char *dst, const char *src) { int esc = 0; char quote = 0, *p = dst, c; @@ -380,66 +400,6 @@ int32_t taosHexStrToByteArray(char hexstr[], char bytes[]) { return 0; } -// TODO move to comm module -bool taosGetVersionNumber(char *versionStr, int *versionNubmer) { - if (versionStr == NULL || versionNubmer == NULL) { - return false; - } - - int versionNumberPos[5] = {0}; - int len = (int)strlen(versionStr); - int dot = 0; - for (int pos = 0; pos < len && dot < 4; ++pos) { - if (versionStr[pos] == '.') { - versionStr[pos] = 0; - versionNumberPos[++dot] = pos + 1; - } - } - - if (dot != 3) { - return false; - } - - for (int pos = 0; pos < 4; ++pos) { - versionNubmer[pos] = atoi(versionStr + versionNumberPos[pos]); - } - versionStr[versionNumberPos[1] - 1] = 
'.'; - versionStr[versionNumberPos[2] - 1] = '.'; - versionStr[versionNumberPos[3] - 1] = '.'; - - return true; -} - -int taosCheckVersion(char *input_client_version, char *input_server_version, int comparedSegments) { - char client_version[TSDB_VERSION_LEN] = {0}; - char server_version[TSDB_VERSION_LEN] = {0}; - int clientVersionNumber[4] = {0}; - int serverVersionNumber[4] = {0}; - - tstrncpy(client_version, input_client_version, sizeof(client_version)); - tstrncpy(server_version, input_server_version, sizeof(server_version)); - - if (!taosGetVersionNumber(client_version, clientVersionNumber)) { - uError("invalid client version:%s", client_version); - return TSDB_CODE_TSC_INVALID_VERSION; - } - - if (!taosGetVersionNumber(server_version, serverVersionNumber)) { - uError("invalid server version:%s", server_version); - return TSDB_CODE_TSC_INVALID_VERSION; - } - - for(int32_t i = 0; i < comparedSegments; ++i) { - if (clientVersionNumber[i] != serverVersionNumber[i]) { - uError("the %d-th number of server version:%s not matched with client version:%s", i, server_version, - client_version); - return TSDB_CODE_TSC_INVALID_VERSION; - } - } - - return 0; -} - char *taosIpStr(uint32_t ipInt) { static char ipStrArray[3][30]; static int ipStrIndex = 0; diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 2bd601d812..59c20270ec 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -2667,19 +2667,7 @@ void tscColumnCopy(SColumn* pDest, const SColumn* pSrc) { pDest->info.bytes = pSrc->info.bytes; } -void tscColumnListCopy(SArray* dst, const SArray* src, uint64_t tableUid) { - assert(src != NULL && dst != NULL); - size_t num = taosArrayGetSize(src); - for (int32_t i = 0; i < num; ++i) { - SColumn* pCol = taosArrayGetP(src, i); - - if (pCol->tableUid == tableUid) { - SColumn* p = tscColumnClone(pCol); - taosArrayPush(dst, &p); - } - } -} void tscColumnListCopyAll(SArray* dst, const SArray* src) { assert(src != NULL && dst != NULL); diff --git 
a/src/query/tests/astTest.cpp b/src/query/tests/astTest.cpp index 1143d00e8d..75a703ac1f 100644 --- a/src/query/tests/astTest.cpp +++ b/src/query/tests/astTest.cpp @@ -459,7 +459,7 @@ namespace { // two level expression tree tExprNode *createExpr1() { auto *pLeft = (tExprNode*) calloc(1, sizeof(tExprNode)); - pLeft->nodeType = TSQL_NODE_COL; + pLeft->nodeType = TEXPR_COL_NODE; pLeft->pSchema = (SSchema*) calloc(1, sizeof(SSchema)); strcpy(pLeft->pSchema->name, "col_a"); @@ -468,14 +468,14 @@ tExprNode *createExpr1() { pLeft->pSchema->colId = 1; auto *pRight = (tExprNode*) calloc(1, sizeof(tExprNode)); - pRight->nodeType = TSQL_NODE_VALUE; + pRight->nodeType = TEXPR_VALUE_NODE; pRight->pVal = (tVariant*) calloc(1, sizeof(tVariant)); pRight->pVal->nType = TSDB_DATA_TYPE_INT; pRight->pVal->i64 = 12; auto *pRoot = (tExprNode*) calloc(1, sizeof(tExprNode)); - pRoot->nodeType = TSQL_NODE_EXPR; + pRoot->nodeType = TEXPR_NODE_EXPR; pRoot->_node.optr = TSDB_RELATION_EQUAL; pRoot->_node.pLeft = pLeft; @@ -488,7 +488,7 @@ tExprNode *createExpr1() { // thress level expression tree tExprNode* createExpr2() { auto *pLeft2 = (tExprNode*) calloc(1, sizeof(tExprNode)); - pLeft2->nodeType = TSQL_NODE_COL; + pLeft2->nodeType = TEXPR_COL_NODE; pLeft2->pSchema = (SSchema*) calloc(1, sizeof(SSchema)); strcpy(pLeft2->pSchema->name, "col_a"); @@ -497,7 +497,7 @@ tExprNode* createExpr2() { pLeft2->pSchema->colId = 1; auto *pRight2 = (tExprNode*) calloc(1, sizeof(tExprNode)); - pRight2->nodeType = TSQL_NODE_VALUE; + pRight2->nodeType = TEXPR_VALUE_NODE; pRight2->pVal = (tVariant*) calloc(1, sizeof(tVariant)); pRight2->pVal->nType = TSDB_DATA_TYPE_BINARY; @@ -506,7 +506,7 @@ tExprNode* createExpr2() { pRight2->pVal->nLen = strlen(v); auto *p1 = (tExprNode*) calloc(1, sizeof(tExprNode)); - p1->nodeType = TSQL_NODE_EXPR; + p1->nodeType = TEXPR_NODE_EXPR; p1->_node.optr = TSDB_RELATION_LIKE; p1->_node.pLeft = pLeft2; @@ -514,7 +514,7 @@ tExprNode* createExpr2() { p1->_node.hasPK = false; auto 
*pLeft1 = (tExprNode*) calloc(1, sizeof(tExprNode)); - pLeft1->nodeType = TSQL_NODE_COL; + pLeft1->nodeType = TEXPR_COL_NODE; pLeft1->pSchema = (SSchema*) calloc(1, sizeof(SSchema)); strcpy(pLeft1->pSchema->name, "col_b"); @@ -523,14 +523,14 @@ tExprNode* createExpr2() { pLeft1->pSchema->colId = 99; auto *pRight1 = (tExprNode*) calloc(1, sizeof(tExprNode)); - pRight1->nodeType = TSQL_NODE_VALUE; + pRight1->nodeType = TEXPR_VALUE_NODE; pRight1->pVal = (tVariant*) calloc(1, sizeof(tVariant)); pRight1->pVal->nType = TSDB_DATA_TYPE_DOUBLE; pRight1->pVal->dKey = 91.99; auto *p2 = (tExprNode*) calloc(1, sizeof(tExprNode)); - p2->nodeType = TSQL_NODE_EXPR; + p2->nodeType = TEXPR_NODE_EXPR; p2->_node.optr = TSDB_RELATION_GREATER_EQUAL; p2->_node.pLeft = pLeft1; @@ -538,7 +538,7 @@ tExprNode* createExpr2() { p2->_node.hasPK = false; auto *pRoot = (tExprNode*) calloc(1, sizeof(tExprNode)); - pRoot->nodeType = TSQL_NODE_EXPR; + pRoot->nodeType = TEXPR_NODE_EXPR; pRoot->_node.optr = TSDB_RELATION_OR; pRoot->_node.pLeft = p1; @@ -605,11 +605,11 @@ void exprSerializeTest2() { ASSERT_EQ(c1Left->nodeType, c2Left->nodeType); - ASSERT_EQ(c2Left->nodeType, TSQL_NODE_EXPR); + ASSERT_EQ(c2Left->nodeType, TEXPR_NODE_EXPR); ASSERT_EQ(c2Left->_node.optr, TSDB_RELATION_LIKE); ASSERT_STRCASEEQ(c2Left->_node.pLeft->pSchema->name, "col_a"); - ASSERT_EQ(c2Left->_node.pRight->nodeType, TSQL_NODE_VALUE); + ASSERT_EQ(c2Left->_node.pRight->nodeType, TEXPR_VALUE_NODE); ASSERT_STRCASEEQ(c2Left->_node.pRight->pVal->pz, "hello world!"); @@ -617,7 +617,7 @@ void exprSerializeTest2() { tExprNode* c2Right = p2->_node.pRight; ASSERT_EQ(c1Right->nodeType, c2Right->nodeType); - ASSERT_EQ(c2Right->nodeType, TSQL_NODE_EXPR); + ASSERT_EQ(c2Right->nodeType, TEXPR_NODE_EXPR); ASSERT_EQ(c2Right->_node.optr, TSDB_RELATION_GREATER_EQUAL); ASSERT_EQ(c2Right->_node.pRight->pVal->dKey, 91.99);