diff --git a/Jenkinsfile b/Jenkinsfile index dc7836c3da..edbe11d428 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,10 +14,12 @@ pipeline { sh ''' date cd ${WKC} + git reset --hard git checkout develop git pull git submodule update cd ${WK} + git reset --hard git checkout develop git pull export TZ=Asia/Harbin @@ -39,11 +41,13 @@ pipeline { steps { sh ''' cd ${WKC} + git reset --hard git checkout develop git pull git submodule update cd ${WK} + git reset --hard git checkout develop git pull export TZ=Asia/Harbin @@ -65,11 +69,13 @@ pipeline { steps { sh ''' cd ${WKC} + git reset --hard git checkout develop git pull git submodule update cd ${WK} + git reset --hard git checkout develop git pull export TZ=Asia/Harbin @@ -108,11 +114,13 @@ pipeline { steps { sh ''' cd ${WKC} + git reset --hard git checkout develop git pull git submodule update cd ${WK} + git reset --hard git checkout develop git pull export TZ=Asia/Harbin @@ -167,7 +175,47 @@ pipeline { } } - + stage('arm64_build'){ + agent{label 'arm64'} + steps{ + sh ''' + cd ${WK} + git fetch + git checkout develop + git pull + cd ${WKC} + git fetch + git checkout develop + git pull + git submodule update + cd ${WKC}/packaging + ./release.sh -v cluster -c aarch64 -n 2.0.0.0 -m 2.0.0.0 + + ''' + } + } + stage('arm32_build'){ + agent{label 'arm32'} + steps{ + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { + sh ''' + cd ${WK} + git fetch + git checkout develop + git pull + cd ${WKC} + git fetch + git checkout develop + git pull + git submodule update + cd ${WKC}/packaging + ./release.sh -v cluster -c aarch32 -n 2.0.0.0 -m 2.0.0.0 + + ''' + } + + } + } } } diff --git a/documentation20/webdocs/markdowndocs/administrator-ch.md b/documentation20/webdocs/markdowndocs/administrator-ch.md index 36466d2b7e..f54c6b91a1 100644 --- a/documentation20/webdocs/markdowndocs/administrator-ch.md +++ b/documentation20/webdocs/markdowndocs/administrator-ch.md @@ -95,6 +95,7 @@ TDengine系统后台服务由taosd提供,可以在配置文件taos.cfg里修 - 
logKeepDays:日志文件的最长保存时间。大于0时,日志文件会被重命名为taosdlog.xxx,其中xxx为日志文件最后修改的时间戳,单位为秒。默认值:0天。 - maxSQLLength:单条SQL语句允许最长限制。默认值:65380字节。 - telemetryReporting: 是否允许 TDengine 采集和上报基本使用信息,0表示不允许,1表示允许。 默认值:1。 +- stream: 是否启用连续查询(流计算功能),0表示不允许,1表示允许。 默认值:1。 **注意:**对于端口,TDengine会使用从serverPort起13个连续的TCP和UDP端口号,请务必在防火墙打开。因此如果是缺省配置,需要打开从6030都6042共13个端口,而且必须TCP和UDP都打开。 diff --git a/documentation20/webdocs/markdowndocs/connector-ch.md b/documentation20/webdocs/markdowndocs/connector-ch.md index c5a955f43f..69c560bbc4 100644 --- a/documentation20/webdocs/markdowndocs/connector-ch.md +++ b/documentation20/webdocs/markdowndocs/connector-ch.md @@ -142,7 +142,7 @@ C/C++的API类似于MySQL的C API。应用程序使用时,需要包含TDengine 获取最近一次API调用失败的原因,返回值为错误代码。 -**注意**:对于单个数据库连接,在同一时刻只能有一个线程使用该连接调用API,否则会有未定义的行为出现并可能导致客户端crash。客户端应用可以通过建立多个连接进行多线程的数据写入或查询处理。 +**注意**:对于每个数据库应用,2.0及以上版本 TDengine 推荐只建立一个连接。同时在应用中将该连接 (TAOS*) 结构体传递到不同的线程共享使用。基于 TAOS 结构体发出的查询、写入等操作具有多线程安全性。C 语言的连接器可以按照需求动态建立面向数据库的新连接(该过程对用户不可见),同时建议只有在程序最后退出的时候才调用 taos_close 关闭连接。 ### 异步查询API diff --git a/packaging/cfg/taos.cfg b/packaging/cfg/taos.cfg index ca88bca3c8..ff4beea6e2 100644 --- a/packaging/cfg/taos.cfg +++ b/packaging/cfg/taos.cfg @@ -236,7 +236,7 @@ # httpDebugFlag 131 # debug flag for monitor -# monitorDebugFlag 131 +# monDebugFlag 131 # debug flag for query # qDebugflag 131 @@ -260,4 +260,7 @@ # maxBinaryDisplayWidth 30 # enable/disable telemetry reporting -# telemetryReporting 1 \ No newline at end of file +# telemetryReporting 1 + +# enable/disable stream (continuous query) +# stream 1 diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index ddf7114f08..d6dccf7045 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -172,6 +172,7 @@ function install_bin() { ${csudo} rm -f ${bin_link_dir}/taos || : ${csudo} rm -f ${bin_link_dir}/taosd || : ${csudo} rm -f ${bin_link_dir}/taosdemo || : + ${csudo} rm -f ${bin_link_dir}/taosdump || : ${csudo} rm -f ${bin_link_dir}/rmtaos || : ${csudo} rm -f 
${bin_link_dir}/tarbitrator || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -182,6 +183,7 @@ function install_bin() { [ -x ${install_main_dir}/bin/taos ] && ${csudo} ln -s ${install_main_dir}/bin/taos ${bin_link_dir}/taos || : [ -x ${install_main_dir}/bin/taosd ] && ${csudo} ln -s ${install_main_dir}/bin/taosd ${bin_link_dir}/taosd || : [ -x ${install_main_dir}/bin/taosdemo ] && ${csudo} ln -s ${install_main_dir}/bin/taosdemo ${bin_link_dir}/taosdemo || : + [ -x ${install_main_dir}/bin/taosdump ] && ${csudo} ln -s ${install_main_dir}/bin/taosdump ${bin_link_dir}/taosdump || : [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo} ln -s ${install_main_dir}/bin/remove.sh ${bin_link_dir}/rmtaos || : [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo} ln -s ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : [ -x ${install_main_dir}/bin/tarbitrator ] && ${csudo} ln -s ${install_main_dir}/bin/tarbitrator ${bin_link_dir}/tarbitrator || : diff --git a/packaging/tools/install_client.sh b/packaging/tools/install_client.sh index 34a9bfaecb..0467300953 100755 --- a/packaging/tools/install_client.sh +++ b/packaging/tools/install_client.sh @@ -84,8 +84,9 @@ function install_main_path() { function install_bin() { # Remove links ${csudo} rm -f ${bin_link_dir}/taos || : - if [ "$osType" == "Darwin" ]; then + if [ "$osType" != "Darwin" ]; then ${csudo} rm -f ${bin_link_dir}/taosdemo || : + ${csudo} rm -f ${bin_link_dir}/taosdump || : fi ${csudo} rm -f ${bin_link_dir}/rmtaos || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -94,8 +95,9 @@ function install_bin() { #Make link [ -x ${install_main_dir}/bin/taos ] && ${csudo} ln -s ${install_main_dir}/bin/taos ${bin_link_dir}/taos || : - if [ "$osType" == "Darwin" ]; then + if [ "$osType" != "Darwin" ]; then [ -x ${install_main_dir}/bin/taosdemo ] && ${csudo} ln -s ${install_main_dir}/bin/taosdemo ${bin_link_dir}/taosdemo || : + [ -x ${install_main_dir}/bin/taosdump ] && ${csudo} ln -s 
${install_main_dir}/bin/taosdump ${bin_link_dir}/taosdump || : fi [ -x ${install_main_dir}/bin/remove_client.sh ] && ${csudo} ln -s ${install_main_dir}/bin/remove_client.sh ${bin_link_dir}/rmtaos || : [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo} ln -s ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : diff --git a/packaging/tools/install_client_power.sh b/packaging/tools/install_client_power.sh index 0108d1d44a..26977e12f4 100755 --- a/packaging/tools/install_client_power.sh +++ b/packaging/tools/install_client_power.sh @@ -84,8 +84,9 @@ function install_main_path() { function install_bin() { # Remove links ${csudo} rm -f ${bin_link_dir}/power || : - if [ "$osType" == "Darwin" ]; then + if [ "$osType" != "Darwin" ]; then ${csudo} rm -f ${bin_link_dir}/powerdemo || : + ${csudo} rm -f ${bin_link_dir}/powerdump || : fi ${csudo} rm -f ${bin_link_dir}/rmpower || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -94,8 +95,9 @@ function install_bin() { #Make link [ -x ${install_main_dir}/bin/power ] && ${csudo} ln -s ${install_main_dir}/bin/power ${bin_link_dir}/power || : - if [ "$osType" == "Darwin" ]; then + if [ "$osType" != "Darwin" ]; then [ -x ${install_main_dir}/bin/powerdemo ] && ${csudo} ln -s ${install_main_dir}/bin/powerdemo ${bin_link_dir}/powerdemo || : + [ -x ${install_main_dir}/bin/powerdump ] && ${csudo} ln -s ${install_main_dir}/bin/powerdump ${bin_link_dir}/powerdump || : fi [ -x ${install_main_dir}/bin/remove_client_power.sh ] && ${csudo} ln -s ${install_main_dir}/bin/remove_client_power.sh ${bin_link_dir}/rmpower || : [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo} ln -s ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : diff --git a/packaging/tools/install_power.sh b/packaging/tools/install_power.sh index df6291f4ae..5929b52afc 100755 --- a/packaging/tools/install_power.sh +++ b/packaging/tools/install_power.sh @@ -172,6 +172,7 @@ function install_bin() { ${csudo} rm -f ${bin_link_dir}/power || : 
${csudo} rm -f ${bin_link_dir}/powerd || : ${csudo} rm -f ${bin_link_dir}/powerdemo || : + ${csudo} rm -f ${bin_link_dir}/powerdump || : ${csudo} rm -f ${bin_link_dir}/rmpower || : ${csudo} rm -f ${bin_link_dir}/tarbitrator || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -182,6 +183,7 @@ function install_bin() { [ -x ${install_main_dir}/bin/power ] && ${csudo} ln -s ${install_main_dir}/bin/power ${bin_link_dir}/power || : [ -x ${install_main_dir}/bin/powerd ] && ${csudo} ln -s ${install_main_dir}/bin/powerd ${bin_link_dir}/powerd || : [ -x ${install_main_dir}/bin/powerdemo ] && ${csudo} ln -s ${install_main_dir}/bin/powerdemo ${bin_link_dir}/powerdemo || : + [ -x ${install_main_dir}/bin/powerdump ] && ${csudo} ln -s ${install_main_dir}/bin/powerdump ${bin_link_dir}/powerdump || : [ -x ${install_main_dir}/bin/remove_power.sh ] && ${csudo} ln -s ${install_main_dir}/bin/remove_power.sh ${bin_link_dir}/rmpower || : [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo} ln -s ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : [ -x ${install_main_dir}/bin/tarbitrator ] && ${csudo} ln -s ${install_main_dir}/bin/tarbitrator ${bin_link_dir}/tarbitrator || : diff --git a/packaging/tools/post.sh b/packaging/tools/post.sh index 00705fad77..52919976ee 100755 --- a/packaging/tools/post.sh +++ b/packaging/tools/post.sh @@ -92,6 +92,7 @@ function install_bin() { ${csudo} rm -f ${bin_link_dir}/taos || : ${csudo} rm -f ${bin_link_dir}/taosd || : ${csudo} rm -f ${bin_link_dir}/taosdemo || : + ${csudo} rm -f ${bin_link_dir}/taosdump || : ${csudo} rm -f ${bin_link_dir}/rmtaos || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -101,6 +102,7 @@ function install_bin() { [ -x ${bin_dir}/taos ] && ${csudo} ln -s ${bin_dir}/taos ${bin_link_dir}/taos || : [ -x ${bin_dir}/taosd ] && ${csudo} ln -s ${bin_dir}/taosd ${bin_link_dir}/taosd || : [ -x ${bin_dir}/taosdemo ] && ${csudo} ln -s ${bin_dir}/taosdemo ${bin_link_dir}/taosdemo || : + [ -x ${bin_dir}/taosdump ] && 
${csudo} ln -s ${bin_dir}/taosdump ${bin_link_dir}/taosdump || : [ -x ${bin_dir}/set_core.sh ] && ${csudo} ln -s ${bin_dir}/set_core.sh ${bin_link_dir}/set_core || : } diff --git a/packaging/tools/remove.sh b/packaging/tools/remove.sh index e9a4f48cf7..2f2660d446 100755 --- a/packaging/tools/remove.sh +++ b/packaging/tools/remove.sh @@ -72,6 +72,7 @@ function clean_bin() { ${csudo} rm -f ${bin_link_dir}/taos || : ${csudo} rm -f ${bin_link_dir}/taosd || : ${csudo} rm -f ${bin_link_dir}/taosdemo || : + ${csudo} rm -f ${bin_link_dir}/taosdump || : ${csudo} rm -f ${bin_link_dir}/rmtaos || : ${csudo} rm -f ${bin_link_dir}/tarbitrator || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -222,4 +223,4 @@ elif echo $osinfo | grep -qwi "centos" ; then fi echo -e "${GREEN}TDengine is removed successfully!${NC}" -echo \ No newline at end of file +echo diff --git a/packaging/tools/remove_client.sh b/packaging/tools/remove_client.sh index 2c28b7b6bf..7579162dc6 100755 --- a/packaging/tools/remove_client.sh +++ b/packaging/tools/remove_client.sh @@ -38,6 +38,7 @@ function clean_bin() { # Remove link ${csudo} rm -f ${bin_link_dir}/taos || : ${csudo} rm -f ${bin_link_dir}/taosdemo || : + ${csudo} rm -f ${bin_link_dir}/taosdump || : ${csudo} rm -f ${bin_link_dir}/rmtaos || : ${csudo} rm -f ${bin_link_dir}/set_core || : } diff --git a/packaging/tools/remove_client_power.sh b/packaging/tools/remove_client_power.sh index 7a3c99e100..580c46e207 100755 --- a/packaging/tools/remove_client_power.sh +++ b/packaging/tools/remove_client_power.sh @@ -38,6 +38,7 @@ function clean_bin() { # Remove link ${csudo} rm -f ${bin_link_dir}/power || : ${csudo} rm -f ${bin_link_dir}/powerdemo || : + ${csudo} rm -f ${bin_link_dir}/powerdump || : ${csudo} rm -f ${bin_link_dir}/rmpower || : ${csudo} rm -f ${bin_link_dir}/set_core || : } diff --git a/packaging/tools/remove_power.sh b/packaging/tools/remove_power.sh index d6d6c5dd7c..816869cf44 100755 --- a/packaging/tools/remove_power.sh +++ 
b/packaging/tools/remove_power.sh @@ -72,6 +72,7 @@ function clean_bin() { ${csudo} rm -f ${bin_link_dir}/power || : ${csudo} rm -f ${bin_link_dir}/powerd || : ${csudo} rm -f ${bin_link_dir}/powerdemo || : + ${csudo} rm -f ${bin_link_dir}/powerdump || : ${csudo} rm -f ${bin_link_dir}/rmpower || : ${csudo} rm -f ${bin_link_dir}/tarbitrator || : ${csudo} rm -f ${bin_link_dir}/set_core || : @@ -223,4 +224,4 @@ fi #fi echo -e "${GREEN}PowerDB is removed successfully!${NC}" -echo \ No newline at end of file +echo diff --git a/src/balance/inc/bnInt.h b/src/balance/inc/bnInt.h new file mode 100644 index 0000000000..e924776ff1 --- /dev/null +++ b/src/balance/inc/bnInt.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef TDENGINE_BALANCE_INT_H +#define TDENGINE_BALANCE_INT_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "mnodeInt.h" +#include "mnodeDef.h" +#include "mnodeDnode.h" + +typedef struct { + int32_t size; + int32_t maxSize; + SDnodeObj **list; +} SBnDnodes; + +typedef struct { + void * timer; + bool stop; + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_t thread; +} SBnThread; + +typedef struct { + pthread_mutex_t mutex; +} SBnMgmt; + +int32_t bnInit(); +void bnCleanUp(); +bool bnStart(); +void bnCheckStatus(); +void bnCheckModules(); + +extern SBnDnodes tsBnDnodes; +extern void *tsMnodeTmr; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/balance/inc/bnScore.h b/src/balance/inc/bnScore.h new file mode 100644 index 0000000000..a28c4459dd --- /dev/null +++ b/src/balance/inc/bnScore.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef TDENGINE_BALANCE_SCORE_H +#define TDENGINE_BALANCE_SCORE_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "bnInt.h" + +void bnInitDnodes(); +void bnCleanupDnodes(); +void bnAccquireDnodes(); +void bnReleaseDnodes(); +float bnTryCalcDnodeScore(SDnodeObj *pDnode, int32_t extraVnode); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/balance/inc/bnThread.h b/src/balance/inc/bnThread.h new file mode 100644 index 0000000000..8f54b66028 --- /dev/null +++ b/src/balance/inc/bnThread.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef TDENGINE_BALANCE_THREAD_H +#define TDENGINE_BALANCE_THREAD_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "bnInt.h" + +int32_t bnInitThread(); +void bnCleanupThread(); +void bnNotify(); +void bnStartTimer(int64_t mseconds); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/balance/src/balance.c b/src/balance/src/bnMain.c similarity index 50% rename from src/balance/src/balance.c rename to src/balance/src/bnMain.c index df78f4fe27..383f981913 100644 --- a/src/balance/src/balance.c +++ b/src/balance/src/bnMain.c @@ -15,17 +15,12 @@ #define _DEFAULT_SOURCE #include "os.h" -#include "tutil.h" -#include "tbalance.h" #include "tsync.h" -#include "ttimer.h" #include "tglobal.h" -#include "tdataformat.h" #include "dnode.h" -#include "mnode.h" -#include "mnodeDef.h" -#include "mnodeInt.h" -#include "mnodeDnode.h" +#include "bnInt.h" +#include "bnScore.h" +#include "bnThread.h" #include "mnodeDb.h" #include "mnodeMnode.h" #include "mnodeSdb.h" @@ -33,36 +28,18 @@ #include "mnodeUser.h" #include "mnodeVgroup.h" -/* - * once sdb work as mater, then tsAccessSquence reset to zero - * increase tsAccessSquence every balance interval - */ -extern void * tsMnodeTmr; -static void * tsBalanceTimer = NULL; -static int32_t tsBalanceDnodeListSize = 0; -static SDnodeObj ** tsBalanceDnodeList = NULL; -static int32_t tsBalanceDnodeListMallocSize = 16; -static pthread_mutex_t tsBalanceMutex; +static SBnMgmt tsBnMgmt;; +static void bnMonitorDnodeModule(); -static void balanceStartTimer(int64_t mseconds); -static void balanceInitDnodeList(); -static void balanceCleanupDnodeList(); -static void balanceAccquireDnodeList(); -static void balanceReleaseDnodeList(); -static void balanceMonitorDnodeModule(); -static float balanceTryCalcDnodeScore(SDnodeObj *pDnode, int32_t extraVnode); -static int32_t balanceGetScoresMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); -static int32_t balanceRetrieveScores(SShowObj *pShow, char *data, int32_t rows, void 
*pConn); - -static void balanceLock() { - pthread_mutex_lock(&tsBalanceMutex); +static void bnLock() { + pthread_mutex_lock(&tsBnMgmt.mutex); } -static void balanceUnLock() { - pthread_mutex_unlock(&tsBalanceMutex); +static void bnUnLock() { + pthread_mutex_unlock(&tsBnMgmt.mutex); } -static bool balanceCheckFree(SDnodeObj *pDnode) { +static bool bnCheckFree(SDnodeObj *pDnode) { if (pDnode->status == TAOS_DN_STATUS_DROPPING || pDnode->status == TAOS_DN_STATUS_OFFLINE) { mError("dnode:%d, status:%s not available", pDnode->dnodeId, mnodeGetDnodeStatusStr(pDnode->status)); return false; @@ -86,7 +63,7 @@ static bool balanceCheckFree(SDnodeObj *pDnode) { return true; } -static void balanceDiscardVnode(SVgObj *pVgroup, SVnodeGid *pVnodeGid) { +static void bnDiscardVnode(SVgObj *pVgroup, SVnodeGid *pVnodeGid) { mDebug("vgId:%d, dnode:%d is dropping", pVgroup->vgId, pVnodeGid->dnodeId); SDnodeObj *pDnode = mnodeGetDnode(pVnodeGid->dnodeId); @@ -111,27 +88,26 @@ static void balanceDiscardVnode(SVgObj *pVgroup, SVnodeGid *pVnodeGid) { mnodeUpdateVgroup(pVgroup); } -static void balanceSwapVnodeGid(SVnodeGid *pVnodeGid1, SVnodeGid *pVnodeGid2) { +static void bnSwapVnodeGid(SVnodeGid *pVnodeGid1, SVnodeGid *pVnodeGid2) { // SVnodeGid tmp = *pVnodeGid1; // *pVnodeGid1 = *pVnodeGid2; // *pVnodeGid2 = tmp; } -int32_t balanceAllocVnodes(SVgObj *pVgroup) { +int32_t bnAllocVnodes(SVgObj *pVgroup) { static int32_t randIndex = 0; int32_t dnode = 0; int32_t vnodes = 0; - balanceLock(); - - balanceAccquireDnodeList(); + bnLock(); + bnAccquireDnodes(); mDebug("db:%s, try alloc %d vnodes to vgroup, dnodes total:%d, avail:%d", pVgroup->dbName, pVgroup->numOfVnodes, - mnodeGetDnodesNum(), tsBalanceDnodeListSize); + mnodeGetDnodesNum(), tsBnDnodes.size); for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { - for (; dnode < tsBalanceDnodeListSize; ++dnode) { - SDnodeObj *pDnode = tsBalanceDnodeList[dnode]; - if (balanceCheckFree(pDnode)) { + for (; dnode < tsBnDnodes.size; ++dnode) { + 
SDnodeObj *pDnode = tsBnDnodes.list[dnode]; + if (bnCheckFree(pDnode)) { SVnodeGid *pVnodeGid = pVgroup->vnodeGid + i; pVnodeGid->dnodeId = pDnode->dnodeId; pVnodeGid->pDnode = pDnode; @@ -148,8 +124,8 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { } if (vnodes != pVgroup->numOfVnodes) { - balanceReleaseDnodeList(); - balanceUnLock(); + bnReleaseDnodes(); + bnUnLock(); mDebug("db:%s, need vnodes:%d, but alloc:%d", pVgroup->dbName, pVgroup->numOfVnodes, vnodes); @@ -162,7 +138,6 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { pDnode->openVnodes, pDnode->diskAvailable, pDnode->alternativeRole); mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); if (mnodeGetOnlineDnodesNum() == 0) { return TSDB_CODE_MND_NOT_READY; @@ -180,33 +155,33 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { if (pVgroup->numOfVnodes == 1) { } else if (pVgroup->numOfVnodes == 2) { if (randIndex++ % 2 == 0) { - balanceSwapVnodeGid(pVgroup->vnodeGid, pVgroup->vnodeGid + 1); + bnSwapVnodeGid(pVgroup->vnodeGid, pVgroup->vnodeGid + 1); } } else { int32_t randVal = randIndex++ % 6; if (randVal == 1) { // 1, 0, 2 - balanceSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 1); + bnSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 1); } else if (randVal == 2) { // 1, 2, 0 - balanceSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 1); - balanceSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid + 2); + bnSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 1); + bnSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid + 2); } else if (randVal == 3) { // 2, 1, 0 - balanceSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 2); + bnSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 2); } else if (randVal == 4) { // 2, 0, 1 - balanceSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 2); - balanceSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid + 2); + bnSwapVnodeGid(pVgroup->vnodeGid + 0, pVgroup->vnodeGid + 2); + bnSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid 
+ 2); } if (randVal == 5) { // 0, 2, 1 - balanceSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid + 2); + bnSwapVnodeGid(pVgroup->vnodeGid + 1, pVgroup->vnodeGid + 2); } else { } // 0, 1, 2 } - balanceReleaseDnodeList(); - balanceUnLock(); + bnReleaseDnodes(); + bnUnLock(); return TSDB_CODE_SUCCESS; } -static bool balanceCheckVgroupReady(SVgObj *pVgroup, SVnodeGid *pRmVnode) { +static bool bnCheckVgroupReady(SVgObj *pVgroup, SVnodeGid *pRmVnode) { if (pVgroup->lbTime + 5 * tsStatusInterval > tsAccessSquence) { return false; } @@ -233,7 +208,7 @@ static bool balanceCheckVgroupReady(SVgObj *pVgroup, SVnodeGid *pRmVnode) { * desc: remove one vnode from vgroup * all vnodes in vgroup should in ready state, except the balancing one **/ -static int32_t balanceRemoveVnode(SVgObj *pVgroup) { +static int32_t bnRemoveVnode(SVgObj *pVgroup) { if (pVgroup->numOfVnodes <= 1) return -1; SVnodeGid *pRmVnode = NULL; @@ -275,17 +250,17 @@ static int32_t balanceRemoveVnode(SVgObj *pVgroup) { pSelVnode = pRmVnode; } - if (!balanceCheckVgroupReady(pVgroup, pSelVnode)) { + if (!bnCheckVgroupReady(pVgroup, pSelVnode)) { mDebug("vgId:%d, is not ready", pVgroup->vgId); return -1; } else { mDebug("vgId:%d, is ready, discard dnode:%d", pVgroup->vgId, pSelVnode->dnodeId); - balanceDiscardVnode(pVgroup, pSelVnode); + bnDiscardVnode(pVgroup, pSelVnode); return TSDB_CODE_SUCCESS; } } -static bool balanceCheckDnodeInVgroup(SDnodeObj *pDnode, SVgObj *pVgroup) { +static bool bnCheckDnodeInVgroup(SDnodeObj *pDnode, SVgObj *pVgroup) { for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { SVnodeGid *pGid = &pVgroup->vnodeGid[i]; if (pGid->dnodeId == 0) break; @@ -300,13 +275,13 @@ static bool balanceCheckDnodeInVgroup(SDnodeObj *pDnode, SVgObj *pVgroup) { /** * desc: add vnode to vgroup, find a new one if dest dnode is null **/ -static int32_t balanceAddVnode(SVgObj *pVgroup, SDnodeObj *pSrcDnode, SDnodeObj *pDestDnode) { +static int32_t bnAddVnode(SVgObj *pVgroup, SDnodeObj *pSrcDnode, SDnodeObj 
*pDestDnode) { if (pDestDnode == NULL) { - for (int32_t i = 0; i < tsBalanceDnodeListSize; ++i) { - SDnodeObj *pDnode = tsBalanceDnodeList[i]; + for (int32_t i = 0; i < tsBnDnodes.size; ++i) { + SDnodeObj *pDnode = tsBnDnodes.list[i]; if (pDnode == pSrcDnode) continue; - if (balanceCheckDnodeInVgroup(pDnode, pVgroup)) continue; - if (!balanceCheckFree(pDnode)) continue; + if (bnCheckDnodeInVgroup(pDnode, pVgroup)) continue; + if (!bnCheckFree(pDnode)) continue; pDestDnode = pDnode; mDebug("vgId:%d, add vnode to dnode:%d", pVgroup->vgId, pDnode->dnodeId); @@ -334,25 +309,25 @@ static int32_t balanceAddVnode(SVgObj *pVgroup, SDnodeObj *pSrcDnode, SDnodeObj return TSDB_CODE_SUCCESS; } -static bool balanceMonitorBalance() { - if (tsBalanceDnodeListSize < 2) return false; +static bool bnMonitorBalance() { + if (tsBnDnodes.size < 2) return false; - for (int32_t src = tsBalanceDnodeListSize - 1; src >= 0; --src) { - SDnodeObj *pDnode = tsBalanceDnodeList[src]; - mDebug("%d-dnode:%d, state:%s, score:%.1f, numOfCores:%d, openVnodes:%d", tsBalanceDnodeListSize - src - 1, + for (int32_t src = tsBnDnodes.size - 1; src >= 0; --src) { + SDnodeObj *pDnode = tsBnDnodes.list[src]; + mDebug("%d-dnode:%d, state:%s, score:%.1f, numOfCores:%d, openVnodes:%d", tsBnDnodes.size - src - 1, pDnode->dnodeId, mnodeGetDnodeStatusStr(pDnode->status), pDnode->score, pDnode->numOfCores, pDnode->openVnodes); } - float scoresDiff = tsBalanceDnodeList[tsBalanceDnodeListSize - 1]->score - tsBalanceDnodeList[0]->score; + float scoresDiff = tsBnDnodes.list[tsBnDnodes.size - 1]->score - tsBnDnodes.list[0]->score; if (scoresDiff < 0.01) { - mDebug("all dnodes:%d is already balanced, scoresDiff:%f", tsBalanceDnodeListSize, scoresDiff); + mDebug("all dnodes:%d is already balanced, scoresDiff:%f", tsBnDnodes.size, scoresDiff); return false; } - for (int32_t src = tsBalanceDnodeListSize - 1; src > 0; --src) { - SDnodeObj *pSrcDnode = tsBalanceDnodeList[src]; - float srcScore = 
balanceTryCalcDnodeScore(pSrcDnode, -1); + for (int32_t src = tsBnDnodes.size - 1; src > 0; --src) { + SDnodeObj *pSrcDnode = tsBnDnodes.list[src]; + float srcScore = bnTryCalcDnodeScore(pSrcDnode, -1); if (tsEnableBalance == 0 && pSrcDnode->status != TAOS_DN_STATUS_DROPPING) { continue; } @@ -363,29 +338,27 @@ static bool balanceMonitorBalance() { pIter = mnodeGetNextVgroup(pIter, &pVgroup); if (pVgroup == NULL) break; - if (balanceCheckDnodeInVgroup(pSrcDnode, pVgroup)) { + if (bnCheckDnodeInVgroup(pSrcDnode, pVgroup)) { for (int32_t dest = 0; dest < src; dest++) { - SDnodeObj *pDestDnode = tsBalanceDnodeList[dest]; - if (balanceCheckDnodeInVgroup(pDestDnode, pVgroup)) continue; + SDnodeObj *pDestDnode = tsBnDnodes.list[dest]; + if (bnCheckDnodeInVgroup(pDestDnode, pVgroup)) continue; - float destScore = balanceTryCalcDnodeScore(pDestDnode, 1); + float destScore = bnTryCalcDnodeScore(pDestDnode, 1); if (srcScore + 0.0001 < destScore) continue; - if (!balanceCheckFree(pDestDnode)) continue; + if (!bnCheckFree(pDestDnode)) continue; mDebug("vgId:%d, balance from dnode:%d to dnode:%d, srcScore:%.1f:%.1f, destScore:%.1f:%.1f", pVgroup->vgId, pSrcDnode->dnodeId, pDestDnode->dnodeId, pSrcDnode->score, srcScore, pDestDnode->score, destScore); - balanceAddVnode(pVgroup, pSrcDnode, pDestDnode); + bnAddVnode(pVgroup, pSrcDnode, pDestDnode); mnodeDecVgroupRef(pVgroup); - sdbFreeIter(pIter); + mnodeCancelGetNextVgroup(pIter); return true; } } mnodeDecVgroupRef(pVgroup); } - - sdbFreeIter(pIter); } return false; @@ -395,7 +368,7 @@ static bool balanceMonitorBalance() { // 1. reset balanceAccessSquence to zero // 2. reset state of dnodes to offline // 3. 
reset lastAccess of dnodes to zero -void balanceReset() { +void bnReset() { void * pIter = NULL; SDnodeObj *pDnode = NULL; while (1) { @@ -413,12 +386,10 @@ void balanceReset() { mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); - tsAccessSquence = 0; } -static int32_t balanceMonitorVgroups() { +static int32_t bnMonitorVgroups() { void * pIter = NULL; SVgObj *pVgroup = NULL; bool hasUpdatingVgroup = false; @@ -434,25 +405,24 @@ static int32_t balanceMonitorVgroups() { if (vgReplica > dbReplica) { mInfo("vgId:%d, replica:%d numOfVnodes:%d, try remove one vnode", pVgroup->vgId, dbReplica, vgReplica); hasUpdatingVgroup = true; - code = balanceRemoveVnode(pVgroup); + code = bnRemoveVnode(pVgroup); } else if (vgReplica < dbReplica) { mInfo("vgId:%d, replica:%d numOfVnodes:%d, try add one vnode", pVgroup->vgId, dbReplica, vgReplica); hasUpdatingVgroup = true; - code = balanceAddVnode(pVgroup, NULL, NULL); + code = bnAddVnode(pVgroup, NULL, NULL); } mnodeDecVgroupRef(pVgroup); if (code == TSDB_CODE_SUCCESS) { + mnodeCancelGetNextVgroup(pIter); break; } } - sdbFreeIter(pIter); - return hasUpdatingVgroup; } -static bool balanceMonitorDnodeDropping(SDnodeObj *pDnode) { +static bool bnMonitorDnodeDropping(SDnodeObj *pDnode) { mDebug("dnode:%d, in dropping state", pDnode->dnodeId); void * pIter = NULL; @@ -462,14 +432,15 @@ static bool balanceMonitorDnodeDropping(SDnodeObj *pDnode) { pIter = mnodeGetNextVgroup(pIter, &pVgroup); if (pVgroup == NULL) break; - hasThisDnode = balanceCheckDnodeInVgroup(pDnode, pVgroup); + hasThisDnode = bnCheckDnodeInVgroup(pDnode, pVgroup); mnodeDecVgroupRef(pVgroup); - if (hasThisDnode) break; + if (hasThisDnode) { + mnodeCancelGetNextVgroup(pIter); + break; + } } - sdbFreeIter(pIter); - if (!hasThisDnode) { mInfo("dnode:%d, dropped for all vnodes are moving to other dnodes", pDnode->dnodeId); mnodeDropDnode(pDnode, NULL); @@ -479,7 +450,7 @@ static bool balanceMonitorDnodeDropping(SDnodeObj *pDnode) { return false; } -static bool 
balanceMontiorDropping() { +static bool bnMontiorDropping() { void *pIter = NULL; SDnodeObj *pDnode = NULL; @@ -499,50 +470,46 @@ static bool balanceMontiorDropping() { pDnode->status = TAOS_DN_STATUS_DROPPING; mnodeUpdateDnode(pDnode); mnodeDecDnodeRef(pDnode); - sdbFreeIter(pIter); + mnodeCancelGetNextDnode(pIter); return true; } if (pDnode->status == TAOS_DN_STATUS_DROPPING) { - bool ret = balanceMonitorDnodeDropping(pDnode); + bool ret = bnMonitorDnodeDropping(pDnode); mnodeDecDnodeRef(pDnode); - sdbFreeIter(pIter); + mnodeCancelGetNextDnode(pIter); return ret; } } - sdbFreeIter(pIter); - return false; } -static bool balanceStart() { +bool bnStart() { if (!sdbIsMaster()) return false; - balanceLock(); + bnLock(); + bnAccquireDnodes(); - balanceAccquireDnodeList(); + bnMonitorDnodeModule(); - balanceMonitorDnodeModule(); - - bool updateSoon = balanceMontiorDropping(); + bool updateSoon = bnMontiorDropping(); if (!updateSoon) { - updateSoon = balanceMonitorVgroups(); + updateSoon = bnMonitorVgroups(); } if (!updateSoon) { - updateSoon = balanceMonitorBalance(); + updateSoon = bnMonitorBalance(); } - balanceReleaseDnodeList(); - - balanceUnLock(); + bnReleaseDnodes(); + bnUnLock(); return updateSoon; } -static void balanceSetVgroupOffline(SDnodeObj* pDnode) { +static void bnSetVgroupOffline(SDnodeObj* pDnode) { void *pIter = NULL; while (1) { SVgObj *pVgroup; @@ -556,11 +523,9 @@ static void balanceSetVgroupOffline(SDnodeObj* pDnode) { } mnodeDecVgroupRef(pVgroup); } - - sdbFreeIter(pIter); } -static void balanceCheckDnodeAccess() { +void bnCheckStatus() { void * pIter = NULL; SDnodeObj *pDnode = NULL; @@ -573,85 +538,39 @@ static void balanceCheckDnodeAccess() { pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT; mInfo("dnode:%d, set to offline state, access seq:%d last seq:%d laststat:%d", pDnode->dnodeId, tsAccessSquence, pDnode->lastAccess, pDnode->status); - balanceSetVgroupOffline(pDnode); + bnSetVgroupOffline(pDnode); } } mnodeDecDnodeRef(pDnode); } - - 
sdbFreeIter(pIter); } -static void balanceProcessBalanceTimer(void *handle, void *tmrId) { - if (!sdbIsMaster()) return; - - tsBalanceTimer = NULL; - tsAccessSquence ++; - - balanceCheckDnodeAccess(); - bool updateSoon = false; - - if (handle == NULL) { - if (tsAccessSquence % tsBalanceInterval == 0) { - mDebug("balance function is scheduled by timer"); - updateSoon = balanceStart(); - } - } else { - int64_t mseconds = (int64_t)handle; - mDebug("balance function is scheduled by event for %" PRId64 " mseconds arrived", mseconds); - updateSoon = balanceStart(); - } - - if (updateSoon) { - balanceStartTimer(1000); - } else { - taosTmrReset(balanceProcessBalanceTimer, tsStatusInterval * 1000, NULL, tsMnodeTmr, &tsBalanceTimer); - } -} - -static void balanceStartTimer(int64_t mseconds) { - taosTmrReset(balanceProcessBalanceTimer, mseconds, (void *)mseconds, tsMnodeTmr, &tsBalanceTimer); -} - -void balanceSyncNotify() { +void bnCheckModules() { if (sdbIsMaster()) { - balanceLock(); - balanceAccquireDnodeList(); - balanceMonitorDnodeModule(); - balanceReleaseDnodeList(); - balanceUnLock(); + bnLock(); + bnAccquireDnodes(); + bnMonitorDnodeModule(); + bnReleaseDnodes(); + bnUnLock(); } } -void balanceAsyncNotify() { - balanceStartTimer(500); -} - -int32_t balanceInit() { - mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_SCORES, balanceGetScoresMeta); - mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_SCORES, balanceRetrieveScores); - - pthread_mutex_init(&tsBalanceMutex, NULL); - balanceInitDnodeList(); - balanceStartTimer(2000); - mDebug("balance start fp:%p initialized", balanceProcessBalanceTimer); - - balanceReset(); +int32_t bnInit() { + pthread_mutex_init(&tsBnMgmt.mutex, NULL); + bnInitDnodes(); + bnInitThread(); + bnReset(); return 0; } -void balanceCleanUp() { - if (tsBalanceTimer != NULL) { - taosTmrStopA(&tsBalanceTimer); - pthread_mutex_destroy(&tsBalanceMutex); - tsBalanceTimer = NULL; - mDebug("stop balance timer"); - } - balanceCleanupDnodeList(); +void bnCleanUp() { + 
bnCleanupThread(); + bnCleanupDnodes(); + pthread_mutex_destroy(&tsBnMgmt.mutex); } -int32_t balanceDropDnode(SDnodeObj *pDnode) { +int32_t bnDropDnode(SDnodeObj *pDnode) { int32_t totalFreeVnodes = 0; void * pIter = NULL; SDnodeObj *pTempDnode = NULL; @@ -660,15 +579,13 @@ int32_t balanceDropDnode(SDnodeObj *pDnode) { pIter = mnodeGetNextDnode(pIter, &pTempDnode); if (pTempDnode == NULL) break; - if (pTempDnode != pDnode && balanceCheckFree(pTempDnode)) { + if (pTempDnode != pDnode && bnCheckFree(pTempDnode)) { totalFreeVnodes += (TSDB_MAX_VNODES - pTempDnode->openVnodes); } mnodeDecDnodeRef(pTempDnode); } - sdbFreeIter(pIter); - if (pDnode->openVnodes > totalFreeVnodes) { mError("dnode:%d, openVnodes:%d totalFreeVnodes:%d no enough dnodes", pDnode->dnodeId, pDnode->openVnodes, totalFreeVnodes); return TSDB_CODE_MND_NO_ENOUGH_DNODES; @@ -677,296 +594,17 @@ int32_t balanceDropDnode(SDnodeObj *pDnode) { pDnode->status = TAOS_DN_STATUS_DROPPING; mnodeUpdateDnode(pDnode); - balanceStartTimer(1100); + bnStartTimer(1100); return TSDB_CODE_SUCCESS; } -static int32_t balanceCalcCpuScore(SDnodeObj *pDnode) { - if (pDnode->cpuAvgUsage < 80) - return 0; - else if (pDnode->cpuAvgUsage < 90) - return 10; - else - return 50; -} - -static int32_t balanceCalcMemoryScore(SDnodeObj *pDnode) { - if (pDnode->memoryAvgUsage < 80) - return 0; - else if (pDnode->memoryAvgUsage < 90) - return 10; - else - return 50; -} - -static int32_t balanceCalcDiskScore(SDnodeObj *pDnode) { - if (pDnode->diskAvgUsage < 80) - return 0; - else if (pDnode->diskAvgUsage < 90) - return 10; - else - return 50; -} - -static int32_t balanceCalcBandwidthScore(SDnodeObj *pDnode) { - if (pDnode->bandwidthUsage < 30) - return 0; - else if (pDnode->bandwidthUsage < 80) - return 10; - else - return 50; -} - -static float balanceCalcModuleScore(SDnodeObj *pDnode) { - if (pDnode->numOfCores <= 0) return 0; - if (pDnode->isMgmt) { - return (float)tsMnodeEqualVnodeNum / pDnode->numOfCores; - } - return 0; -} - -static 
float balanceCalcVnodeScore(SDnodeObj *pDnode, int32_t extra) { - if (pDnode->status == TAOS_DN_STATUS_DROPPING || pDnode->status == TAOS_DN_STATUS_OFFLINE) return 100000000; - if (pDnode->numOfCores <= 0) return 0; - return (float)(pDnode->openVnodes + extra) / pDnode->numOfCores; -} - -/** - * calc singe score, such as cpu/memory/disk/bandwitdh/vnode - * 1. get the score config - * 2. if the value is out of range, use border data - * 3. otherwise use interpolation method - **/ -void balanceCalcDnodeScore(SDnodeObj *pDnode) { - pDnode->score = balanceCalcCpuScore(pDnode) + balanceCalcMemoryScore(pDnode) + balanceCalcDiskScore(pDnode) + - balanceCalcBandwidthScore(pDnode) + balanceCalcModuleScore(pDnode) + - balanceCalcVnodeScore(pDnode, 0) + pDnode->customScore; -} - -float balanceTryCalcDnodeScore(SDnodeObj *pDnode, int32_t extra) { - int32_t systemScore = balanceCalcCpuScore(pDnode) + balanceCalcMemoryScore(pDnode) + balanceCalcDiskScore(pDnode) + - balanceCalcBandwidthScore(pDnode); - float moduleScore = balanceCalcModuleScore(pDnode); - float vnodeScore = balanceCalcVnodeScore(pDnode, extra); - - float score = systemScore + moduleScore + vnodeScore + pDnode->customScore; - return score; -} - -static void balanceInitDnodeList() { - tsBalanceDnodeList = calloc(tsBalanceDnodeListMallocSize, sizeof(SDnodeObj *)); -} - -static void balanceCleanupDnodeList() { - if (tsBalanceDnodeList != NULL) { - free(tsBalanceDnodeList); - tsBalanceDnodeList = NULL; - } -} - -static void balanceCheckDnodeListSize(int32_t dnodesNum) { - if (tsBalanceDnodeListMallocSize <= dnodesNum) { - tsBalanceDnodeListMallocSize = dnodesNum * 2; - tsBalanceDnodeList = realloc(tsBalanceDnodeList, tsBalanceDnodeListMallocSize * sizeof(SDnodeObj *)); - } -} - -void balanceAccquireDnodeList() { - int32_t dnodesNum = mnodeGetDnodesNum(); - balanceCheckDnodeListSize(dnodesNum); - - void * pIter = NULL; - SDnodeObj *pDnode = NULL; - int32_t dnodeIndex = 0; - - while (1) { - if (dnodeIndex >= dnodesNum) 
break; - pIter = mnodeGetNextDnode(pIter, &pDnode); - if (pDnode == NULL) break; - if (pDnode->status == TAOS_DN_STATUS_OFFLINE) { - mnodeDecDnodeRef(pDnode); - continue; - } - - balanceCalcDnodeScore(pDnode); - - int32_t orderIndex = dnodeIndex; - for (; orderIndex > 0; --orderIndex) { - if (pDnode->score > tsBalanceDnodeList[orderIndex - 1]->score) { - break; - } - tsBalanceDnodeList[orderIndex] = tsBalanceDnodeList[orderIndex - 1]; - } - tsBalanceDnodeList[orderIndex] = pDnode; - dnodeIndex++; - } - - sdbFreeIter(pIter); - - tsBalanceDnodeListSize = dnodeIndex; -} - -void balanceReleaseDnodeList() { - for (int32_t i = 0; i < tsBalanceDnodeListSize; ++i) { - SDnodeObj *pDnode = tsBalanceDnodeList[i]; - if (pDnode != NULL) { - mnodeDecDnodeRef(pDnode); - } - } -} - -static int32_t balanceGetScoresMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { - SUserObj *pUser = mnodeGetUserFromConn(pConn); - if (pUser == NULL) return 0; - - if (strcmp(pUser->pAcct->user, "root") != 0) { - mnodeDecUserRef(pUser); - return TSDB_CODE_MND_NO_RIGHTS; - } - - int32_t cols = 0; - SSchema *pSchema = pMeta->schema; - - pShow->bytes[cols] = 2; - pSchema[cols].type = TSDB_DATA_TYPE_SMALLINT; - strcpy(pSchema[cols].name, "id"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; - strcpy(pSchema[cols].name, "system scores"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; - strcpy(pSchema[cols].name, "custom scores"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; - strcpy(pSchema[cols].name, "module scores"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; - strcpy(pSchema[cols].name, "vnode scores"); - pSchema[cols].bytes = 
htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; - strcpy(pSchema[cols].name, "total scores"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_INT; - strcpy(pSchema[cols].name, "open vnodes"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_INT; - strcpy(pSchema[cols].name, "cpu cores"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pShow->bytes[cols] = 18 + VARSTR_HEADER_SIZE; - pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "balance state"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - - pMeta->numOfColumns = htons(cols); - pShow->numOfColumns = cols; - - pShow->offset[0] = 0; - for (int32_t i = 1; i < cols; ++i) { - pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1]; - } - - pShow->numOfRows = mnodeGetDnodesNum(); - pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1]; - pShow->pIter = NULL; - - mnodeDecUserRef(pUser); - - return 0; -} - -static int32_t balanceRetrieveScores(SShowObj *pShow, char *data, int32_t rows, void *pConn) { - int32_t numOfRows = 0; - SDnodeObj *pDnode = NULL; - char * pWrite; - int32_t cols = 0; - - while (numOfRows < rows) { - pShow->pIter = mnodeGetNextDnode(pShow->pIter, &pDnode); - if (pDnode == NULL) break; - - int32_t systemScore = balanceCalcCpuScore(pDnode) + balanceCalcMemoryScore(pDnode) + balanceCalcDiskScore(pDnode) + - balanceCalcBandwidthScore(pDnode); - float moduleScore = balanceCalcModuleScore(pDnode); - float vnodeScore = balanceCalcVnodeScore(pDnode, 0); - - cols = 0; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(int16_t *)pWrite = pDnode->dnodeId; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(float *)pWrite = systemScore; - cols++; 
- - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(float *)pWrite = pDnode->customScore; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(float *)pWrite = (int32_t)moduleScore; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(float *)pWrite = (int32_t)vnodeScore; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(float *)pWrite = (int32_t)(vnodeScore + moduleScore + pDnode->customScore + systemScore); - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(int32_t *)pWrite = pDnode->openVnodes; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(int32_t *)pWrite = pDnode->numOfCores; - cols++; - - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - STR_TO_VARSTR(pWrite, mnodeGetDnodeStatusStr(pDnode->status)); - cols++; - - numOfRows++; - mnodeDecDnodeRef(pDnode); - } - - mnodeVacuumResult(data, pShow->numOfColumns, numOfRows, rows, pShow); - pShow->numOfReads += numOfRows; - return numOfRows; -} - -static void balanceMonitorDnodeModule() { +static void bnMonitorDnodeModule() { int32_t numOfMnodes = mnodeGetMnodesNum(); if (numOfMnodes >= tsNumOfMnodes) return; - for (int32_t i = 0; i < tsBalanceDnodeListSize; ++i) { - SDnodeObj *pDnode = tsBalanceDnodeList[i]; + for (int32_t i = 0; i < tsBnDnodes.size; ++i) { + SDnodeObj *pDnode = tsBnDnodes.list[i]; if (pDnode == NULL) break; if (pDnode->isMgmt || pDnode->status == TAOS_DN_STATUS_DROPPING || pDnode->status == TAOS_DN_STATUS_OFFLINE) { @@ -990,7 +628,7 @@ static void balanceMonitorDnodeModule() { } } -int32_t balanceAlterDnode(struct SDnodeObj *pSrcDnode, int32_t vnodeId, int32_t dnodeId) { +int32_t bnAlterDnode(struct SDnodeObj *pSrcDnode, int32_t vnodeId, int32_t dnodeId) { if (!sdbIsMaster()) { mError("dnode:%d, failed to alter vgId:%d 
to dnode:%d, for self not master", pSrcDnode->dnodeId, vnodeId, dnodeId); return TSDB_CODE_MND_DNODE_NOT_EXIST; @@ -1014,29 +652,29 @@ int32_t balanceAlterDnode(struct SDnodeObj *pSrcDnode, int32_t vnodeId, int32_t return TSDB_CODE_MND_DNODE_NOT_EXIST; } - balanceLock(); - balanceAccquireDnodeList(); + bnLock(); + bnAccquireDnodes(); int32_t code = TSDB_CODE_SUCCESS; - if (!balanceCheckDnodeInVgroup(pSrcDnode, pVgroup)) { + if (!bnCheckDnodeInVgroup(pSrcDnode, pVgroup)) { mError("dnode:%d, failed to alter vgId:%d to dnode:%d, vgroup not in dnode:%d", pSrcDnode->dnodeId, vnodeId, dnodeId, pSrcDnode->dnodeId); code = TSDB_CODE_MND_VGROUP_NOT_IN_DNODE; - } else if (balanceCheckDnodeInVgroup(pDestDnode, pVgroup)) { + } else if (bnCheckDnodeInVgroup(pDestDnode, pVgroup)) { mError("dnode:%d, failed to alter vgId:%d to dnode:%d, vgroup already in dnode:%d", pSrcDnode->dnodeId, vnodeId, dnodeId, dnodeId); code = TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE; - } else if (!balanceCheckFree(pDestDnode)) { + } else if (!bnCheckFree(pDestDnode)) { mError("dnode:%d, failed to alter vgId:%d to dnode:%d, for dnode:%d not free", pSrcDnode->dnodeId, vnodeId, dnodeId, dnodeId); code = TSDB_CODE_MND_DNODE_NOT_FREE; } else { - code = balanceAddVnode(pVgroup, pSrcDnode, pDestDnode); + code = bnAddVnode(pVgroup, pSrcDnode, pDestDnode); mInfo("dnode:%d, alter vgId:%d to dnode:%d, result:%s", pSrcDnode->dnodeId, vnodeId, dnodeId, tstrerror(code)); } - balanceReleaseDnodeList(); - balanceUnLock(); + bnReleaseDnodes(); + bnUnLock(); mnodeDecVgroupRef(pVgroup); mnodeDecDnodeRef(pDestDnode); diff --git a/src/balance/src/bnScore.c b/src/balance/src/bnScore.c new file mode 100644 index 0000000000..e5ad7a2119 --- /dev/null +++ b/src/balance/src/bnScore.c @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "tglobal.h" +#include "mnodeShow.h" +#include "mnodeUser.h" +#include "bnScore.h" + +SBnDnodes tsBnDnodes; + +static int32_t bnGetScoresMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); +static int32_t bnRetrieveScores(SShowObj *pShow, char *data, int32_t rows, void *pConn); + +static int32_t bnCalcCpuScore(SDnodeObj *pDnode) { + if (pDnode->cpuAvgUsage < 80) + return 0; + else if (pDnode->cpuAvgUsage < 90) + return 10; + else + return 50; +} + +static int32_t bnCalcMemoryScore(SDnodeObj *pDnode) { + if (pDnode->memoryAvgUsage < 80) + return 0; + else if (pDnode->memoryAvgUsage < 90) + return 10; + else + return 50; +} + +static int32_t bnCalcDiskScore(SDnodeObj *pDnode) { + if (pDnode->diskAvgUsage < 80) + return 0; + else if (pDnode->diskAvgUsage < 90) + return 10; + else + return 50; +} + +static int32_t bnCalcBandScore(SDnodeObj *pDnode) { + if (pDnode->bandwidthUsage < 30) + return 0; + else if (pDnode->bandwidthUsage < 80) + return 10; + else + return 50; +} + +static float bnCalcModuleScore(SDnodeObj *pDnode) { + if (pDnode->numOfCores <= 0) return 0; + if (pDnode->isMgmt) { + return (float)tsMnodeEqualVnodeNum / pDnode->numOfCores; + } + return 0; +} + +static float bnCalcVnodeScore(SDnodeObj *pDnode, int32_t extra) { + if (pDnode->status == TAOS_DN_STATUS_DROPPING || pDnode->status == TAOS_DN_STATUS_OFFLINE) return 100000000; + if (pDnode->numOfCores <= 0) 
return 0; + return (float)(pDnode->openVnodes + extra) / pDnode->numOfCores; +} + +/** + * calc singe score, such as cpu/memory/disk/bandwitdh/vnode + * 1. get the score config + * 2. if the value is out of range, use border data + * 3. otherwise use interpolation method + **/ +static void bnCalcDnodeScore(SDnodeObj *pDnode) { + pDnode->score = bnCalcCpuScore(pDnode) + bnCalcMemoryScore(pDnode) + bnCalcDiskScore(pDnode) + + bnCalcBandScore(pDnode) + bnCalcModuleScore(pDnode) + bnCalcVnodeScore(pDnode, 0) + + pDnode->customScore; +} + +float bnTryCalcDnodeScore(SDnodeObj *pDnode, int32_t extra) { + int32_t systemScore = bnCalcCpuScore(pDnode) + bnCalcMemoryScore(pDnode) + bnCalcDiskScore(pDnode) + + bnCalcBandScore(pDnode); + float moduleScore = bnCalcModuleScore(pDnode); + float vnodeScore = bnCalcVnodeScore(pDnode, extra); + + float score = systemScore + moduleScore + vnodeScore + pDnode->customScore; + return score; +} + +void bnInitDnodes() { + mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_SCORES, bnGetScoresMeta); + mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_SCORES, bnRetrieveScores); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_SCORES, mnodeCancelGetNextDnode); + + memset(&tsBnDnodes, 0, sizeof(SBnDnodes)); + tsBnDnodes.maxSize = 16; + tsBnDnodes.list = calloc(tsBnDnodes.maxSize, sizeof(SDnodeObj *)); +} + +void bnCleanupDnodes() { + if (tsBnDnodes.list != NULL) { + free(tsBnDnodes.list); + tsBnDnodes.list = NULL; + } +} + +static void bnCheckDnodesSize(int32_t dnodesNum) { + if (tsBnDnodes.maxSize <= dnodesNum) { + tsBnDnodes.maxSize = dnodesNum * 2; + tsBnDnodes.list = realloc(tsBnDnodes.list, tsBnDnodes.maxSize * sizeof(SDnodeObj *)); + } +} + +void bnAccquireDnodes() { + int32_t dnodesNum = mnodeGetDnodesNum(); + bnCheckDnodesSize(dnodesNum); + + void * pIter = NULL; + SDnodeObj *pDnode = NULL; + int32_t dnodeIndex = 0; + + while (1) { + if (dnodeIndex >= dnodesNum) { + mnodeCancelGetNextDnode(pIter); + break; + } + + pIter = mnodeGetNextDnode(pIter, &pDnode); + 
if (pDnode == NULL) break; + if (pDnode->status == TAOS_DN_STATUS_OFFLINE) { + mnodeDecDnodeRef(pDnode); + continue; + } + + bnCalcDnodeScore(pDnode); + + int32_t orderIndex = dnodeIndex; + for (; orderIndex > 0; --orderIndex) { + if (pDnode->score > tsBnDnodes.list[orderIndex - 1]->score) { + break; + } + tsBnDnodes.list[orderIndex] = tsBnDnodes.list[orderIndex - 1]; + } + tsBnDnodes.list[orderIndex] = pDnode; + dnodeIndex++; + } + + tsBnDnodes.size = dnodeIndex; +} + +void bnReleaseDnodes() { + for (int32_t i = 0; i < tsBnDnodes.size; ++i) { + SDnodeObj *pDnode = tsBnDnodes.list[i]; + if (pDnode != NULL) { + mnodeDecDnodeRef(pDnode); + } + } +} + +static int32_t bnGetScoresMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { + SUserObj *pUser = mnodeGetUserFromConn(pConn); + if (pUser == NULL) return 0; + + if (strcmp(pUser->pAcct->user, "root") != 0) { + mnodeDecUserRef(pUser); + return TSDB_CODE_MND_NO_RIGHTS; + } + + int32_t cols = 0; + SSchema *pSchema = pMeta->schema; + + pShow->bytes[cols] = 2; + pSchema[cols].type = TSDB_DATA_TYPE_SMALLINT; + strcpy(pSchema[cols].name, "id"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; + strcpy(pSchema[cols].name, "system scores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; + strcpy(pSchema[cols].name, "custom scores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; + strcpy(pSchema[cols].name, "module scores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; + strcpy(pSchema[cols].name, "vnode scores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_FLOAT; + strcpy(pSchema[cols].name, 
"total scores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_INT; + strcpy(pSchema[cols].name, "open vnodes"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 4; + pSchema[cols].type = TSDB_DATA_TYPE_INT; + strcpy(pSchema[cols].name, "cpu cores"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 18 + VARSTR_HEADER_SIZE; + pSchema[cols].type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema[cols].name, "balance state"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pMeta->numOfColumns = htons(cols); + pShow->numOfColumns = cols; + + pShow->offset[0] = 0; + for (int32_t i = 1; i < cols; ++i) { + pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1]; + } + + pShow->numOfRows = mnodeGetDnodesNum(); + pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1]; + pShow->pIter = NULL; + + mnodeDecUserRef(pUser); + + return 0; +} + +static int32_t bnRetrieveScores(SShowObj *pShow, char *data, int32_t rows, void *pConn) { + int32_t numOfRows = 0; + SDnodeObj *pDnode = NULL; + char * pWrite; + int32_t cols = 0; + + while (numOfRows < rows) { + pShow->pIter = mnodeGetNextDnode(pShow->pIter, &pDnode); + if (pDnode == NULL) break; + + int32_t systemScore = bnCalcCpuScore(pDnode) + bnCalcMemoryScore(pDnode) + bnCalcDiskScore(pDnode) + bnCalcBandScore(pDnode); + float moduleScore = bnCalcModuleScore(pDnode); + float vnodeScore = bnCalcVnodeScore(pDnode, 0); + + cols = 0; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(int16_t *)pWrite = pDnode->dnodeId; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(float *)pWrite = systemScore; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(float *)pWrite = pDnode->customScore; + cols++; + + pWrite = data + pShow->offset[cols] * rows 
+ pShow->bytes[cols] * numOfRows; + *(float *)pWrite = (int32_t)moduleScore; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(float *)pWrite = (int32_t)vnodeScore; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(float *)pWrite = (int32_t)(vnodeScore + moduleScore + pDnode->customScore + systemScore); + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(int32_t *)pWrite = pDnode->openVnodes; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(int32_t *)pWrite = pDnode->numOfCores; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + STR_TO_VARSTR(pWrite, mnodeGetDnodeStatusStr(pDnode->status)); + cols++; + + numOfRows++; + mnodeDecDnodeRef(pDnode); + } + + mnodeVacuumResult(data, pShow->numOfColumns, numOfRows, rows, pShow); + pShow->numOfReads += numOfRows; + return numOfRows; +} diff --git a/src/balance/src/bnThread.c b/src/balance/src/bnThread.c new file mode 100644 index 0000000000..bf046a9fae --- /dev/null +++ b/src/balance/src/bnThread.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "ttimer.h" +#include "tglobal.h" +#include "mnodeSdb.h" +#include "bnThread.h" + +static SBnThread tsBnThread; + +static void *bnThreadFunc(void *arg) { + while (1) { + pthread_mutex_lock(&tsBnThread.mutex); + if (tsBnThread.stop) { + pthread_mutex_unlock(&tsBnThread.mutex); + break; + } + + pthread_cond_wait(&tsBnThread.cond, &tsBnThread.mutex); + bool updateSoon = bnStart(); + bnStartTimer(updateSoon ? 1000 : -1); + pthread_mutex_unlock(&(tsBnThread.mutex)); + } + + mDebug("balance thread is stopped"); + return NULL; +} + +int32_t bnInitThread() { + memset(&tsBnThread, 0, sizeof(SBnThread)); + tsBnThread.stop = false; + pthread_mutex_init(&tsBnThread.mutex, NULL); + pthread_cond_init(&tsBnThread.cond, NULL); + + pthread_attr_t thattr; + pthread_attr_init(&thattr); + pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE); + int32_t ret = pthread_create(&tsBnThread.thread, &thattr, bnThreadFunc, NULL); + pthread_attr_destroy(&thattr); + + if (ret != 0) { + mError("failed to create balance thread since %s", strerror(errno)); + return -1; + } + + bnStartTimer(2000); + mDebug("balance thread is created"); + return 0; +} + +void bnCleanupThread() { + mDebug("balance thread will be cleanup"); + + if (tsBnThread.timer != NULL) { + taosTmrStopA(&tsBnThread.timer); + tsBnThread.timer = NULL; + mDebug("stop balance timer"); + } + + pthread_mutex_lock(&tsBnThread.mutex); + tsBnThread.stop = true; + pthread_cond_signal(&tsBnThread.cond); + pthread_mutex_unlock(&(tsBnThread.mutex)); + pthread_join(tsBnThread.thread, NULL); + + pthread_cond_destroy(&tsBnThread.cond); + pthread_mutex_destroy(&tsBnThread.mutex); +} + +static void bnPostSignal() { + if (tsBnThread.stop) return; + + pthread_mutex_lock(&tsBnThread.mutex); + pthread_cond_signal(&tsBnThread.cond); + pthread_mutex_unlock(&(tsBnThread.mutex)); +} + +/* + * once sdb work as mater, then tsAccessSquence reset to zero + * increase tsAccessSquence every 
balance interval + */ + +static void bnProcessTimer(void *handle, void *tmrId) { + if (!sdbIsMaster()) return; + if (tsBnThread.stop) return; + + tsBnThread.timer = NULL; + tsAccessSquence++; + + bnCheckStatus(); + bnStartTimer(-1); + + if (handle == NULL) { + if (tsAccessSquence % tsBalanceInterval == 0) { + mDebug("balance function is scheduled by timer"); + bnPostSignal(); + } + } else { + int64_t mseconds = (int64_t)handle; + mDebug("balance function is scheduled by event for %" PRId64 " mseconds arrived", mseconds); + bnPostSignal(); + } +} + +void bnStartTimer(int64_t mseconds) { + if (tsBnThread.stop) return; + + bool updateSoon = (mseconds != -1); + if (updateSoon) { + taosTmrReset(bnProcessTimer, mseconds, (void *)mseconds, tsMnodeTmr, &tsBnThread.timer); + } else { + taosTmrReset(bnProcessTimer, tsStatusInterval * 1000, NULL, tsMnodeTmr, &tsBnThread.timer); + } +} + +void bnNotify() { + bnStartTimer(500); +} diff --git a/src/client/inc/tscLocalMerge.h b/src/client/inc/tscLocalMerge.h index 2c7c2f51d0..43ba31f331 100644 --- a/src/client/inc/tscLocalMerge.h +++ b/src/client/inc/tscLocalMerge.h @@ -56,7 +56,6 @@ typedef struct SLocalReducer { tFilePage * pTempBuffer; struct SQLFunctionCtx *pCtx; int32_t rowSize; // size of each intermediate result. 
- int32_t finalRowSize; // final result row size int32_t status; // denote it is in reduce process, in reduce process, it bool hasPrevRow; // cannot be released bool hasUnprocessedRow; diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index 35f9b3af62..a1b6174de0 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -246,11 +246,14 @@ typedef struct SQueryInfo { int16_t fillType; // final result fill type int16_t numOfTables; STableMetaInfo **pTableMetaInfo; - struct STSBuf * tsBuf; + struct STSBuf *tsBuf; int64_t * fillVal; // default value for fill char * msg; // pointer to the pCmd->payload to keep error message temporarily int64_t clauseLimit; // limit for current sub clause + int64_t prjOffset; // offset value in the original sql expression, only applied at client side + int64_t tableLimit; // table limit in case of super table projection query + global order + limit + int32_t udColumnId; // current user-defined constant output field column id, monotonically decreases from TSDB_UD_COLUMN_INDEX int16_t resColumnId; // result column id } SQueryInfo; @@ -333,7 +336,7 @@ typedef struct STscObj { char superAuth : 1; uint32_t connId; uint64_t rid; // ref ID returned by taosAddRef - struct SSqlObj * pHb; + int64_t hbrid; struct SSqlObj * sqlList; struct SSqlStream *streamList; SRpcCorEpSet *tscCorMgmtEpSet; @@ -374,7 +377,7 @@ typedef struct SSqlObj { struct SSqlObj **pSubs; struct SSqlObj *prev, *next; - struct SSqlObj **self; + int64_t self; } SSqlObj; typedef struct SSqlStream { @@ -508,7 +511,7 @@ static FORCE_INLINE void tscGetResultColumnChr(SSqlRes* pRes, SFieldInfo* pField } extern SCacheObj* tscMetaCache; -extern SCacheObj* tscObjCache; +extern int tscObjRef; extern void * tscTmr; extern void * tscQhandle; extern int tscKeepConn[]; diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index d39b833374..56b7f052f7 100644 --- a/src/client/src/tscFunctionImpl.c +++ 
b/src/client/src/tscFunctionImpl.c @@ -64,13 +64,13 @@ } \ } while (0); -#define DO_UPDATE_TAG_COLUMNS_WITHOUT_TS(ctx) \ -do {\ -for (int32_t i = 0; i < (ctx)->tagInfo.numOfTagCols; ++i) { \ - SQLFunctionCtx *__ctx = (ctx)->tagInfo.pTagCtxList[i]; \ - aAggs[TSDB_FUNC_TAG].xFunction(__ctx); \ - } \ -} while(0); +#define DO_UPDATE_TAG_COLUMNS_WITHOUT_TS(ctx) \ + do { \ + for (int32_t i = 0; i < (ctx)->tagInfo.numOfTagCols; ++i) { \ + SQLFunctionCtx *__ctx = (ctx)->tagInfo.pTagCtxList[i]; \ + aAggs[TSDB_FUNC_TAG].xFunction(__ctx); \ + } \ + } while (0); void noop1(SQLFunctionCtx *UNUSED_PARAM(pCtx)) {} void noop2(SQLFunctionCtx *UNUSED_PARAM(pCtx), int32_t UNUSED_PARAM(index)) {} @@ -426,8 +426,7 @@ static void count_function_f(SQLFunctionCtx *pCtx, int32_t index) { } SET_VAL(pCtx, 1, 1); - - *((int64_t *)pCtx->aOutputBuf) += 1; + *((int64_t *)pCtx->aOutputBuf) += pCtx->size; // do not need it actually SResultRowCellInfo *pInfo = GET_RES_INFO(pCtx); @@ -3624,94 +3623,158 @@ static bool twa_function_setup(SQLFunctionCtx *pCtx) { return false; } - SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); //->aOutputBuf + pCtx->outputBytes; - STwaInfo * pInfo = GET_ROWCELL_INTERBUF(pResInfo); - - pInfo->lastKey = INT64_MIN; - pInfo->type = pCtx->inputType; - + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + STwaInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + pInfo->lastKey = INT64_MIN; + pInfo->win = TSWINDOW_INITIALIZER; return true; } -static FORCE_INLINE void setTWALastVal(SQLFunctionCtx *pCtx, const char *data, int32_t i, STwaInfo *pInfo) { - switch (pCtx->inputType) { - case TSDB_DATA_TYPE_INT: - pInfo->iLastValue = GET_INT32_VAL(data + pCtx->inputBytes * i); - break; - case TSDB_DATA_TYPE_TINYINT: - pInfo->iLastValue = GET_INT8_VAL(data + pCtx->inputBytes * i); - break; - case TSDB_DATA_TYPE_SMALLINT: - pInfo->iLastValue = GET_INT16_VAL(data + pCtx->inputBytes * i); - break; - case TSDB_DATA_TYPE_BIGINT: - pInfo->iLastValue = GET_INT64_VAL(data + pCtx->inputBytes * 
i); - break; - case TSDB_DATA_TYPE_FLOAT: - pInfo->dLastValue = GET_FLOAT_VAL(data + pCtx->inputBytes * i); - break; - case TSDB_DATA_TYPE_DOUBLE: - pInfo->dLastValue = GET_DOUBLE_VAL(data + pCtx->inputBytes * i); - break; - default: - assert(0); +static int32_t twa_function_impl(SQLFunctionCtx* pCtx, int32_t tsIndex, int32_t index, int32_t size) { + int32_t notNullElems = 0; + TSKEY *primaryKey = pCtx->ptsList; + + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + + STwaInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + + int32_t i = index; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + + if (pCtx->start.key != INT64_MIN) { + assert((pCtx->start.key < primaryKey[tsIndex + i] && pCtx->order == TSDB_ORDER_ASC) || + (pCtx->start.key > primaryKey[tsIndex + i] && pCtx->order == TSDB_ORDER_DESC)); + + assert(pInfo->lastKey == INT64_MIN); + + pInfo->lastKey = primaryKey[tsIndex + i]; + GET_TYPED_DATA(pInfo->lastValue, double, pCtx->inputType, GET_INPUT_CHAR_INDEX(pCtx, index)); + + pInfo->dOutput += ((pInfo->lastValue + pCtx->start.val) / 2) * (pInfo->lastKey - pCtx->start.key); + + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = pCtx->start.key; + notNullElems++; + i += step; + } else if (pInfo->lastKey == INT64_MIN) { + pInfo->lastKey = primaryKey[tsIndex + i]; + GET_TYPED_DATA(pInfo->lastValue, double, pCtx->inputType, GET_INPUT_CHAR_INDEX(pCtx, index)); + + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = pInfo->lastKey; + notNullElems++; + i += step; } + + // calculate the value of + switch(pCtx->inputType) { + case TSDB_DATA_TYPE_TINYINT: { + int8_t *val = (int8_t*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + case 
TSDB_DATA_TYPE_SMALLINT: { + int16_t *val = (int16_t*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t *val = (int32_t*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t *val = (int64_t*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = (double) val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *val = (float*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double *val = (double*) GET_INPUT_CHAR_INDEX(pCtx, 0); + for (; i < size && i >= 0; i += step) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + tsIndex] - pInfo->lastKey); + pInfo->lastValue = 
val[i]; + pInfo->lastKey = primaryKey[i + tsIndex]; + } + break; + } + default: assert(0); + } + + // the last interpolated time window value + if (pCtx->end.key != INT64_MIN) { + pInfo->dOutput += ((pInfo->lastValue + pCtx->end.val) / 2) * (pCtx->end.key - pInfo->lastKey); + pInfo->lastValue = pCtx->end.val; + pInfo->lastKey = pCtx->end.key; + } + + pInfo->win.ekey = pInfo->lastKey; + return notNullElems; } static void twa_function(SQLFunctionCtx *pCtx) { void * data = GET_INPUT_CHAR(pCtx); - TSKEY *primaryKey = pCtx->ptsList; - - int32_t notNullElems = 0; - + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); STwaInfo * pInfo = GET_ROWCELL_INTERBUF(pResInfo); - int32_t i = 0; - // skip null value + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); + int32_t i = (pCtx->order == TSDB_ORDER_ASC)? 0:(pCtx->size - 1); while (pCtx->hasNull && i < pCtx->size && isNull((char *)data + pCtx->inputBytes * i, pCtx->inputType)) { - i++; + i += step; } - - if (i >= pCtx->size) { - return; - } - - if (pInfo->lastKey == INT64_MIN) { - pInfo->lastKey = pCtx->nStartQueryTimestamp; - setTWALastVal(pCtx, data, i, pInfo); - - pInfo->hasResult = DATA_SET_FLAG; - } - - notNullElems++; - - if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT || pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { - pInfo->dOutput += pInfo->dLastValue * (primaryKey[i] - pInfo->lastKey); - } else { - pInfo->iOutput += pInfo->iLastValue * (primaryKey[i] - pInfo->lastKey); - } - - pInfo->lastKey = primaryKey[i]; - setTWALastVal(pCtx, data, i, pInfo); - - for (++i; i < pCtx->size; i++) { - if (pCtx->hasNull && isNull((char *)data + pCtx->inputBytes * i, pCtx->inputType)) { - continue; - } - - notNullElems++; - if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT || pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { - pInfo->dOutput += pInfo->dLastValue * (primaryKey[i] - pInfo->lastKey); - } else { - pInfo->iOutput += pInfo->iLastValue * (primaryKey[i] - pInfo->lastKey); - } - - pInfo->lastKey = primaryKey[i]; - setTWALastVal(pCtx, 
data, i, pInfo); - } - + + int32_t notNullElems = twa_function_impl(pCtx, pCtx->startOffset, i, pCtx->size); SET_VAL(pCtx, notNullElems, 1); if (notNullElems > 0) { @@ -3721,8 +3784,6 @@ static void twa_function(SQLFunctionCtx *pCtx) { if (pCtx->stableQuery) { memcpy(pCtx->aOutputBuf, pInfo, sizeof(STwaInfo)); } - - // pCtx->numOfIteratedElems += notNullElems; } static void twa_function_f(SQLFunctionCtx *pCtx, int32_t index) { @@ -3730,34 +3791,136 @@ static void twa_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - - SET_VAL(pCtx, 1, 1); - + + int32_t notNullElems = 0; TSKEY *primaryKey = pCtx->ptsList; - + SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); + STwaInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); - - if (pInfo->lastKey == INT64_MIN) { - pInfo->lastKey = pCtx->nStartQueryTimestamp; - setTWALastVal(pCtx, pData, 0, pInfo); - + int32_t i = pCtx->startOffset; + int32_t size = pCtx->size; + + if (pCtx->start.key != INT64_MIN) { + assert(pInfo->lastKey == INT64_MIN); + + pInfo->lastKey = primaryKey[index]; + GET_TYPED_DATA(pInfo->lastValue, double, pCtx->inputType, GET_INPUT_CHAR_INDEX(pCtx, index)); + + pInfo->dOutput += ((pInfo->lastValue + pCtx->start.val) / 2) * (pInfo->lastKey - pCtx->start.key); + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = pCtx->start.key; + notNullElems++; + i += 1; + } else if (pInfo->lastKey == INT64_MIN) { + pInfo->lastKey = primaryKey[index]; + GET_TYPED_DATA(pInfo->lastValue, double, pCtx->inputType, GET_INPUT_CHAR_INDEX(pCtx, index)); + + pInfo->hasResult = DATA_SET_FLAG; + pInfo->win.skey = pInfo->lastKey; + notNullElems++; + i += 1; } - - if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT || pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { - pInfo->dOutput += pInfo->dLastValue * (primaryKey[index] - pInfo->lastKey); - } else { - pInfo->iOutput += pInfo->iLastValue * (primaryKey[index] - pInfo->lastKey); + + // calculate the value of + switch(pCtx->inputType) { + 
case TSDB_DATA_TYPE_TINYINT: { + int8_t *val = (int8_t*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + int16_t *val = (int16_t*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t *val = (int32_t*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t *val = (int64_t*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = (double) val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float *val = (float*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + case 
TSDB_DATA_TYPE_DOUBLE: { + double *val = (double*) GET_INPUT_CHAR_INDEX(pCtx, index); + for (; i < size; i++) { + if (pCtx->hasNull && isNull((const char*) &val[i], pCtx->inputType)) { + continue; + } + + pInfo->dOutput += ((val[i] + pInfo->lastValue) / 2) * (primaryKey[i + index] - pInfo->lastKey); + pInfo->lastValue = val[i]; + pInfo->lastKey = primaryKey[i + index]; + } + break; + } + default: assert(0); } - - // record the last key/value - pInfo->lastKey = primaryKey[index]; - setTWALastVal(pCtx, pData, 0, pInfo); - - // pCtx->numOfIteratedElems += 1; - pResInfo->hasResult = DATA_SET_FLAG; - + + // the last interpolated time window value + if (pCtx->end.key != INT64_MIN) { + pInfo->dOutput += ((pInfo->lastValue + pCtx->end.val) / 2) * (pCtx->end.key - pInfo->lastKey); + pInfo->lastValue = pCtx->end.val; + pInfo->lastKey = pCtx->end.key; + } + + pInfo->win.ekey = pInfo->lastKey; + + SET_VAL(pCtx, notNullElems, 1); + + if (notNullElems > 0) { + pResInfo->hasResult = DATA_SET_FLAG; + } + if (pCtx->stableQuery) { memcpy(pCtx->aOutputBuf, GET_ROWCELL_INTERBUF(pResInfo), sizeof(STwaInfo)); } @@ -3778,16 +3941,10 @@ static void twa_func_merge(SQLFunctionCtx *pCtx) { } numOfNotNull++; - if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { - pBuf->iOutput += pInput->iOutput; - } else { - pBuf->dOutput += pInput->dOutput; - } - - pBuf->SKey = pInput->SKey; - pBuf->EKey = pInput->EKey; + pBuf->dOutput += pInput->dOutput; + + pBuf->win = pInput->win; pBuf->lastKey = pInput->lastKey; - pBuf->iLastValue = pInput->iLastValue; } SET_VAL(pCtx, numOfNotNull, 1); @@ -3814,21 +3971,17 @@ void twa_function_finalizer(SQLFunctionCtx *pCtx) { SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); STwaInfo *pInfo = (STwaInfo *)GET_ROWCELL_INTERBUF(pResInfo); - assert(pInfo->EKey >= pInfo->lastKey && pInfo->hasResult == pResInfo->hasResult); + assert(pInfo->win.ekey == pInfo->lastKey && pInfo->hasResult == pResInfo->hasResult); if (pInfo->hasResult != 
DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); return; } - if (pInfo->SKey == pInfo->EKey) { - *(double *)pCtx->aOutputBuf = 0; - } else if (pInfo->type >= TSDB_DATA_TYPE_TINYINT && pInfo->type <= TSDB_DATA_TYPE_BIGINT) { - pInfo->iOutput += pInfo->iLastValue * (pInfo->EKey - pInfo->lastKey); - *(double *)pCtx->aOutputBuf = pInfo->iOutput / (double)(pInfo->EKey - pInfo->SKey); + if (pInfo->win.ekey == pInfo->win.skey) { + *(double *)pCtx->aOutputBuf = pInfo->lastValue; } else { - pInfo->dOutput += pInfo->dLastValue * (pInfo->EKey - pInfo->lastKey); - *(double *)pCtx->aOutputBuf = pInfo->dOutput / (pInfo->EKey - pInfo->SKey); + *(double *)pCtx->aOutputBuf = pInfo->dOutput / (pInfo->win.ekey - pInfo->win.skey); } GET_RES_INFO(pCtx)->numOfRes = 1; diff --git a/src/client/src/tscLocal.c b/src/client/src/tscLocal.c index 538e652f3c..4c28adc261 100644 --- a/src/client/src/tscLocal.c +++ b/src/client/src/tscLocal.c @@ -825,8 +825,11 @@ static int32_t tscProcessClientVer(SSqlObj *pSql) { static int32_t tscProcessServStatus(SSqlObj *pSql) { STscObj* pObj = pSql->pTscObj; - if (pObj->pHb != NULL) { - if (pObj->pHb->res.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + SSqlObj* pHb = (SSqlObj*)taosAcquireRef(tscObjRef, pObj->hbrid); + if (pHb != NULL) { + int32_t code = pHb->res.code; + taosReleaseRef(tscObjRef, pObj->hbrid); + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { pSql->res.code = TSDB_CODE_RPC_NETWORK_UNAVAIL; return pSql->res.code; } diff --git a/src/client/src/tscLocalMerge.c b/src/client/src/tscLocalMerge.c index 3c7d46f914..9fdadfa957 100644 --- a/src/client/src/tscLocalMerge.c +++ b/src/client/src/tscLocalMerge.c @@ -198,6 +198,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd if (numOfFlush == 0 || numOfBuffer == 0) { tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, pFFModel, numOfBuffer); + pCmd->command = TSDB_SQL_RETRIEVE_EMPTY_RESULT; // no result, set the result empty tscDebug("%p 
retrieved no data", pSql); return; } @@ -330,22 +331,19 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd pReducer->nResultBufSize = pMemBuffer[0]->pageSize * 16; pReducer->pResultBuf = (tFilePage *)calloc(1, pReducer->nResultBufSize + sizeof(tFilePage)); - pReducer->finalRowSize = tscGetResRowLength(pQueryInfo->exprList); pReducer->resColModel = finalmodel; pReducer->resColModel->capacity = pReducer->nResultBufSize; - pReducer->finalModel = pFFModel; - assert(pReducer->finalRowSize > 0); - if (pReducer->finalRowSize > 0) { - pReducer->resColModel->capacity /= pReducer->finalRowSize; + if (finalmodel->rowSize > 0) { + pReducer->resColModel->capacity /= finalmodel->rowSize; } - assert(pReducer->finalRowSize <= pReducer->rowSize); + assert(finalmodel->rowSize > 0 && finalmodel->rowSize <= pReducer->rowSize); pReducer->pFinalRes = calloc(1, pReducer->rowSize * pReducer->resColModel->capacity); if (pReducer->pTempBuffer == NULL || pReducer->discardData == NULL || pReducer->pResultBuf == NULL || - /*pReducer->pBufForInterpo == NULL || */pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) { + pReducer->pFinalRes == NULL || pReducer->prevRowOfInput == NULL) { tfree(pReducer->pTempBuffer); tfree(pReducer->discardData); tfree(pReducer->pResultBuf); @@ -723,10 +721,16 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr // final result depends on the fields number memset(pSchema, 0, sizeof(SSchema) * size); + for (int32_t i = 0; i < size; ++i) { SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i); - SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex); + SSchema p1 = {0}; + if (pExpr->colInfo.colIndex != TSDB_TBNAME_COLUMN_INDEX) { + p1 = *tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIndex); + } else { + p1 = tGetTableNameColumnSchema(); + } int32_t inter = 0; int16_t type = -1; @@ -745,7 +749,8 @@ int32_t tscLocalReducerEnvCreate(SSqlObj 
*pSql, tExtMemBuffer ***pMemBuffer, tOr functionId = TSDB_FUNC_LAST; } - getResultDataInfo(p1->type, p1->bytes, functionId, 0, &type, &bytes, &inter, 0, false); + int32_t ret = getResultDataInfo(p1.type, p1.bytes, functionId, 0, &type, &bytes, &inter, 0, false); + assert(ret == TSDB_CODE_SUCCESS); } pSchema[i].type = (uint8_t)type; @@ -920,7 +925,7 @@ static void genFinalResWithoutFill(SSqlRes* pRes, SLocalReducer *pLocalReducer, savePrevRecordAndSetupFillInfo(pLocalReducer, pQueryInfo, pLocalReducer->pFillInfo); } - memcpy(pRes->data, pBeforeFillData->data, (size_t)(pRes->numOfRows * pLocalReducer->finalRowSize)); + memcpy(pRes->data, pBeforeFillData->data, (size_t)(pRes->numOfRows * pLocalReducer->finalModel->rowSize)); pRes->numOfClauseTotal += pRes->numOfRows; pBeforeFillData->num = 0; @@ -1256,7 +1261,7 @@ bool genFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCur tColModelCompact(pModel, pResBuf, pModel->capacity); if (tscIsSecondStageQuery(pQueryInfo)) { - pLocalReducer->finalRowSize = doArithmeticCalculate(pQueryInfo, pResBuf, pModel->rowSize, pLocalReducer->finalRowSize); + doArithmeticCalculate(pQueryInfo, pResBuf, pModel->rowSize, pLocalReducer->finalModel->rowSize); } #ifdef _DEBUG_VIEW @@ -1627,7 +1632,8 @@ void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) } int32_t doArithmeticCalculate(SQueryInfo* pQueryInfo, tFilePage* pOutput, int32_t rowSize, int32_t finalRowSize) { - char* pbuf = calloc(1, pOutput->num * rowSize); + int32_t maxRowSize = MAX(rowSize, finalRowSize); + char* pbuf = calloc(1, pOutput->num * maxRowSize); size_t size = tscNumOfFields(pQueryInfo); SArithmeticSupport arithSup = {0}; @@ -1660,7 +1666,6 @@ int32_t doArithmeticCalculate(SQueryInfo* pQueryInfo, tFilePage* pOutput, int32_ offset += pSup->field.bytes; } - assert(finalRowSize <= rowSize); memcpy(pOutput->data, pbuf, pOutput->num * offset); tfree(pbuf); diff --git a/src/client/src/tscProfile.c b/src/client/src/tscProfile.c 
index acc5acd786..18fc79c474 100644 --- a/src/client/src/tscProfile.c +++ b/src/client/src/tscProfile.c @@ -39,6 +39,7 @@ void tscInitConnCb(void *param, TAOS_RES *result, int code) { tscSlowQueryConnInitialized = true; tscSaveSlowQueryFp(sql, NULL); } + taos_free_result(result); } void tscAddIntoSqlList(SSqlObj *pSql) { @@ -69,6 +70,7 @@ void tscSaveSlowQueryFpCb(void *param, TAOS_RES *result, int code) { } else { tscDebug("success to save slow query, code:%d", code); } + taos_free_result(result); } void tscSaveSlowQueryFp(void *handle, void *tmrId) { diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 743cb42eb3..1e6f931fe3 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -4248,7 +4248,7 @@ static int32_t getTagQueryCondExpr(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SCondE tExprTreeDestroy(&p, NULL); taosArrayDestroy(colList); - if (taosArrayGetSize(pQueryInfo->tagCond.pCond) > 0 && !UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) { + if (pQueryInfo->tagCond.pCond != NULL && taosArrayGetSize(pQueryInfo->tagCond.pCond) > 0 && !UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), "filter on tag not supported for normal table"); } } @@ -4256,6 +4256,7 @@ static int32_t getTagQueryCondExpr(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SCondE pCondExpr->pTagCond = NULL; return ret; } + int32_t parseWhereClause(SQueryInfo* pQueryInfo, tSQLExpr** pExpr, SSqlObj* pSql) { if (pExpr == NULL) { return TSDB_CODE_SUCCESS; @@ -5102,7 +5103,7 @@ int32_t validateDNodeConfig(tDCLSQL* pOptions) { const int tokenDebugFlagEnd = 20; const SDNodeDynConfOption cfgOptions[] = { {"resetLog", 8}, {"resetQueryCache", 15}, {"balance", 7}, {"monitor", 7}, - {"debugFlag", 9}, {"monitorDebugFlag", 16}, {"vDebugFlag", 10}, {"mDebugFlag", 10}, + {"debugFlag", 9}, {"monDebugFlag", 12}, {"vDebugFlag", 10}, {"mDebugFlag", 10}, {"cDebugFlag", 10}, {"httpDebugFlag", 13}, {"qDebugflag", 10}, {"sdbDebugFlag", 
12}, {"uDebugFlag", 10}, {"tsdbDebugFlag", 13}, {"sDebugflag", 10}, {"rpcDebugFlag", 12}, {"dDebugFlag", 10}, {"mqttDebugFlag", 13}, {"wDebugFlag", 10}, {"tmrDebugFlag", 12}, @@ -5306,15 +5307,18 @@ int32_t parseLimitClause(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, int32_t clauseIn // keep original limitation value in globalLimit pQueryInfo->clauseLimit = pQueryInfo->limit.limit; - pQueryInfo->prjOffset = pQueryInfo->limit.offset; + pQueryInfo->prjOffset = pQueryInfo->limit.offset; + pQueryInfo->tableLimit = -1; if (tscOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { /* - * the limitation/offset value should be removed during retrieve data from virtual node, - * since the global order are done in client side, so the limitation should also - * be done at the client side. + * the offset value should be removed during retrieve data from virtual node, since the + * global order are done in client side, so the offset is applied at the client side + * However, note that the maximum allowed number of result for each table should be less + * than or equal to the value of limit. 
*/ if (pQueryInfo->limit.limit > 0) { + pQueryInfo->tableLimit = pQueryInfo->limit.limit + pQueryInfo->limit.offset; pQueryInfo->limit.limit = -1; } @@ -6648,7 +6652,7 @@ int32_t exprTreeFromSqlExpr(SSqlCmd* pCmd, tExprNode **pExpr, const tSQLExpr* pS return TSDB_CODE_SUCCESS; } else { - return TSDB_CODE_TSC_INVALID_SQL; + return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), "not support filter expression"); } } else { diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 3a2c673f34..66ca4faa61 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -165,10 +165,10 @@ void tscProcessHeartBeatRsp(void *param, TAOS_RES *tres, int code) { if (pRsp->streamId) tscKillStream(pObj, htonl(pRsp->streamId)); } } else { - tscDebug("%p heartbeat failed, code:%s", pObj->pHb, tstrerror(code)); + tscDebug("%" PRId64 " heartbeat failed, code:%s", pObj->hbrid, tstrerror(code)); } - if (pObj->pHb != NULL) { + if (pObj->hbrid != 0) { int32_t waitingDuring = tsShellActivityTimer * 500; tscDebug("%p send heartbeat in %dms", pSql, waitingDuring); @@ -183,20 +183,12 @@ void tscProcessActivityTimer(void *handle, void *tmrId) { STscObj *pObj = taosAcquireRef(tscRefId, rid); if (pObj == NULL) return; - SSqlObj* pHB = pObj->pHb; - - void** p = taosCacheAcquireByKey(tscObjCache, &pHB, sizeof(TSDB_CACHE_PTR_TYPE)); - if (p == NULL) { - tscWarn("%p HB object has been released already", pHB); - taosReleaseRef(tscRefId, pObj->rid); - return; - } - - assert(*pHB->self == pHB); + SSqlObj* pHB = taosAcquireRef(tscObjRef, pObj->hbrid); + assert(pHB->self == pObj->hbrid); pHB->retry = 0; int32_t code = tscProcessSql(pHB); - taosCacheRelease(tscObjCache, (void**) &p, false); + taosReleaseRef(tscObjRef, pObj->hbrid); if (code != TSDB_CODE_SUCCESS) { tscError("%p failed to sent HB to server, reason:%s", pHB, tstrerror(code)); @@ -226,7 +218,7 @@ int tscSendMsgToServer(SSqlObj *pSql) { .msgType = pSql->cmd.msgType, .pCont = pMsg, .contLen = pSql->cmd.payloadLen, - 
.ahandle = pSql, + .ahandle = (void*)pSql->self, .handle = NULL, .code = 0 }; @@ -237,26 +229,24 @@ int tscSendMsgToServer(SSqlObj *pSql) { void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { TSDB_CACHE_PTR_TYPE handle = (TSDB_CACHE_PTR_TYPE) rpcMsg->ahandle; - void** p = taosCacheAcquireByKey(tscObjCache, &handle, sizeof(TSDB_CACHE_PTR_TYPE)); - if (p == NULL) { + SSqlObj* pSql = (SSqlObj*)taosAcquireRef(tscObjRef, handle); + if (pSql == NULL) { rpcFreeCont(rpcMsg->pCont); return; } - - SSqlObj* pSql = *p; - assert(pSql != NULL); + assert(pSql->self == handle); STscObj *pObj = pSql->pTscObj; SSqlRes *pRes = &pSql->res; SSqlCmd *pCmd = &pSql->cmd; - assert(*pSql->self == pSql); pSql->rpcRid = -1; if (pObj->signature != pObj) { tscDebug("%p DB connection is closed, cmd:%d pObj:%p signature:%p", pSql, pCmd->command, pObj, pObj->signature); - taosCacheRelease(tscObjCache, (void**) &p, true); + taosRemoveRef(tscObjRef, pSql->self); + taosReleaseRef(tscObjRef, pSql->self); rpcFreeCont(rpcMsg->pCont); return; } @@ -266,10 +256,8 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { tscDebug("%p sqlObj needs to be released or DB connection is closed, cmd:%d type:%d, pObj:%p signature:%p", pSql, pCmd->command, pQueryInfo->type, pObj, pObj->signature); - void** p1 = p; - taosCacheRelease(tscObjCache, (void**) &p1, false); - - taosCacheRelease(tscObjCache, (void**) &p, true); + taosRemoveRef(tscObjRef, pSql->self); + taosReleaseRef(tscObjRef, pSql->self); rpcFreeCont(rpcMsg->pCont); return; } @@ -312,7 +300,7 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { // if there is an error occurring, proceed to the following error handling procedure. 
if (rpcMsg->code == TSDB_CODE_TSC_ACTION_IN_PROGRESS) { - taosCacheRelease(tscObjCache, (void**) &p, false); + taosReleaseRef(tscObjRef, pSql->self); rpcFreeCont(rpcMsg->pCont); return; } @@ -380,11 +368,10 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { (*pSql->fp)(pSql->param, pSql, rpcMsg->code); } - void** p1 = p; - taosCacheRelease(tscObjCache, (void**) &p1, false); + taosReleaseRef(tscObjRef, pSql->self); if (shouldFree) { // in case of table-meta/vgrouplist query, automatically free it - taosCacheRelease(tscObjCache, (void **)&p, true); + taosRemoveRef(tscObjRef, pSql->self); tscDebug("%p sqlObj is automatically freed", pSql); } @@ -687,6 +674,7 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pQueryMsg->tagNameRelType = htons(pQueryInfo->tagCond.relType); pQueryMsg->numOfTags = htonl(numOfTags); pQueryMsg->queryType = htonl(pQueryInfo->type); + pQueryMsg->tableLimit = htobe64(pQueryInfo->tableLimit); size_t numOfOutput = tscSqlExprNumOfExprs(pQueryInfo); pQueryMsg->numOfOutput = htons((int16_t)numOfOutput); // this is the stage one output column number @@ -2010,7 +1998,7 @@ int tscProcessShowRsp(SSqlObj *pSql) { // TODO multithread problem static void createHBObj(STscObj* pObj) { - if (pObj->pHb != NULL) { + if (pObj->hbrid != 0) { return; } @@ -2042,7 +2030,7 @@ static void createHBObj(STscObj* pObj) { registerSqlObj(pSql); tscDebug("%p HB is allocated, pObj:%p", pSql, pObj); - pObj->pHb = pSql; + pObj->hbrid = pSql->self; } int tscProcessConnectRsp(SSqlObj *pSql) { diff --git a/src/client/src/tscSql.c b/src/client/src/tscSql.c index 761a222b26..70b91bd685 100644 --- a/src/client/src/tscSql.c +++ b/src/client/src/tscSql.c @@ -292,8 +292,8 @@ void taos_close(TAOS *taos) { pObj->signature = NULL; taosTmrStopA(&(pObj->pTimer)); - SSqlObj* pHb = pObj->pHb; - if (pHb != NULL && atomic_val_compare_exchange_ptr(&pObj->pHb, pHb, 0) == pHb) { + SSqlObj* pHb = (SSqlObj*)taosAcquireRef(tscObjRef, pObj->hbrid); + if (pHb != NULL) { if 
(pHb->rpcRid > 0) { // wait for rsp from dnode rpcCancelRequest(pHb->rpcRid); pHb->rpcRid = -1; @@ -301,6 +301,7 @@ void taos_close(TAOS *taos) { tscDebug("%p HB is freed", pHb); taos_free_result(pHb); + taosReleaseRef(tscObjRef, pHb->self); } int32_t ref = T_REF_DEC(pObj); @@ -622,8 +623,7 @@ void taos_free_result(TAOS_RES *res) { bool freeNow = tscKillQueryInDnode(pSql); if (freeNow) { tscDebug("%p free sqlObj in cache", pSql); - SSqlObj** p = pSql->self; - taosCacheRelease(tscObjCache, (void**) &p, true); + taosReleaseRef(tscObjRef, pSql->self); } } @@ -716,13 +716,7 @@ static void tscKillSTableQuery(SSqlObj *pSql) { continue; } - void** p = taosCacheAcquireByKey(tscObjCache, &pSub, sizeof(TSDB_CACHE_PTR_TYPE)); - if (p == NULL) { - continue; - } - - SSqlObj* pSubObj = (SSqlObj*) (*p); - assert(pSubObj->self == (SSqlObj**) p); + SSqlObj* pSubObj = pSub; pSubObj->res.code = TSDB_CODE_TSC_QUERY_CANCELLED; if (pSubObj->rpcRid > 0) { @@ -731,7 +725,7 @@ static void tscKillSTableQuery(SSqlObj *pSql) { } tscQueueAsyncRes(pSubObj); - taosCacheRelease(tscObjCache, (void**) &p, false); + taosReleaseRef(tscObjRef, pSubObj->self); } tscDebug("%p super table query cancelled", pSql); diff --git a/src/client/src/tscSub.c b/src/client/src/tscSub.c index a782b53e75..52b74f7502 100644 --- a/src/client/src/tscSub.c +++ b/src/client/src/tscSub.c @@ -157,7 +157,7 @@ static SSub* tscCreateSubscription(STscObj* pObj, const char* topic, const char* registerSqlObj(pSql); - code = tsParseSql(pSql, false); + code = tsParseSql(pSql, true); if (code == TSDB_CODE_TSC_ACTION_IN_PROGRESS) { tsem_wait(&pSub->sem); code = pSql->res.code; @@ -168,7 +168,7 @@ static SSub* tscCreateSubscription(STscObj* pObj, const char* topic, const char* goto fail; } - if (pSql->cmd.command != TSDB_SQL_SELECT) { + if (pSql->cmd.command != TSDB_SQL_SELECT && pSql->cmd.command != TSDB_SQL_RETRIEVE_EMPTY_RESULT) { line = __LINE__; code = TSDB_CODE_TSC_INVALID_SQL; goto fail; @@ -179,10 +179,10 @@ static SSub* 
tscCreateSubscription(STscObj* pObj, const char* topic, const char* fail: tscError("tscCreateSubscription failed at line %d, reason: %s", line, tstrerror(code)); if (pSql != NULL) { - if (pSql->self != NULL) { - taos_free_result(pSql); + if (pSql->self != 0) { + taosReleaseRef(tscObjRef, pSql->self); } else { - tscFreeSqlObj(pSql); + tscFreeSqlObj(pSql); } pSql = NULL; @@ -401,9 +401,11 @@ TAOS_SUB *taos_subscribe(TAOS *taos, int restart, const char* topic, const char tscLoadSubscriptionProgress(pSub); } - if (!tscUpdateSubscription(pObj, pSub)) { - taos_unsubscribe(pSub, 1); - return NULL; + if (pSub->pSql->cmd.command == TSDB_SQL_SELECT) { + if (!tscUpdateSubscription(pObj, pSub)) { + taos_unsubscribe(pSub, 1); + return NULL; + } } pSub->interval = interval; @@ -417,10 +419,80 @@ TAOS_SUB *taos_subscribe(TAOS *taos, int restart, const char* topic, const char return pSub; } +SSqlObj* recreateSqlObj(SSub* pSub) { + SSqlObj* pSql = calloc(1, sizeof(SSqlObj)); + if (pSql == NULL) { + return NULL; + } + + pSql->signature = pSql; + pSql->pTscObj = pSub->taos; + + SSqlCmd* pCmd = &pSql->cmd; + SSqlRes* pRes = &pSql->res; + if (tsem_init(&pSql->rspSem, 0, 0) == -1) { + tscFreeSqlObj(pSql); + return NULL; + } + + pSql->param = pSub; + pSql->maxRetry = TSDB_MAX_REPLICA; + pSql->fp = asyncCallback; + pSql->fetchFp = asyncCallback; + pSql->sqlstr = strdup(pSub->pSql->sqlstr); + if (pSql->sqlstr == NULL) { + tscFreeSqlObj(pSql); + return NULL; + } + + pRes->qhandle = 0; + pRes->numOfRows = 1; + + int code = tscAllocPayload(pCmd, TSDB_DEFAULT_PAYLOAD_SIZE); + if (code != TSDB_CODE_SUCCESS) { + tscFreeSqlObj(pSql); + return NULL; + } + + registerSqlObj(pSql); + + code = tsParseSql(pSql, true); + if (code == TSDB_CODE_TSC_ACTION_IN_PROGRESS) { + tsem_wait(&pSub->sem); + code = pSql->res.code; + } + + if (code != TSDB_CODE_SUCCESS) { + taosReleaseRef(tscObjRef, pSql->self); + return NULL; + } + + if (pSql->cmd.command != TSDB_SQL_SELECT) { + taosReleaseRef(tscObjRef, pSql->self); 
+ return NULL; + } + + return pSql; +} + TAOS_RES *taos_consume(TAOS_SUB *tsub) { SSub *pSub = (SSub *)tsub; if (pSub == NULL) return NULL; + if (pSub->pSql->cmd.command == TSDB_SQL_RETRIEVE_EMPTY_RESULT) { + SSqlObj* pSql = recreateSqlObj(pSub); + if (pSql == NULL) { + return NULL; + } + if (pSub->pSql->self != 0) { + taosReleaseRef(tscObjRef, pSub->pSql->self); + } else { + tscFreeSqlObj(pSub->pSql); + } + pSub->pSql = pSql; + pSql->pSubscription = pSub; + } + tscSaveSubscriptionProgress(pSub); SSqlObj *pSql = pSub->pSql; @@ -512,10 +584,13 @@ void taos_unsubscribe(TAOS_SUB *tsub, int keepProgress) { } if (pSub->pSql != NULL) { - taos_free_result(pSub->pSql); + if (pSub->pSql->self != 0) { + taosReleaseRef(tscObjRef, pSub->pSql->self); + } else { + tscFreeSqlObj(pSub->pSql); + } } - tscFreeSqlObj(pSub->pSql); taosArrayDestroy(pSub->progress); tsem_destroy(&pSub->sem); memset(pSub, 0, sizeof(*pSub)); diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c index 6ebbeeef41..819a323db5 100644 --- a/src/client/src/tscSubquery.c +++ b/src/client/src/tscSubquery.c @@ -2198,6 +2198,9 @@ int32_t tscHandleInsertRetry(SSqlObj* pSql) { STableDataBlocks* pTableDataBlock = taosArrayGetP(pCmd->pDataBlocks, pSupporter->index); int32_t code = tscCopyDataBlockToPayload(pSql, pTableDataBlock); + // free the data block created from insert sql string + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); + if ((pRes->code = code)!= TSDB_CODE_SUCCESS) { tscQueueAsyncRes(pSql); return code; // here the pSql may have been released already. 
diff --git a/src/client/src/tscSystem.c b/src/client/src/tscSystem.c index a5bc1ae7ad..03b6ac8404 100644 --- a/src/client/src/tscSystem.c +++ b/src/client/src/tscSystem.c @@ -15,7 +15,7 @@ #include "os.h" #include "taosmsg.h" -#include "tcache.h" +#include "tref.h" #include "trpc.h" #include "tsystem.h" #include "ttimer.h" @@ -31,7 +31,7 @@ // global, not configurable SCacheObj* tscMetaCache; -SCacheObj* tscObjCache; +int tscObjRef = -1; void * tscTmr; void * tscQhandle; void * tscCheckDiskUsageTmr; @@ -139,7 +139,7 @@ void taos_init_imp(void) { int64_t refreshTime = 10; // 10 seconds by default if (tscMetaCache == NULL) { tscMetaCache = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, tscFreeTableMetaHelper, "tableMeta"); - tscObjCache = taosCacheInit(TSDB_CACHE_PTR_KEY, refreshTime / 2, false, tscFreeRegisteredSqlObj, "sqlObj"); + tscObjRef = taosOpenRef(40960, tscFreeRegisteredSqlObj); } tscRefId = taosOpenRef(200, tscCloseTscObj); @@ -162,9 +162,9 @@ void taos_cleanup(void) { taosCacheCleanup(m); } - m = tscObjCache; - if (m != NULL && atomic_val_compare_exchange_ptr(&tscObjCache, m, 0) == m) { - taosCacheCleanup(m); + int refId = atomic_exchange_32(&tscObjRef, -1); + if (refId != -1) { + taosCloseRef(refId); } m = tscQhandle; diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 077fd8aa00..a98132d319 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -447,20 +447,18 @@ static void tscFreeSubobj(SSqlObj* pSql) { void tscFreeRegisteredSqlObj(void *pSql) { assert(pSql != NULL); - SSqlObj** p = (SSqlObj**)pSql; - STscObj* pTscObj = (*p)->pTscObj; + SSqlObj* p = *(SSqlObj**)pSql; + STscObj* pTscObj = p->pTscObj; - assert((*p)->self != 0 && (*p)->self == (p)); - - SSqlObj* ptr = *p; - tscFreeSqlObj(*p); + assert(p->self != 0); + tscFreeSqlObj(p); int32_t ref = T_REF_DEC(pTscObj); assert(ref >= 0); - tscDebug("%p free sqlObj completed, tscObj:%p ref:%d", ptr, pTscObj, ref); + tscDebug("%p free sqlObj completed, tscObj:%p 
ref:%d", p, pTscObj, ref); if (ref == 0) { - tscDebug("%p all sqlObj freed, free tscObj:%p", ptr, pTscObj); + tscDebug("%p all sqlObj freed, free tscObj:%p", p, pTscObj); taosRemoveRef(tscRefId, pTscObj->rid); } } @@ -840,7 +838,6 @@ int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SArray* pTableDataBlockList) { // the length does not include the SSubmitBlk structure pBlocks->dataLen = htonl(finalLen); - dataBuf->numOfTables += 1; } @@ -1565,19 +1562,6 @@ void tscGetSrcColumnInfo(SSrcColumnInfo* pColInfo, SQueryInfo* pQueryInfo) { } } -void tscSetFreeHeatBeat(STscObj* pObj) { - if (pObj == NULL || pObj->signature != pObj || pObj->pHb == NULL) { - return; - } - - SSqlObj* pHeatBeat = pObj->pHb; - assert(pHeatBeat == pHeatBeat->signature); - - // to denote the heart-beat timer close connection and free all allocated resources - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pHeatBeat->cmd, 0); - pQueryInfo->type = TSDB_QUERY_TYPE_FREE_RESOURCE; -} - /* * the following four kinds of SqlObj should not be freed * 1. 
SqlObj for stream computing @@ -1596,7 +1580,7 @@ bool tscShouldBeFreed(SSqlObj* pSql) { } STscObj* pTscObj = pSql->pTscObj; - if (pSql->pStream != NULL || pTscObj->pHb == pSql || pSql->pSubscription != NULL) { + if (pSql->pStream != NULL || pTscObj->hbrid == pSql->self || pSql->pSubscription != NULL) { return false; } @@ -1888,13 +1872,10 @@ void tscResetForNextRetrieve(SSqlRes* pRes) { } void registerSqlObj(SSqlObj* pSql) { - int32_t DEFAULT_LIFE_TIME = 2 * 600 * 1000; // 1200 sec - int32_t ref = T_REF_INC(pSql->pTscObj); tscDebug("%p add to tscObj:%p, ref:%d", pSql, pSql->pTscObj, ref); - TSDB_CACHE_PTR_TYPE p = (TSDB_CACHE_PTR_TYPE) pSql; - pSql->self = taosCachePut(tscObjCache, &p, sizeof(TSDB_CACHE_PTR_TYPE), &p, sizeof(TSDB_CACHE_PTR_TYPE), DEFAULT_LIFE_TIME); + pSql->self = taosAddRef(tscObjRef, pSql); } SSqlObj* createSimpleSubObj(SSqlObj* pSql, void (*fp)(), void* param, int32_t cmd) { diff --git a/src/common/inc/tglobal.h b/src/common/inc/tglobal.h index efe3d7678a..7ba7260af2 100644 --- a/src/common/inc/tglobal.h +++ b/src/common/inc/tglobal.h @@ -125,6 +125,9 @@ extern char tsMonitorDbName[]; extern char tsInternalPass[]; extern int32_t tsMonitorInterval; +// stream +extern int32_t tsEnableStream; + // internal extern int32_t tsPrintAuth; extern int32_t tscEmbedded; @@ -176,7 +179,7 @@ extern int32_t tmrDebugFlag; extern int32_t sdbDebugFlag; extern int32_t httpDebugFlag; extern int32_t mqttDebugFlag; -extern int32_t monitorDebugFlag; +extern int32_t monDebugFlag; extern int32_t uDebugFlag; extern int32_t rpcDebugFlag; extern int32_t odbcDebugFlag; diff --git a/src/common/src/tglobal.c b/src/common/src/tglobal.c index f8bb965d28..dd4b738949 100644 --- a/src/common/src/tglobal.c +++ b/src/common/src/tglobal.c @@ -161,6 +161,9 @@ char tsMonitorDbName[TSDB_DB_NAME_LEN] = "log"; char tsInternalPass[] = "secretkey"; int32_t tsMonitorInterval = 30; // seconds +// stream +int32_t tsEnableStream = 1; + // internal int32_t tsPrintAuth = 0; int32_t tscEmbedded = 
0; @@ -200,13 +203,13 @@ int32_t tsNumOfLogLines = 10000000; int32_t mDebugFlag = 135; int32_t sdbDebugFlag = 135; int32_t dDebugFlag = 135; -int32_t vDebugFlag = 131; +int32_t vDebugFlag = 135; int32_t cDebugFlag = 131; int32_t jniDebugFlag = 131; int32_t odbcDebugFlag = 131; int32_t httpDebugFlag = 131; int32_t mqttDebugFlag = 131; -int32_t monitorDebugFlag = 131; +int32_t monDebugFlag = 131; int32_t qDebugFlag = 131; int32_t rpcDebugFlag = 131; int32_t uDebugFlag = 131; @@ -216,9 +219,9 @@ int32_t wDebugFlag = 135; int32_t tsdbDebugFlag = 131; int32_t cqDebugFlag = 135; -int32_t (*monitorStartSystemFp)() = NULL; -void (*monitorStopSystemFp)() = NULL; -void (*monitorExecuteSQLFp)(char *sql) = NULL; +int32_t (*monStartSystemFp)() = NULL; +void (*monStopSystemFp)() = NULL; +void (*monExecuteSQLFp)(char *sql) = NULL; char *qtypeStr[] = {"rpc", "fwd", "wal", "cq", "query"}; @@ -235,7 +238,7 @@ void taosSetAllDebugFlag() { odbcDebugFlag = debugFlag; httpDebugFlag = debugFlag; mqttDebugFlag = debugFlag; - monitorDebugFlag = debugFlag; + monDebugFlag = debugFlag; qDebugFlag = debugFlag; rpcDebugFlag = debugFlag; uDebugFlag = debugFlag; @@ -276,15 +279,15 @@ bool taosCfgDynamicOptions(char *msg) { if (strncasecmp(cfg->option, "monitor", olen) == 0) { if (1 == vint) { - if (monitorStartSystemFp) { - (*monitorStartSystemFp)(); + if (monStartSystemFp) { + (*monStartSystemFp)(); uInfo("monitor is enabled"); } else { uError("monitor can't be updated, for monitor not initialized"); } } else { - if (monitorStopSystemFp) { - (*monitorStopSystemFp)(); + if (monStopSystemFp) { + (*monStopSystemFp)(); uInfo("monitor is disabled"); } else { uError("monitor can't be updated, for monitor not initialized"); @@ -307,8 +310,8 @@ bool taosCfgDynamicOptions(char *msg) { } if (strncasecmp(option, "resetQueryCache", 15) == 0) { - if (monitorExecuteSQLFp) { - (*monitorExecuteSQLFp)("resetQueryCache"); + if (monExecuteSQLFp) { + (*monExecuteSQLFp)("resetQueryCache"); uInfo("resetquerycache is 
executed"); } else { uError("resetquerycache can't be executed, for monitor not started"); @@ -1015,6 +1018,16 @@ static void doInitGlobalConfig(void) { cfg.unitType = TAOS_CFG_UTYPE_NONE; taosInitConfigOption(cfg); + cfg.option = "stream"; + cfg.ptr = &tsEnableStream; + cfg.valType = TAOS_CFG_VTYPE_INT32; + cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW; + cfg.minValue = 0; + cfg.maxValue = 1; + cfg.ptrLength = 1; + cfg.unitType = TAOS_CFG_UTYPE_NONE; + taosInitConfigOption(cfg); + cfg.option = "httpEnableRecordSql"; cfg.ptr = &tsHttpEnableRecordSql; cfg.valType = TAOS_CFG_VTYPE_INT32; @@ -1227,8 +1240,8 @@ static void doInitGlobalConfig(void) { cfg.unitType = TAOS_CFG_UTYPE_NONE; taosInitConfigOption(cfg); - cfg.option = "monitorDebugFlag"; - cfg.ptr = &monitorDebugFlag; + cfg.option = "monDebugFlag"; + cfg.ptr = &monDebugFlag; cfg.valType = TAOS_CFG_VTYPE_INT32; cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_LOG; cfg.minValue = 0; diff --git a/src/connector/go b/src/connector/go index 050667e5b4..8c58c512b6 160000 --- a/src/connector/go +++ b/src/connector/go @@ -1 +1 @@ -Subproject commit 050667e5b4d0eafa5387e4283e713559b421203f +Subproject commit 8c58c512b6acda8bcdfa48fdc7140227b5221766 diff --git a/src/connector/grafanaplugin b/src/connector/grafanaplugin index ec77d9049a..d598db167e 160000 --- a/src/connector/grafanaplugin +++ b/src/connector/grafanaplugin @@ -1 +1 @@ -Subproject commit ec77d9049a719dabfd1a7c1122a209e201861944 +Subproject commit d598db167eb256fe67409b7bb3d0eb7fffc3ff8c diff --git a/src/cq/src/cqMain.c b/src/cq/src/cqMain.c index 1be7552a89..efb8795962 100644 --- a/src/cq/src/cqMain.c +++ b/src/cq/src/cqMain.c @@ -40,15 +40,14 @@ typedef struct { int32_t vgId; + int32_t master; + int32_t num; // number of continuous streams char user[TSDB_USER_LEN]; char pass[TSDB_PASSWORD_LEN]; char db[TSDB_DB_NAME_LEN]; FCqWrite cqWrite; - void *ahandle; - int32_t num; // number of continuous streams struct SCqObj *pHead; void 
*dbConn; - int32_t master; void *tmrCtrl; pthread_mutex_t mutex; } SCqContext; @@ -70,6 +69,9 @@ static void cqProcessStreamRes(void *param, TAOS_RES *tres, TAOS_ROW row); static void cqCreateStream(SCqContext *pContext, SCqObj *pObj); void *cqOpen(void *ahandle, const SCqCfg *pCfg) { + if (tsEnableStream == 0) { + return NULL; + } SCqContext *pContext = calloc(sizeof(SCqContext), 1); if (pContext == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); @@ -90,7 +92,6 @@ void *cqOpen(void *ahandle, const SCqCfg *pCfg) { tstrncpy(pContext->db, db, sizeof(pContext->db)); pContext->vgId = pCfg->vgId; pContext->cqWrite = pCfg->cqWrite; - pContext->ahandle = ahandle; tscEmbedded = 1; pthread_mutex_init(&pContext->mutex, NULL); @@ -101,6 +102,9 @@ void *cqOpen(void *ahandle, const SCqCfg *pCfg) { } void cqClose(void *handle) { + if (tsEnableStream == 0) { + return; + } SCqContext *pContext = handle; if (handle == NULL) return; @@ -131,6 +135,9 @@ void cqClose(void *handle) { } void cqStart(void *handle) { + if (tsEnableStream == 0) { + return; + } SCqContext *pContext = handle; if (pContext->dbConn || pContext->master) return; @@ -149,6 +156,9 @@ void cqStart(void *handle) { } void cqStop(void *handle) { + if (tsEnableStream == 0) { + return; + } SCqContext *pContext = handle; cInfo("vgId:%d, stop all CQs", pContext->vgId); if (pContext->dbConn == NULL || pContext->master == 0) return; @@ -176,6 +186,9 @@ void cqStop(void *handle) { } void *cqCreate(void *handle, uint64_t uid, int32_t tid, char *sqlStr, STSchema *pSchema) { + if (tsEnableStream == 0) { + return NULL; + } SCqContext *pContext = handle; SCqObj *pObj = calloc(sizeof(SCqObj), 1); @@ -205,6 +218,9 @@ void *cqCreate(void *handle, uint64_t uid, int32_t tid, char *sqlStr, STSchema * } void cqDrop(void *handle) { + if (tsEnableStream == 0) { + return; + } SCqObj *pObj = handle; SCqContext *pContext = pObj->pContext; @@ -241,8 +257,12 @@ static void doCreateStream(void *param, TAOS_RES *result, int32_t code) { SCqObj* pObj = 
(SCqObj*)param; SCqContext* pContext = pObj->pContext; SSqlObj* pSql = (SSqlObj*)result; - pContext->dbConn = pSql->pTscObj; + if (atomic_val_compare_exchange_ptr(&(pContext->dbConn), NULL, pSql->pTscObj) != NULL) { + taos_close(pSql->pTscObj); + } + pthread_mutex_lock(&pContext->mutex); cqCreateStream(pContext, pObj); + pthread_mutex_unlock(&pContext->mutex); } static void cqProcessCreateTimer(void *param, void *tmrId) { @@ -253,7 +273,9 @@ static void cqProcessCreateTimer(void *param, void *tmrId) { cDebug("vgId:%d, try connect to TDengine", pContext->vgId); taos_connect_a(NULL, pContext->user, pContext->pass, pContext->db, 0, doCreateStream, param, NULL); } else { + pthread_mutex_lock(&pContext->mutex); cqCreateStream(pContext, pObj); + pthread_mutex_unlock(&pContext->mutex); } } @@ -267,12 +289,14 @@ static void cqCreateStream(SCqContext *pContext, SCqObj *pObj) { } pObj->tmrId = 0; - pObj->pStream = taos_open_stream(pContext->dbConn, pObj->sqlStr, cqProcessStreamRes, 0, pObj, NULL); - if (pObj->pStream) { - pContext->num++; - cInfo("vgId:%d, id:%d CQ:%s is openned", pContext->vgId, pObj->tid, pObj->sqlStr); - } else { - cError("vgId:%d, id:%d CQ:%s, failed to open", pContext->vgId, pObj->tid, pObj->sqlStr); + if (pObj->pStream == NULL) { + pObj->pStream = taos_open_stream(pContext->dbConn, pObj->sqlStr, cqProcessStreamRes, 0, pObj, NULL); + if (pObj->pStream) { + pContext->num++; + cInfo("vgId:%d, id:%d CQ:%s is openned", pContext->vgId, pObj->tid, pObj->sqlStr); + } else { + cError("vgId:%d, id:%d CQ:%s, failed to open", pContext->vgId, pObj->tid, pObj->sqlStr); + } } } @@ -334,7 +358,7 @@ static void cqProcessStreamRes(void *param, TAOS_RES *tres, TAOS_ROW row) { pHead->version = 0; // write into vnode write queue - pContext->cqWrite(pContext->ahandle, pHead, TAOS_QTYPE_CQ, NULL); + pContext->cqWrite(pContext->vgId, pHead, TAOS_QTYPE_CQ, NULL); free(buffer); } diff --git a/src/cq/test/cqtest.c b/src/cq/test/cqtest.c index e1114fc024..41380f0d86 100644 --- 
a/src/cq/test/cqtest.c +++ b/src/cq/test/cqtest.c @@ -24,7 +24,7 @@ int64_t ver = 0; void *pCq = NULL; -int writeToQueue(void *pVnode, void *data, int type, void *pMsg) { +int writeToQueue(int32_t vgId, void *data, int type, void *pMsg) { return 0; } diff --git a/src/dnode/src/dnodeMain.c b/src/dnode/src/dnodeMain.c index 130be0af20..9f52dbd331 100644 --- a/src/dnode/src/dnodeMain.c +++ b/src/dnode/src/dnodeMain.c @@ -19,6 +19,7 @@ #include "tutil.h" #include "tconfig.h" #include "tglobal.h" +#include "tfile.h" #include "twal.h" #include "trpc.h" #include "dnode.h" @@ -55,6 +56,7 @@ typedef struct { } SDnodeComponent; static const SDnodeComponent tsDnodeComponents[] = { + {"tfile", tfInit, tfCleanup}, {"rpc", rpcInit, rpcCleanup}, {"storage", dnodeInitStorage, dnodeCleanupStorage}, {"dnodecfg", dnodeInitCfg, dnodeCleanupCfg}, diff --git a/src/dnode/src/dnodeMgmt.c b/src/dnode/src/dnodeMgmt.c index da1852e05e..5c01f64716 100644 --- a/src/dnode/src/dnodeMgmt.c +++ b/src/dnode/src/dnodeMgmt.c @@ -24,7 +24,7 @@ #include "tqueue.h" #include "tsync.h" #include "ttimer.h" -#include "tbalance.h" +#include "tbn.h" #include "tglobal.h" #include "dnode.h" #include "vnode.h" @@ -444,12 +444,12 @@ static int32_t dnodeProcessCreateMnodeMsg(SRpcMsg *pMsg) { SCreateMnodeMsg *pCfg = pMsg->pCont; pCfg->dnodeId = htonl(pCfg->dnodeId); if (pCfg->dnodeId != dnodeGetDnodeId()) { - dError("dnodeId:%d, in create mnode msg is not equal with saved dnodeId:%d", pCfg->dnodeId, dnodeGetDnodeId()); + dDebug("dnodeId:%d, in create mnode msg is not equal with saved dnodeId:%d", pCfg->dnodeId, dnodeGetDnodeId()); return TSDB_CODE_MND_DNODE_ID_NOT_CONFIGURED; } if (strcmp(pCfg->dnodeEp, tsLocalEp) != 0) { - dError("dnodeEp:%s, in create mnode msg is not equal with saved dnodeEp:%s", pCfg->dnodeEp, tsLocalEp); + dDebug("dnodeEp:%s, in create mnode msg is not equal with saved dnodeEp:%s", pCfg->dnodeEp, tsLocalEp); return TSDB_CODE_MND_DNODE_EP_NOT_CONFIGURED; } diff --git 
a/src/dnode/src/dnodeModule.c b/src/dnode/src/dnodeModule.c index bd9500ba51..7faa3c8913 100644 --- a/src/dnode/src/dnodeModule.c +++ b/src/dnode/src/dnodeModule.c @@ -78,10 +78,10 @@ static void dnodeAllocModules() { tsModule[TSDB_MOD_MONITOR].enable = (tsEnableMonitorModule == 1); tsModule[TSDB_MOD_MONITOR].name = "monitor"; - tsModule[TSDB_MOD_MONITOR].initFp = monitorInitSystem; - tsModule[TSDB_MOD_MONITOR].cleanUpFp = monitorCleanUpSystem; - tsModule[TSDB_MOD_MONITOR].startFp = monitorStartSystem; - tsModule[TSDB_MOD_MONITOR].stopFp = monitorStopSystem; + tsModule[TSDB_MOD_MONITOR].initFp = monInitSystem; + tsModule[TSDB_MOD_MONITOR].cleanUpFp = monCleanupSystem; + tsModule[TSDB_MOD_MONITOR].startFp = monStartSystem; + tsModule[TSDB_MOD_MONITOR].stopFp = monStopSystem; if (tsEnableMonitorModule) { dnodeSetModuleStatus(TSDB_MOD_MONITOR); } diff --git a/src/dnode/src/dnodePeer.c b/src/dnode/src/dnodePeer.c index 6bf22cee4e..6b5b28622b 100644 --- a/src/dnode/src/dnodePeer.c +++ b/src/dnode/src/dnodePeer.c @@ -182,6 +182,8 @@ void dnodeSendMsgToDnode(SRpcEpSet *epSet, SRpcMsg *rpcMsg) { void dnodeSendMsgToMnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp) { SRpcEpSet epSet = {0}; dnodeGetEpSetForPeer(&epSet); + + assert(tsClientRpc != 0); rpcSendRecv(tsClientRpc, &epSet, rpcMsg, rpcRsp); } diff --git a/src/inc/monitor.h b/src/inc/monitor.h index b1229cca6b..1aefb0b848 100644 --- a/src/inc/monitor.h +++ b/src/inc/monitor.h @@ -47,13 +47,13 @@ typedef struct { int8_t accessState; } SAcctMonitorObj; -int32_t monitorInitSystem(); -int32_t monitorStartSystem(); -void monitorStopSystem(); -void monitorCleanUpSystem(); -void monitorSaveAcctLog(SAcctMonitorObj *pMonObj); -void monitorSaveLog(int32_t level, const char *const format, ...); -void monitorExecuteSQL(char *sql); +int32_t monInitSystem(); +int32_t monStartSystem(); +void monStopSystem(); +void monCleanupSystem(); +void monSaveAcctLog(SAcctMonitorObj *pMonObj); +void monSaveLog(int32_t level, const char *const format, 
...); +void monExecuteSQL(char *sql); #ifdef __cplusplus } diff --git a/src/inc/taosdef.h b/src/inc/taosdef.h index 20c7af6a21..ec1e1fc330 100644 --- a/src/inc/taosdef.h +++ b/src/inc/taosdef.h @@ -257,7 +257,7 @@ void tsDataSwap(void *pLeft, void *pRight, int32_t type, int32_t size, void* buf #define TSDB_MAX_SAVED_SQL_LEN TSDB_MAX_COLUMNS * 64 #define TSDB_MAX_SQL_LEN TSDB_PAYLOAD_SIZE #define TSDB_MAX_SQL_SHOW_LEN 512 -#define TSDB_MAX_ALLOWED_SQL_LEN (8*1024*1024U) // sql length should be less than 8mb +#define TSDB_MAX_ALLOWED_SQL_LEN (1*1024*1024U) // sql length should be less than 1mb #define TSDB_APPNAME_LEN TSDB_UNI_LEN diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 77ec5350ba..a720b68e59 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -184,6 +184,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOO_MANY_DATABASES, 0, 0x0385, "Too many d TAOS_DEFINE_ERROR(TSDB_CODE_MND_DB_IN_DROPPING, 0, 0x0386, "Database not available") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_READY, 0, 0x0387, "Database unsynced") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_DB_OPTION_DAYS, 0, 0x0390, "Invalid database option: days out of range") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_DB_OPTION_KEEP, 0, 0x0391, "Invalid database option: keep >= keep2 >= keep1 >= days") + // dnode TAOS_DEFINE_ERROR(TSDB_CODE_DND_MSG_NOT_PROCESSED, 0, 0x0400, "Message not processed") TAOS_DEFINE_ERROR(TSDB_CODE_DND_OUT_OF_MEMORY, 0, 0x0401, "Dnode out of memory") @@ -367,6 +370,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_OP_TAG_VALUE_TOO_LONG, 0, 0x11A4, "tag value TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_OP_VALUE_NULL, 0, 0x11A5, "value not find") TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_OP_VALUE_TYPE, 0, 0x11A6, "value type should be boolean, number or string") +// odbc TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_OOM, 0, 0x2100, "out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_CONV_CHAR_NOT_NUM, 0, 0x2101, "convertion not a valid literal input") TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_CONV_UNDEF, 0, 0x2102, "convertion 
undefined") @@ -390,7 +394,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_CONV_SRC_BAD_SEQ, 0, 0x2113, "src bad se TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_CONV_SRC_INCOMPLETE, 0, 0x2114, "src incomplete") TAOS_DEFINE_ERROR(TSDB_CODE_ODBC_CONV_SRC_GENERAL, 0, 0x2115, "src general") - #ifdef TAOS_ERROR_C }; #endif diff --git a/src/inc/taosmsg.h b/src/inc/taosmsg.h index b77db69c46..b4d3bec958 100644 --- a/src/inc/taosmsg.h +++ b/src/inc/taosmsg.h @@ -476,19 +476,21 @@ typedef struct { int16_t numOfGroupCols; // num of group by columns int16_t orderByIdx; int16_t orderType; // used in group by xx order by xxx + int64_t tableLimit; // limit the number of rows for each table, used in order by + limit in stable projection query. + int16_t prjOrder; // global order in super table projection query. int64_t limit; int64_t offset; uint32_t queryType; // denote another query process int16_t numOfOutput; // final output columns numbers int16_t tagNameRelType; // relation of tag criteria and tbname criteria - int16_t fillType; // interpolate type - uint64_t fillVal; // default value array list + int16_t fillType; // interpolate type + uint64_t fillVal; // default value array list int32_t secondStageOutput; - int32_t tsOffset; // offset value in current msg body, NOTE: ts list is compressed - int32_t tsLen; // total length of ts comp block - int32_t tsNumOfBlocks; // ts comp block numbers - int32_t tsOrder; // ts comp block order - int32_t numOfTags; // number of tags columns involved + int32_t tsOffset; // offset value in current msg body, NOTE: ts list is compressed + int32_t tsLen; // total length of ts comp block + int32_t tsNumOfBlocks; // ts comp block numbers + int32_t tsOrder; // ts comp block order + int32_t numOfTags; // number of tags columns involved SColumnInfo colList[]; } SQueryTableMsg; diff --git a/src/inc/tbalance.h b/src/inc/tbn.h similarity index 71% rename from src/inc/tbalance.h rename to src/inc/tbn.h index f0da4a3747..b9f4e3c608 100644 --- a/src/inc/tbalance.h +++ 
b/src/inc/tbn.h @@ -23,14 +23,14 @@ extern "C" { struct SVgObj; struct SDnodeObj; -int32_t balanceInit(); -void balanceCleanUp(); -void balanceAsyncNotify(); -void balanceSyncNotify(); -void balanceReset(); -int32_t balanceAllocVnodes(struct SVgObj *pVgroup); -int32_t balanceAlterDnode(struct SDnodeObj *pDnode, int32_t vnodeId, int32_t dnodeId); -int32_t balanceDropDnode(struct SDnodeObj *pDnode); +int32_t bnInit(); +void bnCleanUp(); +void bnNotify(); +void bnCheckModules(); +void bnReset(); +int32_t bnAllocVnodes(struct SVgObj *pVgroup); +int32_t bnAlterDnode(struct SDnodeObj *pDnode, int32_t vnodeId, int32_t dnodeId); +int32_t bnDropDnode(struct SDnodeObj *pDnode); #ifdef __cplusplus } diff --git a/src/inc/tcq.h b/src/inc/tcq.h index 7a0727f1b8..afa744a9c4 100644 --- a/src/inc/tcq.h +++ b/src/inc/tcq.h @@ -21,7 +21,7 @@ extern "C" { #include "tdataformat.h" -typedef int32_t (*FCqWrite)(void *ahandle, void *pHead, int32_t qtype, void *pMsg); +typedef int32_t (*FCqWrite)(int32_t vgId, void *pHead, int32_t qtype, void *pMsg); typedef struct { int32_t vgId; diff --git a/src/inc/tsync.h b/src/inc/tsync.h index 398e1bf97c..1303195ef1 100644 --- a/src/inc/tsync.h +++ b/src/inc/tsync.h @@ -85,6 +85,9 @@ typedef void (*FNotifyFlowCtrl)(int32_t vgId, int32_t level); // when data file is synced successfully, notity app typedef int32_t (*FNotifyFileSynced)(int32_t vgId, uint64_t fversion); +// get file version +typedef int32_t (*FGetVersion)(int32_t vgId, uint64_t *fver, uint64_t *vver); + typedef struct { int32_t vgId; // vgroup ID uint64_t version; // initial version @@ -97,6 +100,7 @@ typedef struct { FNotifyRole notifyRole; FNotifyFlowCtrl notifyFlowCtrl; FNotifyFileSynced notifyFileSynced; + FGetVersion getVersion; } SSyncInfo; typedef void *tsync_h; diff --git a/src/inc/twal.h b/src/inc/twal.h index 8dd3a8a912..1645de77aa 100644 --- a/src/inc/twal.h +++ b/src/inc/twal.h @@ -51,9 +51,8 @@ typedef struct { typedef void * twalh; // WAL HANDLE typedef int32_t 
FWalWrite(void *ahandle, void *pHead, int32_t qtype, void *pMsg); -int32_t walInit(); -void walCleanUp(); - +int32_t walInit(); +void walCleanUp(); twalh walOpen(char *path, SWalCfg *pCfg); int32_t walAlter(twalh pWal, SWalCfg *pCfg); void walStop(twalh); diff --git a/src/kit/shell/src/shellDarwin.c b/src/kit/shell/src/shellDarwin.c index 995b56f341..ddf7b21bef 100644 --- a/src/kit/shell/src/shellDarwin.c +++ b/src/kit/shell/src/shellDarwin.c @@ -407,7 +407,11 @@ void get_history_path(char *history) { sprintf(history, "%s/%s", getpwuid(getuid void clearScreen(int ecmd_pos, int cursor_pos) { struct winsize w; - ioctl(0, TIOCGWINSZ, &w); + if (ioctl(0, TIOCGWINSZ, &w) < 0 || w.ws_col == 0 || w.ws_row == 0) { + //fprintf(stderr, "No stream device, and use default value(col 120, row 30)\n"); + w.ws_col = 120; + w.ws_row = 30; + } int cursor_x = cursor_pos / w.ws_col; int cursor_y = cursor_pos % w.ws_col; @@ -425,8 +429,9 @@ void clearScreen(int ecmd_pos, int cursor_pos) { void showOnScreen(Command *cmd) { struct winsize w; if (ioctl(0, TIOCGWINSZ, &w) < 0 || w.ws_col == 0 || w.ws_row == 0) { - fprintf(stderr, "No stream device\n"); - exit(EXIT_FAILURE); + //fprintf(stderr, "No stream device\n"); + w.ws_col = 120; + w.ws_row = 30; } wchar_t wc; diff --git a/src/kit/shell/src/shellLinux.c b/src/kit/shell/src/shellLinux.c index 04f5824d8d..3226ad830a 100644 --- a/src/kit/shell/src/shellLinux.c +++ b/src/kit/shell/src/shellLinux.c @@ -413,7 +413,11 @@ void get_history_path(char *history) { snprintf(history, TSDB_FILENAME_LEN, "%s/ void clearScreen(int ecmd_pos, int cursor_pos) { struct winsize w; - ioctl(0, TIOCGWINSZ, &w); + if (ioctl(0, TIOCGWINSZ, &w) < 0 || w.ws_col == 0 || w.ws_row == 0) { + //fprintf(stderr, "No stream device, and use default value(col 120, row 30)\n"); + w.ws_col = 120; + w.ws_row = 30; + } int cursor_x = cursor_pos / w.ws_col; int cursor_y = cursor_pos % w.ws_col; @@ -431,8 +435,9 @@ void clearScreen(int ecmd_pos, int cursor_pos) { void 
showOnScreen(Command *cmd) { struct winsize w; if (ioctl(0, TIOCGWINSZ, &w) < 0 || w.ws_col == 0 || w.ws_row == 0) { - fprintf(stderr, "No stream device\n"); - exit(EXIT_FAILURE); + //fprintf(stderr, "No stream device\n"); + w.ws_col = 120; + w.ws_row = 30; } wchar_t wc; diff --git a/src/mnode/inc/mnodeAcct.h b/src/mnode/inc/mnodeAcct.h index 744a62f948..595dcca413 100644 --- a/src/mnode/inc/mnodeAcct.h +++ b/src/mnode/inc/mnodeAcct.h @@ -27,6 +27,7 @@ void mnodeCleanupAccts(); void mnodeGetStatOfAllAcct(SAcctInfo* pAcctInfo); void * mnodeGetAcct(char *acctName); void * mnodeGetNextAcct(void *pIter, SAcctObj **pAcct); +void mnodeCancelGetNextAcct(void *pIter); void mnodeIncAcctRef(SAcctObj *pAcct); void mnodeDecAcctRef(SAcctObj *pAcct); void mnodeAddDbToAcct(SAcctObj *pAcct, SDbObj *pDb); diff --git a/src/mnode/inc/mnodeDb.h b/src/mnode/inc/mnodeDb.h index 7cbd08ed92..9354b923d7 100644 --- a/src/mnode/inc/mnodeDb.h +++ b/src/mnode/inc/mnodeDb.h @@ -34,6 +34,7 @@ int64_t mnodeGetDbNum(); SDbObj *mnodeGetDb(char *db); SDbObj *mnodeGetDbByTableId(char *db); void * mnodeGetNextDb(void *pIter, SDbObj **pDb); +void mnodeCancelGetNextDb(void *pIter); void mnodeIncDbRef(SDbObj *pDb); void mnodeDecDbRef(SDbObj *pDb); bool mnodeCheckIsMonitorDB(char *db, char *monitordb); diff --git a/src/mnode/inc/mnodeDnode.h b/src/mnode/inc/mnodeDnode.h index b6ddb7a9bf..8bc29ef9ef 100644 --- a/src/mnode/inc/mnodeDnode.h +++ b/src/mnode/inc/mnodeDnode.h @@ -65,6 +65,7 @@ int32_t mnodeGetDnodesNum(); int32_t mnodeGetOnlinDnodesCpuCoreNum(); int32_t mnodeGetOnlineDnodesNum(); void * mnodeGetNextDnode(void *pIter, SDnodeObj **pDnode); +void mnodeCancelGetNextDnode(void *pIter); void mnodeIncDnodeRef(SDnodeObj *pDnode); void mnodeDecDnodeRef(SDnodeObj *pDnode); void * mnodeGetDnode(int32_t dnodeId); diff --git a/src/mnode/inc/mnodeInt.h b/src/mnode/inc/mnodeInt.h index 44626fd167..7a791d76e6 100644 --- a/src/mnode/inc/mnodeInt.h +++ b/src/mnode/inc/mnodeInt.h @@ -41,9 +41,9 @@ extern int32_t 
sdbDebugFlag; #define sdbDebug(...) { if (sdbDebugFlag & DEBUG_DEBUG) { taosPrintLog("SDB ", sdbDebugFlag, __VA_ARGS__); }} #define sdbTrace(...) { if (sdbDebugFlag & DEBUG_TRACE) { taosPrintLog("SDB ", sdbDebugFlag, __VA_ARGS__); }} -#define mLError(...) { monitorSaveLog(2, __VA_ARGS__); mError(__VA_ARGS__) } -#define mLWarn(...) { monitorSaveLog(1, __VA_ARGS__); mWarn(__VA_ARGS__) } -#define mLInfo(...) { monitorSaveLog(0, __VA_ARGS__); mInfo(__VA_ARGS__) } +#define mLError(...) { monSaveLog(2, __VA_ARGS__); mError(__VA_ARGS__) } +#define mLWarn(...) { monSaveLog(1, __VA_ARGS__); mWarn(__VA_ARGS__) } +#define mLInfo(...) { monSaveLog(0, __VA_ARGS__); mInfo(__VA_ARGS__) } #ifdef __cplusplus } diff --git a/src/mnode/inc/mnodeMnode.h b/src/mnode/inc/mnodeMnode.h index a28a03ea40..10cbcebe22 100644 --- a/src/mnode/inc/mnodeMnode.h +++ b/src/mnode/inc/mnodeMnode.h @@ -38,6 +38,7 @@ void mnodeDropMnodeLocal(int32_t dnodeId); void * mnodeGetMnode(int32_t mnodeId); int32_t mnodeGetMnodesNum(); void * mnodeGetNextMnode(void *pIter, struct SMnodeObj **pMnode); +void mnodeCancelGetNextMnode(void *pIter); void mnodeIncMnodeRef(struct SMnodeObj *pMnode); void mnodeDecMnodeRef(struct SMnodeObj *pMnode); diff --git a/src/mnode/inc/mnodeSdb.h b/src/mnode/inc/mnodeSdb.h index 29d8cf1207..90c4eac40a 100644 --- a/src/mnode/inc/mnodeSdb.h +++ b/src/mnode/inc/mnodeSdb.h @@ -79,10 +79,13 @@ typedef struct { int32_t (*fpRestored)(); } SSdbTableDesc; +int32_t sdbInitRef(); +void sdbCleanUpRef(); int32_t sdbInit(); void sdbCleanUp(); -void * sdbOpenTable(SSdbTableDesc *desc); -void sdbCloseTable(void *handle); +int64_t sdbOpenTable(SSdbTableDesc *desc); +void sdbCloseTable(int64_t rid); +void* sdbGetTableByRid(int64_t rid); bool sdbIsMaster(); bool sdbIsServing(); void sdbUpdateMnodeRoles(); @@ -92,9 +95,9 @@ int32_t sdbDeleteRow(SSdbRow *pRow); int32_t sdbUpdateRow(SSdbRow *pRow); int32_t sdbInsertRowToQueue(SSdbRow *pRow); -void *sdbGetRow(void *pTable, void *key); -void 
*sdbFetchRow(void *pTable, void *pIter, void **ppRow); -void sdbFreeIter(void *pIter); +void * sdbGetRow(void *pTable, void *key); +void * sdbFetchRow(void *pTable, void *pIter, void **ppRow); +void sdbFreeIter(void *pTable, void *pIter); void sdbIncRef(void *pTable, void *pRow); void sdbDecRef(void *pTable, void *pRow); int64_t sdbGetNumOfRows(void *pTable); diff --git a/src/mnode/inc/mnodeShow.h b/src/mnode/inc/mnodeShow.h index da66e71678..f985fe792d 100644 --- a/src/mnode/inc/mnodeShow.h +++ b/src/mnode/inc/mnodeShow.h @@ -26,8 +26,10 @@ void mnodeCleanUpShow(); typedef int32_t (*SShowMetaFp)(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); typedef int32_t (*SShowRetrieveFp)(SShowObj *pShow, char *data, int32_t rows, void *pConn); +typedef void (*SShowFreeIterFp)(void *pIter); void mnodeAddShowMetaHandle(uint8_t showType, SShowMetaFp fp); void mnodeAddShowRetrieveHandle(uint8_t showType, SShowRetrieveFp fp); +void mnodeAddShowFreeIterHandle(uint8_t msgType, SShowFreeIterFp fp); void mnodeVacuumResult(char *data, int32_t numOfCols, int32_t rows, int32_t capacity, SShowObj *pShow); #ifdef __cplusplus diff --git a/src/mnode/inc/mnodeTable.h b/src/mnode/inc/mnodeTable.h index 7c0077aa60..bf04f26a90 100644 --- a/src/mnode/inc/mnodeTable.h +++ b/src/mnode/inc/mnodeTable.h @@ -31,6 +31,8 @@ void mnodeIncTableRef(void *pTable); void mnodeDecTableRef(void *pTable); void * mnodeGetNextChildTable(void *pIter, SCTableObj **pTable); void * mnodeGetNextSuperTable(void *pIter, SSTableObj **pTable); +void mnodeCancelGetNextChildTable(void *pIter); +void mnodeCancelGetNextSuperTable(void *pIter); void mnodeDropAllChildTables(SDbObj *pDropDb); void mnodeDropAllSuperTables(SDbObj *pDropDb); void mnodeDropAllChildTablesInVgroups(SVgObj *pVgroup); diff --git a/src/mnode/inc/mnodeUser.h b/src/mnode/inc/mnodeUser.h index 073460f9d3..156bc7aaeb 100644 --- a/src/mnode/inc/mnodeUser.h +++ b/src/mnode/inc/mnodeUser.h @@ -25,6 +25,7 @@ int32_t mnodeInitUsers(); void 
mnodeCleanupUsers(); SUserObj *mnodeGetUser(char *name); void * mnodeGetNextUser(void *pIter, SUserObj **pUser); +void mnodeCancelGetNextUser(void *pIter); void mnodeIncUserRef(SUserObj *pUser); void mnodeDecUserRef(SUserObj *pUser); SUserObj *mnodeGetUserFromConn(void *pConn); diff --git a/src/mnode/inc/mnodeVgroup.h b/src/mnode/inc/mnodeVgroup.h index 0e6d9dfde4..ee9ec7ae93 100644 --- a/src/mnode/inc/mnodeVgroup.h +++ b/src/mnode/inc/mnodeVgroup.h @@ -34,6 +34,7 @@ void mnodeDropAllDnodeVgroups(SDnodeObj *pDropDnode); //void mnodeUpdateAllDbVgroups(SDbObj *pAlterDb); void * mnodeGetNextVgroup(void *pIter, SVgObj **pVgroup); +void mnodeCancelGetNextVgroup(void *pIter); void mnodeUpdateVgroup(SVgObj *pVgroup); void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVload); void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t openVnodes); diff --git a/src/mnode/src/mnodeAcct.c b/src/mnode/src/mnodeAcct.c index 9fff2f0229..6fba05674f 100644 --- a/src/mnode/src/mnodeAcct.c +++ b/src/mnode/src/mnodeAcct.c @@ -26,6 +26,7 @@ #include "mnodeUser.h" #include "mnodeVgroup.h" +int64_t tsAcctRid = -1; void * tsAcctSdb = NULL; static int32_t tsAcctUpdateSize; static int32_t mnodeCreateRootAcct(); @@ -114,7 +115,8 @@ int32_t mnodeInitAccts() { .fpRestored = mnodeAcctActionRestored }; - tsAcctSdb = sdbOpenTable(&desc); + tsAcctRid = sdbOpenTable(&desc); + tsAcctSdb = sdbGetTableByRid(tsAcctRid); if (tsAcctSdb == NULL) { mError("table:%s, failed to create hash", desc.name); return -1; @@ -126,7 +128,7 @@ int32_t mnodeInitAccts() { void mnodeCleanupAccts() { acctCleanUp(); - sdbCloseTable(tsAcctSdb); + sdbCloseTable(tsAcctRid); tsAcctSdb = NULL; } @@ -144,7 +146,6 @@ void mnodeGetStatOfAllAcct(SAcctInfo* pAcctInfo) { pAcctInfo->numOfTimeSeries += pAcct->acctInfo.numOfTimeSeries; mnodeDecAcctRef(pAcct); } - sdbFreeIter(pIter); SVgObj *pVgroup = NULL; pIter = NULL; @@ -158,7 +159,6 @@ void mnodeGetStatOfAllAcct(SAcctInfo* pAcctInfo) { 
pAcctInfo->totalPoints += pVgroup->pointsWritten; mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); } void *mnodeGetAcct(char *name) { @@ -169,6 +169,10 @@ void *mnodeGetNextAcct(void *pIter, SAcctObj **pAcct) { return sdbFetchRow(tsAcctSdb, pIter, (void **)pAcct); } +void mnodeCancelGetNextAcct(void *pIter) { + sdbFreeIter(tsAcctSdb, pIter); +} + void mnodeIncAcctRef(SAcctObj *pAcct) { sdbIncRef(tsAcctSdb, pAcct); } diff --git a/src/mnode/src/mnodeCluster.c b/src/mnode/src/mnodeCluster.c index 5be67e4ad9..56229daffa 100644 --- a/src/mnode/src/mnodeCluster.c +++ b/src/mnode/src/mnodeCluster.c @@ -24,6 +24,7 @@ #include "mnodeShow.h" #include "tglobal.h" +int64_t tsClusterRid = -1; static void * tsClusterSdb = NULL; static int32_t tsClusterUpdateSize; static char tsClusterId[TSDB_CLUSTER_ID_LEN]; @@ -31,6 +32,7 @@ static int32_t mnodeCreateCluster(); static int32_t mnodeGetClusterMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mnodeRetrieveClusters(SShowObj *pShow, char *data, int32_t rows, void *pConn); +static void mnodeCancelGetNextCluster(void *pIter); static int32_t mnodeClusterActionDestroy(SSdbRow *pRow) { tfree(pRow->pObj); @@ -100,21 +102,23 @@ int32_t mnodeInitCluster() { .fpRestored = mnodeClusterActionRestored }; - tsClusterSdb = sdbOpenTable(&desc); + tsClusterRid = sdbOpenTable(&desc); + tsClusterSdb = sdbGetTableByRid(tsClusterRid); if (tsClusterSdb == NULL) { - mError("table:%s, failed to create hash", desc.name); + mError("table:%s, rid:%" PRId64 ", failed to create hash", desc.name, tsClusterRid); return -1; } mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_CLUSTER, mnodeGetClusterMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_CLUSTER, mnodeRetrieveClusters); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_CLUSTER, mnodeCancelGetNextCluster); mDebug("table:%s, hash is created", desc.name); return TSDB_CODE_SUCCESS; } void mnodeCleanupCluster() { - sdbCloseTable(tsClusterSdb); + sdbCloseTable(tsClusterRid); tsClusterSdb = NULL; } 
@@ -122,6 +126,10 @@ void *mnodeGetNextCluster(void *pIter, SClusterObj **pCluster) { return sdbFetchRow(tsClusterSdb, pIter, (void **)pCluster); } +void mnodeCancelGetNextCluster(void *pIter) { + sdbFreeIter(tsClusterSdb, pIter); +} + void mnodeIncClusterRef(SClusterObj *pCluster) { sdbIncRef(tsClusterSdb, pCluster); } @@ -167,7 +175,7 @@ void mnodeUpdateClusterId() { } mnodeDecClusterRef(pCluster); - sdbFreeIter(pIter); + mnodeCancelGetNextCluster(pIter); } static int32_t mnodeGetClusterMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { diff --git a/src/mnode/src/mnodeDb.c b/src/mnode/src/mnodeDb.c index d121208447..c971a945aa 100644 --- a/src/mnode/src/mnodeDb.c +++ b/src/mnode/src/mnodeDb.c @@ -20,7 +20,7 @@ #include "tgrant.h" #include "tglobal.h" #include "tname.h" -#include "tbalance.h" +#include "tbn.h" #include "tdataformat.h" #include "mnode.h" #include "mnodeDef.h" @@ -38,6 +38,7 @@ #include "mnodeVgroup.h" #define VG_LIST_SIZE 8 +int64_t tsDbRid = -1; static void * tsDbSdb = NULL; static int32_t tsDbUpdateSize; @@ -160,7 +161,8 @@ int32_t mnodeInitDbs() { .fpRestored = mnodeDbActionRestored }; - tsDbSdb = sdbOpenTable(&desc); + tsDbRid = sdbOpenTable(&desc); + tsDbSdb = sdbGetTableByRid(tsDbRid); if (tsDbSdb == NULL) { mError("failed to init db data"); return -1; @@ -171,6 +173,7 @@ int32_t mnodeInitDbs() { mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_DROP_DB, mnodeProcessDropDbMsg); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_DB, mnodeGetDbMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_DB, mnodeRetrieveDbs); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_DB, mnodeCancelGetNextDb); mDebug("table:dbs table is created"); return 0; @@ -180,6 +183,10 @@ void *mnodeGetNextDb(void *pIter, SDbObj **pDb) { return sdbFetchRow(tsDbSdb, pIter, (void **)pDb); } +void mnodeCancelGetNextDb(void *pIter) { + sdbFreeIter(tsDbSdb, pIter); +} + SDbObj *mnodeGetDb(char *db) { return (SDbObj *)sdbGetRow(tsDbSdb, db); } @@ -229,30 +236,28 @@ static int32_t 
mnodeCheckDbCfg(SDbCfg *pCfg) { if (pCfg->daysPerFile < TSDB_MIN_DAYS_PER_FILE || pCfg->daysPerFile > TSDB_MAX_DAYS_PER_FILE) { mError("invalid db option daysPerFile:%d valid range: [%d, %d]", pCfg->daysPerFile, TSDB_MIN_DAYS_PER_FILE, TSDB_MAX_DAYS_PER_FILE); - return TSDB_CODE_MND_INVALID_DB_OPTION; + return TSDB_CODE_MND_INVALID_DB_OPTION_DAYS; } if (pCfg->daysToKeep < TSDB_MIN_KEEP || pCfg->daysToKeep > TSDB_MAX_KEEP) { mError("invalid db option daysToKeep:%d valid range: [%d, %d]", pCfg->daysToKeep, TSDB_MIN_KEEP, TSDB_MAX_KEEP); - return TSDB_CODE_MND_INVALID_DB_OPTION; + return TSDB_CODE_MND_INVALID_DB_OPTION_KEEP; } if (pCfg->daysToKeep < pCfg->daysPerFile) { mError("invalid db option daysToKeep:%d should larger than daysPerFile:%d", pCfg->daysToKeep, pCfg->daysPerFile); - return TSDB_CODE_MND_INVALID_DB_OPTION; + return TSDB_CODE_MND_INVALID_DB_OPTION_KEEP; } -#if 0 if (pCfg->daysToKeep2 < TSDB_MIN_KEEP || pCfg->daysToKeep2 > pCfg->daysToKeep) { - mError("invalid db option daysToKeep2:%d valid range: [%d, %d]", pCfg->daysToKeep, TSDB_MIN_KEEP, pCfg->daysToKeep); - return TSDB_CODE_MND_INVALID_DB_OPTION; + mError("invalid db option daysToKeep2:%d valid range: [%d, %d]", pCfg->daysToKeep2, TSDB_MIN_KEEP, pCfg->daysToKeep); + return TSDB_CODE_MND_INVALID_DB_OPTION_KEEP; } if (pCfg->daysToKeep1 < TSDB_MIN_KEEP || pCfg->daysToKeep1 > pCfg->daysToKeep2) { mError("invalid db option daysToKeep1:%d valid range: [%d, %d]", pCfg->daysToKeep1, TSDB_MIN_KEEP, pCfg->daysToKeep2); - return TSDB_CODE_MND_INVALID_DB_OPTION; + return TSDB_CODE_MND_INVALID_DB_OPTION_KEEP; } -#endif if (pCfg->maxRowsPerFileBlock < TSDB_MIN_MAX_ROW_FBLOCK || pCfg->maxRowsPerFileBlock > TSDB_MAX_MAX_ROW_FBLOCK) { mError("invalid db option maxRowsPerFileBlock:%d valid range: [%d, %d]", pCfg->maxRowsPerFileBlock, @@ -491,7 +496,7 @@ void mnodeRemoveVgroupFromDb(SVgObj *pVgroup) { } void mnodeCleanupDbs() { - sdbCloseTable(tsDbSdb); + sdbCloseTable(tsDbRid); tsDbSdb = NULL; } @@ -986,8 +991,8 @@ 
static int32_t mnodeAlterDbCb(SMnodeMsg *pMsg, int32_t code) { SDbObj *pDb = pMsg->pDb; void *pIter = NULL; - while (1) { - SVgObj *pVgroup = NULL; + SVgObj *pVgroup = NULL; + while (1) { pIter = mnodeGetNextVgroup(pIter, &pVgroup); if (pVgroup == NULL) break; if (pVgroup->pDb == pDb) { @@ -995,12 +1000,11 @@ static int32_t mnodeAlterDbCb(SMnodeMsg *pMsg, int32_t code) { } mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); mDebug("db:%s, all vgroups is altered", pDb->name); mLInfo("db:%s, is alterd by %s", pDb->name, mnodeGetUserFromMsg(pMsg)); - balanceAsyncNotify(); + bnNotify(); return TSDB_CODE_SUCCESS; } @@ -1146,7 +1150,5 @@ void mnodeDropAllDbs(SAcctObj *pAcct) { mnodeDecDbRef(pDb); } - sdbFreeIter(pIter); - mInfo("acct:%s, all dbs:%d is dropped from sdb", pAcct->user, numOfDbs); } diff --git a/src/mnode/src/mnodeDnode.c b/src/mnode/src/mnodeDnode.c index f76533c760..0ff50b2307 100644 --- a/src/mnode/src/mnodeDnode.c +++ b/src/mnode/src/mnodeDnode.c @@ -16,12 +16,12 @@ #define _DEFAULT_SOURCE #include "os.h" #include "tgrant.h" -#include "tbalance.h" +#include "tbn.h" #include "tglobal.h" #include "tconfig.h" #include "tutil.h" #include "tsocket.h" -#include "tbalance.h" +#include "tbn.h" #include "tsync.h" #include "tdataformat.h" #include "mnode.h" @@ -39,6 +39,7 @@ #include "mnodeCluster.h" int32_t tsAccessSquence = 0; +int64_t tsDnodeRid = -1; static void * tsDnodeSdb = NULL; static int32_t tsDnodeUpdateSize = 0; extern void * tsMnodeSdb; @@ -114,7 +115,7 @@ static int32_t mnodeDnodeActionDelete(SSdbRow *pRow) { mnodeDropAllDnodeVgroups(pDnode); #endif mnodeDropMnodeLocal(pDnode->dnodeId); - balanceAsyncNotify(); + bnNotify(); mnodeUpdateDnodeEps(); mDebug("dnode:%d, all vgroups is dropped from sdb", pDnode->dnodeId); @@ -187,7 +188,8 @@ int32_t mnodeInitDnodes() { .fpRestored = mnodeDnodeActionRestored }; - tsDnodeSdb = sdbOpenTable(&desc); + tsDnodeRid = sdbOpenTable(&desc); + tsDnodeSdb = sdbGetTableByRid(tsDnodeRid); if (tsDnodeSdb == NULL) { 
mError("failed to init dnodes data"); return -1; @@ -206,13 +208,14 @@ int32_t mnodeInitDnodes() { mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_VNODES, mnodeRetrieveVnodes); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_DNODE, mnodeGetDnodeMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_DNODE, mnodeRetrieveDnodes); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_DNODE, mnodeCancelGetNextDnode); mDebug("table:dnodes table is created"); return 0; } void mnodeCleanupDnodes() { - sdbCloseTable(tsDnodeSdb); + sdbCloseTable(tsDnodeRid); pthread_mutex_destroy(&tsDnodeEpsMutex); free(tsDnodeEps); tsDnodeEps = NULL; @@ -223,6 +226,10 @@ void *mnodeGetNextDnode(void *pIter, SDnodeObj **pDnode) { return sdbFetchRow(tsDnodeSdb, pIter, (void **)pDnode); } +void mnodeCancelGetNextDnode(void *pIter) { + sdbFreeIter(tsDnodeSdb, pIter); +} + int32_t mnodeGetDnodesNum() { return sdbGetNumOfRows(tsDnodeSdb); } @@ -241,8 +248,6 @@ int32_t mnodeGetOnlinDnodesCpuCoreNum() { mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); - if (cpuCores < 2) cpuCores = 2; return cpuCores; } @@ -259,8 +264,6 @@ int32_t mnodeGetOnlineDnodesNum() { mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); - return onlineDnodes; } @@ -276,13 +279,12 @@ void *mnodeGetDnodeByEp(char *ep) { pIter = mnodeGetNextDnode(pIter, &pDnode); if (pDnode == NULL) break; if (strcmp(ep, pDnode->dnodeEp) == 0) { - sdbFreeIter(pIter); + mnodeCancelGetNextDnode(pIter); return pDnode; } mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); return NULL; } @@ -345,7 +347,7 @@ static int32_t mnodeProcessCfgDnodeMsg(SMnodeMsg *pMsg) { return TSDB_CODE_MND_INVALID_DNODE_CFG_OPTION; } - int32_t code = balanceAlterDnode(pDnode, vnodeId, dnodeId); + int32_t code = bnAlterDnode(pDnode, vnodeId, dnodeId); mnodeDecDnodeRef(pDnode); return code; } else { @@ -464,7 +466,10 @@ static void mnodeUpdateDnodeEps() { while (1) { pIter = mnodeGetNextDnode(pIter, &pDnode); if (pDnode == NULL) break; - if (dnodesNum >= totalDnodes) break; + if (dnodesNum >= 
totalDnodes) { + mnodeCancelGetNextDnode(pIter); + break; + } SDnodeEp *pEp = &tsDnodeEps->dnodeEps[dnodesNum]; dnodesNum++; @@ -474,7 +479,6 @@ static void mnodeUpdateDnodeEps() { mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); pthread_mutex_unlock(&tsDnodeEpsMutex); } @@ -587,8 +591,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { mInfo("dnode:%d, from offline to online", pDnode->dnodeId); pDnode->status = TAOS_DN_STATUS_READY; pDnode->offlineReason = TAOS_DN_OFF_ONLINE; - balanceSyncNotify(); - balanceAsyncNotify(); + bnCheckModules(); + bnNotify(); } if (openVnodes != pDnode->openVnodes) { @@ -704,7 +708,7 @@ static int32_t mnodeDropDnodeByEp(char *ep, SMnodeMsg *pMsg) { #ifndef _SYNC int32_t code = mnodeDropDnode(pDnode, pMsg); #else - int32_t code = balanceDropDnode(pDnode); + int32_t code = bnDropDnode(pDnode); #endif mnodeDecDnodeRef(pDnode); return code; @@ -1100,7 +1104,7 @@ static int32_t mnodeGetVnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pC pDnode = mnodeGetDnodeByEp(pShow->payload); } else { void *pIter = mnodeGetNextDnode(NULL, (SDnodeObj **)&pDnode); - sdbFreeIter(pIter); + mnodeCancelGetNextDnode(pIter); } if (pDnode != NULL) { @@ -1148,7 +1152,6 @@ static int32_t mnodeRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, vo mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); } else { numOfRows = 0; } @@ -1179,12 +1182,12 @@ static char* mnodeGetDnodeAlternativeRoleStr(int32_t alternativeRole) { #ifndef _SYNC -int32_t balanceInit() { return TSDB_CODE_SUCCESS; } -void balanceCleanUp() {} -void balanceAsyncNotify() {} -void balanceSyncNotify() {} -void balanceReset() {} -int32_t balanceAlterDnode(struct SDnodeObj *pDnode, int32_t vnodeId, int32_t dnodeId) { return TSDB_CODE_SYN_NOT_ENABLED; } +int32_t bnInit() { return TSDB_CODE_SUCCESS; } +void bnCleanUp() {} +void bnNotify() {} +void bnCheckModules() {} +void bnReset() {} +int32_t bnAlterDnode(struct SDnodeObj *pDnode, int32_t vnodeId, int32_t dnodeId) { return 
TSDB_CODE_SYN_NOT_ENABLED; } char* syncRole[] = { "offline", @@ -1194,7 +1197,7 @@ char* syncRole[] = { "master" }; -int32_t balanceAllocVnodes(SVgObj *pVgroup) { +int32_t bnAllocVnodes(SVgObj *pVgroup) { void * pIter = NULL; SDnodeObj *pDnode = NULL; SDnodeObj *pSelDnode = NULL; @@ -1217,8 +1220,6 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { mnodeDecDnodeRef(pDnode); } - sdbFreeIter(pIter); - if (pSelDnode == NULL) { mError("failed to alloc vnode to vgroup"); return TSDB_CODE_MND_NO_ENOUGH_DNODES; diff --git a/src/mnode/src/mnodeMain.c b/src/mnode/src/mnodeMain.c index 1f5ad42bde..86f2c821f9 100644 --- a/src/mnode/src/mnodeMain.c +++ b/src/mnode/src/mnodeMain.c @@ -17,7 +17,7 @@ #include "os.h" #include "taosdef.h" #include "tsched.h" -#include "tbalance.h" +#include "tbn.h" #include "tgrant.h" #include "ttimer.h" #include "tglobal.h" @@ -47,6 +47,7 @@ void *tsMnodeTmr = NULL; static bool tsMgmtIsRunning = false; static const SMnodeComponent tsMnodeComponents[] = { + {"sdbref", sdbInitRef, sdbCleanUpRef}, {"profile", mnodeInitProfile, mnodeCleanupProfile}, {"cluster", mnodeInitCluster, mnodeCleanupCluster}, {"accts", mnodeInitAccts, mnodeCleanupAccts}, @@ -57,7 +58,7 @@ static const SMnodeComponent tsMnodeComponents[] = { {"tables", mnodeInitTables, mnodeCleanupTables}, {"mnodes", mnodeInitMnodes, mnodeCleanupMnodes}, {"sdb", sdbInit, sdbCleanUp}, - {"balance", balanceInit, balanceCleanUp}, + {"balance", bnInit, bnCleanUp}, {"grant", grantInit, grantCleanUp}, {"show", mnodeInitShow, mnodeCleanUpShow} }; diff --git a/src/mnode/src/mnodeMnode.c b/src/mnode/src/mnodeMnode.c index 205bfda4b9..d20d51f82b 100644 --- a/src/mnode/src/mnodeMnode.c +++ b/src/mnode/src/mnodeMnode.c @@ -19,7 +19,7 @@ #include "tglobal.h" #include "trpc.h" #include "tsync.h" -#include "tbalance.h" +#include "tbn.h" #include "tutil.h" #include "tsocket.h" #include "tdataformat.h" @@ -34,6 +34,7 @@ #include "mnodeUser.h" #include "mnodeVgroup.h" +int64_t tsMnodeRid = -1; static void * 
tsMnodeSdb = NULL; static int32_t tsMnodeUpdateSize = 0; static SRpcEpSet tsMnodeEpSetForShell; @@ -123,7 +124,7 @@ static int32_t mnodeMnodeActionRestored() { pMnode->role = TAOS_SYNC_ROLE_MASTER; mnodeDecMnodeRef(pMnode); } - sdbFreeIter(pIter); + mnodeCancelGetNextMnode(pIter); } mnodeUpdateMnodeEpSet(); @@ -153,7 +154,8 @@ int32_t mnodeInitMnodes() { .fpRestored = mnodeMnodeActionRestored }; - tsMnodeSdb = sdbOpenTable(&desc); + tsMnodeRid = sdbOpenTable(&desc); + tsMnodeSdb = sdbGetTableByRid(tsMnodeRid); if (tsMnodeSdb == NULL) { mError("failed to init mnodes data"); return -1; @@ -161,13 +163,14 @@ int32_t mnodeInitMnodes() { mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_MNODE, mnodeGetMnodeMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_MNODE, mnodeRetrieveMnodes); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_MNODE, mnodeCancelGetNextMnode); mDebug("table:mnodes table is created"); return TSDB_CODE_SUCCESS; } void mnodeCleanupMnodes() { - sdbCloseTable(tsMnodeSdb); + sdbCloseTable(tsMnodeRid); tsMnodeSdb = NULL; mnodeMnodeDestroyLock(); } @@ -192,6 +195,10 @@ void *mnodeGetNextMnode(void *pIter, SMnodeObj **pMnode) { return sdbFetchRow(tsMnodeSdb, pIter, (void **)pMnode); } +void mnodeCancelGetNextMnode(void *pIter) { + sdbFreeIter(tsMnodeSdb, pIter); +} + void mnodeUpdateMnodeEpSet() { mInfo("update mnodes epSet, numOfEps:%d ", mnodeGetMnodesNum()); @@ -239,8 +246,6 @@ void mnodeUpdateMnodeEpSet() { tsMnodeEpSetForShell.numOfEps = index; tsMnodeEpSetForPeer.numOfEps = index; - sdbFreeIter(pIter); - mnodeMnodeUnLock(); } @@ -248,12 +253,30 @@ void mnodeGetMnodeEpSetForPeer(SRpcEpSet *epSet) { mnodeMnodeRdLock(); *epSet = tsMnodeEpSetForPeer; mnodeMnodeUnLock(); + + for (int32_t i = 0; i < epSet->numOfEps; ++i) { + if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort + TSDB_PORT_DNODEDNODE) { + epSet->inUse = (i + 1) % epSet->numOfEps; + mTrace("mnode:%d, for peer ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], 
htons(epSet->port[i]), epSet->inUse); + } else { + mTrace("mpeer:%d, for peer ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i])); + } + } } void mnodeGetMnodeEpSetForShell(SRpcEpSet *epSet) { mnodeMnodeRdLock(); *epSet = tsMnodeEpSetForShell; mnodeMnodeUnLock(); + + for (int32_t i = 0; i < epSet->numOfEps; ++i) { + if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort) { + epSet->inUse = (i + 1) % epSet->numOfEps; + mTrace("mnode:%d, for shell ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], htons(epSet->port[i]), epSet->inUse); + } else { + mTrace("mnode:%d, for shell ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i])); + } + } } char* mnodeGetMnodeMasterEp() { diff --git a/src/mnode/src/mnodePeer.c b/src/mnode/src/mnodePeer.c index f13ef75398..cfb7b7781b 100644 --- a/src/mnode/src/mnodePeer.c +++ b/src/mnode/src/mnodePeer.c @@ -20,7 +20,7 @@ #include "tsystem.h" #include "tutil.h" #include "tgrant.h" -#include "tbalance.h" +#include "tbn.h" #include "tglobal.h" #include "mnode.h" #include "dnode.h" @@ -58,16 +58,8 @@ int32_t mnodeProcessPeerReq(SMnodeMsg *pMsg) { rpcRsp->rsp = epSet; rpcRsp->len = sizeof(SRpcEpSet); - mDebug("msg:%p, ahandle:%p type:%s in mpeer queue will be redirected, numOfEps:%d inUse:%d", pMsg, - pMsg->rpcMsg.ahandle, taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); - for (int32_t i = 0; i < epSet->numOfEps; ++i) { - if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort + TSDB_PORT_DNODEDNODE) { - epSet->inUse = (i + 1) % epSet->numOfEps; - mDebug("mpeer:%d ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], htons(epSet->port[i]), epSet->inUse); - } else { - mDebug("mpeer:%d ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i])); - } - } + mDebug("msg:%p, ahandle:%p type:%s in mpeer queue is redirected, numOfEps:%d inUse:%d", pMsg, pMsg->rpcMsg.ahandle, + taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); return TSDB_CODE_RPC_REDIRECT; } diff --git 
a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index 12ac64854c..36b6ff7a59 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -34,7 +34,6 @@ #define QUERY_ID_SIZE 20 #define QUERY_STREAM_SAVE_SIZE 20 -extern void *tsMnodeTmr; static SCacheObj *tsMnodeConnCache = NULL; static int32_t tsConnIndex = 0; @@ -42,6 +41,7 @@ static int32_t mnodeGetQueryMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pC static int32_t mnodeRetrieveQueries(SShowObj *pShow, char *data, int32_t rows, void *pConn); static int32_t mnodeGetConnsMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mnodeRetrieveConns(SShowObj *pShow, char *data, int32_t rows, void *pConn); +static void mnodeCancelGetNextConn(void *pIter); static int32_t mnodeGetStreamMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mnodeRetrieveStreams(SShowObj *pShow, char *data, int32_t rows, void *pConn); static void mnodeFreeConn(void *data); @@ -52,10 +52,13 @@ static int32_t mnodeProcessKillConnectionMsg(SMnodeMsg *pMsg); int32_t mnodeInitProfile() { mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_QUERIES, mnodeGetQueryMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_QUERIES, mnodeRetrieveQueries); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_QUERIES, mnodeCancelGetNextConn); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_CONNS, mnodeGetConnsMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_CONNS, mnodeRetrieveConns); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_CONNS, mnodeCancelGetNextConn); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_STREAMS, mnodeGetStreamMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_STREAMS, mnodeRetrieveStreams); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_STREAMS, mnodeCancelGetNextConn); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_QUERY, mnodeProcessKillQueryMsg); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg); @@ -137,21 +140,15 @@ static void mnodeFreeConn(void *data) { mDebug("connId:%d, is 
destroyed", pConn->connId); } -static void *mnodeGetNextConn(SHashMutableIterator *pIter, SConnObj **pConn) { +static void *mnodeGetNextConn(void *pIter, SConnObj **pConn) { *pConn = NULL; - if (pIter == NULL) { - pIter = taosHashCreateIter(tsMnodeConnCache->pHashTable); - } + pIter = taosHashIterate(tsMnodeConnCache->pHashTable, pIter); + if (pIter == NULL) return NULL; - if (!taosHashIterNext(pIter)) { - taosHashDestroyIter(pIter); - return NULL; - } - - SCacheDataNode **pNode = taosHashIterGet(pIter); + SCacheDataNode **pNode = pIter; if (pNode == NULL || *pNode == NULL) { - taosHashDestroyIter(pIter); + taosHashCancelIterate(tsMnodeConnCache->pHashTable, pIter); return NULL; } @@ -159,6 +156,10 @@ static void *mnodeGetNextConn(SHashMutableIterator *pIter, SConnObj **pConn) { return pIter; } +static void mnodeCancelGetNextConn(void *pIter) { + taosHashCancelIterate(tsMnodeConnCache->pHashTable, pIter); +} + static int32_t mnodeGetConnsMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { SUserObj *pUser = mnodeGetUserFromConn(pConn); if (pUser == NULL) return 0; diff --git a/src/mnode/src/mnodeRead.c b/src/mnode/src/mnodeRead.c index ea7ce783e8..c2a70bc01d 100644 --- a/src/mnode/src/mnodeRead.c +++ b/src/mnode/src/mnodeRead.c @@ -17,7 +17,7 @@ #include "os.h" #include "taosdef.h" #include "tsched.h" -#include "tbalance.h" +#include "tbn.h" #include "tgrant.h" #include "ttimer.h" #include "tglobal.h" @@ -51,21 +51,12 @@ int32_t mnodeProcessRead(SMnodeMsg *pMsg) { SMnodeRsp *rpcRsp = &pMsg->rpcRsp; SRpcEpSet *epSet = rpcMallocCont(sizeof(SRpcEpSet)); mnodeGetMnodeEpSetForShell(epSet); - - mDebug("msg:%p, app:%p type:%s in mread queue will be redirected, numOfEps:%d inUse:%d", pMsg, pMsg->rpcMsg.ahandle, - taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); - for (int32_t i = 0; i < epSet->numOfEps; ++i) { - if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort) { - epSet->inUse = (i + 1) % epSet->numOfEps; - 
mDebug("mnode index:%d ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], htons(epSet->port[i]), epSet->inUse); - } else { - mDebug("mnode index:%d ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i])); - } - } - rpcRsp->rsp = epSet; rpcRsp->len = sizeof(SRpcEpSet); + mDebug("msg:%p, app:%p type:%s in mread queue is redirected, numOfEps:%d inUse:%d", pMsg, pMsg->rpcMsg.ahandle, + taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); + return TSDB_CODE_RPC_REDIRECT; } diff --git a/src/mnode/src/mnodeSdb.c b/src/mnode/src/mnodeSdb.c index e5fda26687..2ef758baf1 100644 --- a/src/mnode/src/mnodeSdb.c +++ b/src/mnode/src/mnodeSdb.c @@ -18,7 +18,8 @@ #include "taoserror.h" #include "hash.h" #include "tutil.h" -#include "tbalance.h" +#include "tref.h" +#include "tbn.h" #include "tqueue.h" #include "twal.h" #include "tsync.h" @@ -98,6 +99,7 @@ typedef struct { SSdbWorker *worker; } SSdbWorkerPool; +int32_t tsSdbRid; extern void * tsMnodeTmr; static void * tsSdbTmr; static SSdbMgmt tsSdbMgmt = {0}; @@ -118,6 +120,7 @@ static void sdbFreeQueue(); static int32_t sdbInsertHash(SSdbTable *pTable, SSdbRow *pRow); static int32_t sdbUpdateHash(SSdbTable *pTable, SSdbRow *pRow); static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbRow *pRow); +static void sdbCloseTableObj(void *handle); int32_t sdbGetId(void *pTable) { return ((SSdbTable *)pTable)->autoIndex; @@ -241,13 +244,23 @@ static void sdbNotifyRole(int32_t vgId, int8_t role) { sdbInfo("vgId:1, mnode role changed from %s to %s", syncRole[tsSdbMgmt.role], syncRole[role]); if (role == TAOS_SYNC_ROLE_MASTER && tsSdbMgmt.role != TAOS_SYNC_ROLE_MASTER) { - balanceReset(); + bnReset(); } tsSdbMgmt.role = role; sdbUpdateMnodeRoles(); } +static int32_t sdbNotifyFileSynced(int32_t vgId, uint64_t fversion) { return 0; } + +static void sdbNotifyFlowCtrl(int32_t vgId, int32_t level) {} + +static int32_t sdbGetSyncVersion(int32_t vgId, uint64_t *fver, uint64_t *vver) { + *fver = 0; + *vver = 0; + return 0; +} + // failed to forward, 
need revert insert static void sdbHandleFailedConfirm(SSdbRow *pRow) { SWalHead *pHead = pRow->pHead; @@ -325,7 +338,6 @@ void sdbUpdateSync(void *pMnodes) { mnodeDecDnodeRef(pDnode); mnodeDecMnodeRef(pMnode); } - sdbFreeIter(pIter); syncCfg.replica = index; mDebug("vgId:1, mnodes info not input, use infos in sdb, numOfMnodes:%d", syncCfg.replica); } else { @@ -370,11 +382,14 @@ void sdbUpdateSync(void *pMnodes) { syncInfo.version = sdbGetVersion(); syncInfo.syncCfg = syncCfg; sprintf(syncInfo.path, "%s", tsMnodeDir); - syncInfo.getWalInfo = sdbGetWalInfo; syncInfo.getFileInfo = sdbGetFileInfo; + syncInfo.getWalInfo = sdbGetWalInfo; syncInfo.writeToCache = sdbWriteFwdToQueue; syncInfo.confirmForward = sdbConfirmForward; syncInfo.notifyRole = sdbNotifyRole; + syncInfo.notifyFileSynced = sdbNotifyFileSynced; + syncInfo.notifyFlowCtrl = sdbNotifyFlowCtrl; + syncInfo.getVersion = sdbGetSyncVersion; tsSdbMgmt.cfg = syncCfg; if (tsSdbMgmt.sync) { @@ -386,6 +401,17 @@ void sdbUpdateSync(void *pMnodes) { sdbUpdateMnodeRoles(); } +int32_t sdbInitRef() { + tsSdbRid = taosOpenRef(10, sdbCloseTableObj); + if (tsSdbRid <= 0) { + sdbError("failed to init sdb ref"); + return -1; + } + return 0; +} + +void sdbCleanUpRef() { taosCloseRef(tsSdbRid); } + int32_t sdbInit() { pthread_mutex_init(&tsSdbMgmt.mutex, NULL); @@ -424,7 +450,7 @@ void sdbCleanUp() { walClose(tsSdbMgmt.wal); tsSdbMgmt.wal = NULL; } - + pthread_mutex_destroy(&tsSdbMgmt.mutex); } @@ -507,7 +533,7 @@ static int32_t sdbInsertHash(SSdbTable *pTable, SSdbRow *pRow) { atomic_add_fetch_32(&pTable->autoIndex, 1); } - sdbDebug("vgId:1, sdb:%s, insert key:%s to hash, rowSize:%d rows:%" PRId64 ", msg:%p", pTable->name, + sdbTrace("vgId:1, sdb:%s, insert key:%s to hash, rowSize:%d rows:%" PRId64 ", msg:%p", pTable->name, sdbGetRowStr(pTable, pRow->pObj), pRow->rowSize, pTable->numOfRows, pRow->pMsg); int32_t code = (*pTable->fpInsert)(pRow); @@ -543,7 +569,7 @@ static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbRow *pRow) 
{ atomic_sub_fetch_32(&pTable->numOfRows, 1); - sdbDebug("vgId:1, sdb:%s, delete key:%s from hash, numOfRows:%" PRId64 ", msg:%p", pTable->name, + sdbTrace("vgId:1, sdb:%s, delete key:%s from hash, numOfRows:%" PRId64 ", msg:%p", pTable->name, sdbGetRowStr(pTable, pRow->pObj), pTable->numOfRows, pRow->pMsg); sdbDecRef(pTable, pRow->pObj); @@ -552,7 +578,7 @@ static int32_t sdbDeleteHash(SSdbTable *pTable, SSdbRow *pRow) { } static int32_t sdbUpdateHash(SSdbTable *pTable, SSdbRow *pRow) { - sdbDebug("vgId:1, sdb:%s, update key:%s in hash, numOfRows:%" PRId64 ", msg:%p", pTable->name, + sdbTrace("vgId:1, sdb:%s, update key:%s in hash, numOfRows:%" PRId64 ", msg:%p", pTable->name, sdbGetRowStr(pTable, pRow->pObj), pTable->numOfRows, pRow->pMsg); (*pTable->fpUpdate)(pRow); @@ -650,7 +676,7 @@ static int32_t sdbProcessWrite(void *wparam, void *hparam, int32_t qtype, void * return syncCode; } - sdbDebug("vgId:1, sdb:%s, record from wal/fwd is disposed, action:%s key:%s hver:%" PRIu64, pTable->name, + sdbTrace("vgId:1, sdb:%s, record from %s is disposed, action:%s key:%s hver:%" PRIu64, pTable->name, qtypeStr[qtype], actStr[action], sdbGetKeyStr(pTable, pHead->cont), pHead->version); // even it is WAL/FWD, it shall be called to update version in sync @@ -775,24 +801,17 @@ int32_t sdbUpdateRow(SSdbRow *pRow) { } } -void *sdbFetchRow(void *tparam, void *pNode, void **ppRow) { +void *sdbFetchRow(void *tparam, void *pIter, void **ppRow) { SSdbTable *pTable = tparam; *ppRow = NULL; if (pTable == NULL) return NULL; - SHashMutableIterator *pIter = pNode; - if (pIter == NULL) { - pIter = taosHashCreateIter(pTable->iHandle); - } + pIter = taosHashIterate(pTable->iHandle, pIter); + if (pIter == NULL) return NULL; - if (!taosHashIterNext(pIter)) { - taosHashDestroyIter(pIter); - return NULL; - } - - void **ppMetaRow = taosHashIterGet(pIter); + void **ppMetaRow = pIter; if (ppMetaRow == NULL) { - taosHashDestroyIter(pIter); + taosHashCancelIterate(pTable->iHandle, pIter); return 
NULL; } @@ -802,16 +821,17 @@ void *sdbFetchRow(void *tparam, void *pNode, void **ppRow) { return pIter; } -void sdbFreeIter(void *pIter) { - if (pIter != NULL) { - taosHashDestroyIter(pIter); - } +void sdbFreeIter(void *tparam, void *pIter) { + SSdbTable *pTable = tparam; + if (pTable == NULL || pIter == NULL) return; + + taosHashCancelIterate(pTable->iHandle, pIter); } -void *sdbOpenTable(SSdbTableDesc *pDesc) { +int64_t sdbOpenTable(SSdbTableDesc *pDesc) { SSdbTable *pTable = (SSdbTable *)calloc(1, sizeof(SSdbTable)); - if (pTable == NULL) return NULL; + if (pTable == NULL) return -1; pthread_mutex_init(&pTable->mutex, NULL); tstrncpy(pTable->name, pDesc->name, SDB_TABLE_LEN); @@ -836,19 +856,31 @@ void *sdbOpenTable(SSdbTableDesc *pDesc) { tsSdbMgmt.numOfTables++; tsSdbMgmt.tableList[pTable->id] = pTable; - return pTable; + + return taosAddRef(tsSdbRid, pTable); } -void sdbCloseTable(void *handle) { +void sdbCloseTable(int64_t rid) { + taosRemoveRef(tsSdbRid, rid); +} + +void *sdbGetTableByRid(int64_t rid) { + void *handle = taosAcquireRef(tsSdbRid, rid); + taosReleaseRef(tsSdbRid, rid); + return handle; +} + +static void sdbCloseTableObj(void *handle) { SSdbTable *pTable = (SSdbTable *)handle; if (pTable == NULL) return; tsSdbMgmt.numOfTables--; tsSdbMgmt.tableList[pTable->id] = NULL; - SHashMutableIterator *pIter = taosHashCreateIter(pTable->iHandle); - while (taosHashIterNext(pIter)) { - void **ppRow = taosHashIterGet(pIter); + void *pIter = taosHashIterate(pTable->iHandle, NULL); + while (pIter) { + void **ppRow = pIter; + pIter = taosHashIterate(pTable->iHandle, pIter); if (ppRow == NULL) continue; SSdbRow row = { @@ -859,8 +891,9 @@ void sdbCloseTable(void *handle) { (*pTable->fpDestroy)(&row); } - taosHashDestroyIter(pIter); + taosHashCancelIterate(pTable->iHandle, pIter); taosHashCleanup(pTable->iHandle); + pTable->iHandle = NULL; pthread_mutex_destroy(&pTable->mutex); sdbDebug("vgId:1, sdb:%s, is closed, numOfTables:%d", pTable->name, 
tsSdbMgmt.numOfTables); diff --git a/src/mnode/src/mnodeShow.c b/src/mnode/src/mnodeShow.c index 0d53fa9617..2da46d5b4b 100644 --- a/src/mnode/src/mnodeShow.c +++ b/src/mnode/src/mnodeShow.c @@ -52,11 +52,11 @@ static bool mnodeCheckShowFinished(SShowObj *pShow); static void *mnodePutShowObj(SShowObj *pShow); static void mnodeReleaseShowObj(SShowObj *pShow, bool forceRemove); -extern void *tsMnodeTmr; static void *tsMnodeShowCache = NULL; static int32_t tsShowObjIndex = 0; static SShowMetaFp tsMnodeShowMetaFp[TSDB_MGMT_TABLE_MAX] = {0}; static SShowRetrieveFp tsMnodeShowRetrieveFp[TSDB_MGMT_TABLE_MAX] = {0}; +static SShowFreeIterFp tsMnodeShowFreeIterFp[TSDB_MGMT_TABLE_MAX] = {0}; int32_t mnodeInitShow() { mnodeAddReadMsgHandle(TSDB_MSG_TYPE_CM_SHOW, mnodeProcessShowMsg); @@ -85,6 +85,10 @@ void mnodeAddShowRetrieveHandle(uint8_t msgType, SShowRetrieveFp fp) { tsMnodeShowRetrieveFp[msgType] = fp; } +void mnodeAddShowFreeIterHandle(uint8_t msgType, SShowFreeIterFp fp) { + tsMnodeShowFreeIterFp[msgType] = fp; +} + static char *mnodeGetShowType(int32_t showType) { switch (showType) { case TSDB_MGMT_TABLE_ACCT: return "show accounts"; @@ -412,7 +416,9 @@ static void* mnodePutShowObj(SShowObj *pShow) { static void mnodeFreeShowObj(void *data) { SShowObj *pShow = *(SShowObj **)data; - sdbFreeIter(pShow->pIter); + if (tsMnodeShowFreeIterFp[pShow->type] != NULL && pShow->pIter != NULL) { + (*tsMnodeShowFreeIterFp[pShow->type])(pShow->pIter); + } mDebug("%p, show is destroyed, data:%p index:%d", pShow, data, pShow->index); tfree(pShow); diff --git a/src/mnode/src/mnodeTable.c b/src/mnode/src/mnodeTable.c index d0a5402986..2149cb12c0 100644 --- a/src/mnode/src/mnodeTable.c +++ b/src/mnode/src/mnodeTable.c @@ -49,7 +49,9 @@ #define CREATE_CTABLE_RETRY_TIMES 10 #define CREATE_CTABLE_RETRY_SEC 14 +int64_t tsCTableRid = -1; static void * tsChildTableSdb; +int64_t tsSTableRid = -1; static void * tsSuperTableSdb; static int32_t tsChildTableUpdateSize; static int32_t 
tsSuperTableUpdateSize; @@ -342,8 +344,7 @@ static int32_t mnodeChildTableActionRestored() { mnodeDecTableRef(pTable); } - sdbFreeIter(pIter); - + mnodeCancelGetNextChildTable(pIter); return 0; } @@ -351,7 +352,7 @@ static int32_t mnodeInitChildTables() { SCTableObj tObj; tsChildTableUpdateSize = (int8_t *)tObj.updateEnd - (int8_t *)&tObj.info.type; - SSdbTableDesc tableDesc = { + SSdbTableDesc desc = { .id = SDB_TABLE_CTABLE, .name = "ctables", .hashSessions = TSDB_DEFAULT_CTABLES_HASH_SIZE, @@ -367,7 +368,8 @@ static int32_t mnodeInitChildTables() { .fpRestored = mnodeChildTableActionRestored }; - tsChildTableSdb = sdbOpenTable(&tableDesc); + tsCTableRid = sdbOpenTable(&desc); + tsChildTableSdb = sdbGetTableByRid(tsCTableRid); if (tsChildTableSdb == NULL) { mError("failed to init child table data"); return -1; @@ -378,7 +380,7 @@ static int32_t mnodeInitChildTables() { } static void mnodeCleanupChildTables() { - sdbCloseTable(tsChildTableSdb); + sdbCloseTable(tsCTableRid); tsChildTableSdb = NULL; } @@ -544,7 +546,7 @@ static int32_t mnodeInitSuperTables() { SSTableObj tObj; tsSuperTableUpdateSize = (int8_t *)tObj.updateEnd - (int8_t *)&tObj.info.type; - SSdbTableDesc tableDesc = { + SSdbTableDesc desc = { .id = SDB_TABLE_STABLE, .name = "stables", .hashSessions = TSDB_DEFAULT_STABLES_HASH_SIZE, @@ -560,7 +562,8 @@ static int32_t mnodeInitSuperTables() { .fpRestored = mnodeSuperTableActionRestored }; - tsSuperTableSdb = sdbOpenTable(&tableDesc); + tsSTableRid = sdbOpenTable(&desc); + tsSuperTableSdb = sdbGetTableByRid(tsSTableRid); if (tsSuperTableSdb == NULL) { mError("failed to init stables data"); return -1; @@ -571,7 +574,7 @@ static int32_t mnodeInitSuperTables() { } static void mnodeCleanupSuperTables() { - sdbCloseTable(tsSuperTableSdb); + sdbCloseTable(tsSTableRid); tsSuperTableSdb = NULL; } @@ -602,10 +605,13 @@ int32_t mnodeInitTables() { mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_TABLE, mnodeGetShowTableMeta); 
mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_TABLE, mnodeRetrieveShowTables); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_TABLE, mnodeCancelGetNextChildTable); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_METRIC, mnodeGetShowSuperTableMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_METRIC, mnodeRetrieveShowSuperTables); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_METRIC, mnodeCancelGetNextSuperTable); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_STREAMTABLES, mnodeGetStreamTableMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_STREAMTABLES, mnodeRetrieveStreamTables); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_STREAMTABLES, mnodeCancelGetNextChildTable); return TSDB_CODE_SUCCESS; } @@ -626,14 +632,12 @@ static void *mnodeGetSuperTableByUid(uint64_t uid) { pIter = mnodeGetNextSuperTable(pIter, &pStable); if (pStable == NULL) break; if (pStable->uid == uid) { - sdbFreeIter(pIter); + mnodeCancelGetNextSuperTable(pIter); return pStable; } mnodeDecTableRef(pStable); } - sdbFreeIter(pIter); - return NULL; } @@ -655,10 +659,18 @@ void *mnodeGetNextChildTable(void *pIter, SCTableObj **pTable) { return sdbFetchRow(tsChildTableSdb, pIter, (void **)pTable); } +void mnodeCancelGetNextChildTable(void *pIter) { + sdbFreeIter(tsChildTableSdb, pIter); +} + void *mnodeGetNextSuperTable(void *pIter, SSTableObj **pTable) { return sdbFetchRow(tsSuperTableSdb, pIter, (void **)pTable); } +void mnodeCancelGetNextSuperTable(void *pIter) { + sdbFreeIter(tsSuperTableSdb, pIter); +} + void mnodeIncTableRef(void *p1) { STableObj *pTable = (STableObj *)p1; if (pTable->type == TSDB_SUPER_TABLE) { @@ -914,10 +926,10 @@ static int32_t mnodeProcessDropSuperTableMsg(SMnodeMsg *pMsg) { SSTableObj *pStable = (SSTableObj *)pMsg->pTable; if (pStable->vgHash != NULL /*pStable->numOfTables != 0*/) { - SHashMutableIterator *pIter = taosHashCreateIter(pStable->vgHash); - while (taosHashIterNext(pIter)) { - int32_t *pVgId = taosHashIterGet(pIter); + int32_t *pVgId = taosHashIterate(pStable->vgHash, NULL); + while 
(pVgId) { SVgObj *pVgroup = mnodeGetVgroup(*pVgId); + pVgId = taosHashIterate(pStable->vgHash, pVgId); if (pVgroup == NULL) break; SDropSTableMsg *pDrop = rpcMallocCont(sizeof(SDropSTableMsg)); @@ -933,7 +945,8 @@ static int32_t mnodeProcessDropSuperTableMsg(SMnodeMsg *pMsg) { dnodeSendMsgToDnode(&epSet, &rpcMsg); mnodeDecVgroupRef(pVgroup); } - taosHashDestroyIter(pIter); + + taosHashCancelIterate(pStable->vgHash, pVgId); mnodeDropAllChildTablesInStable(pStable); } @@ -1430,8 +1443,6 @@ void mnodeDropAllSuperTables(SDbObj *pDropDb) { mnodeDecTableRef(pTable); } - sdbFreeIter(pIter); - mInfo("db:%s, all super tables:%d is dropped from sdb", pDropDb->name, numOfTables); } @@ -1523,11 +1534,11 @@ static int32_t mnodeProcessSuperTableVgroupMsg(SMnodeMsg *pMsg) { } else { SVgroupsMsg *pVgroupMsg = (SVgroupsMsg *)msg; - SHashMutableIterator *pIter = taosHashCreateIter(pTable->vgHash); - int32_t vgSize = 0; - while (taosHashIterNext(pIter)) { - int32_t *pVgId = taosHashIterGet(pIter); - SVgObj * pVgroup = mnodeGetVgroup(*pVgId); + int32_t *pVgId = taosHashIterate(pTable->vgHash, NULL); + int32_t vgSize = 0; + while (pVgId) { + SVgObj *pVgroup = mnodeGetVgroup(*pVgId); + pVgId = taosHashIterate(pTable->vgHash, pVgId); if (pVgroup == NULL) continue; pVgroupMsg->vgroups[vgSize].vgId = htonl(pVgroup->vgId); @@ -1547,7 +1558,7 @@ static int32_t mnodeProcessSuperTableVgroupMsg(SMnodeMsg *pMsg) { mnodeDecVgroupRef(pVgroup); } - taosHashDestroyIter(pIter); + taosHashCancelIterate(pTable->vgHash, pVgId); mnodeDecTableRef(pTable); pVgroupMsg->numOfVgroups = htonl(vgSize); @@ -2230,8 +2241,6 @@ void mnodeDropAllChildTablesInVgroups(SVgObj *pVgroup) { mnodeDecTableRef(pTable); } - sdbFreeIter(pIter); - mInfo("vgId:%d, all child tables is dropped from sdb", pVgroup->vgId); } @@ -2263,8 +2272,6 @@ void mnodeDropAllChildTables(SDbObj *pDropDb) { mnodeDecTableRef(pTable); } - sdbFreeIter(pIter); - mInfo("db:%s, all child tables:%d is dropped from sdb", pDropDb->name, numOfTables); } @@ 
-2293,8 +2300,6 @@ static void mnodeDropAllChildTablesInStable(SSTableObj *pStable) { mnodeDecTableRef(pTable); } - sdbFreeIter(pIter); - mInfo("stable:%s, all child tables:%d is dropped from sdb", pStable->info.tableId, numOfTables); } diff --git a/src/mnode/src/mnodeUser.c b/src/mnode/src/mnodeUser.c index dc76d92eb8..fb26086d04 100644 --- a/src/mnode/src/mnodeUser.c +++ b/src/mnode/src/mnodeUser.c @@ -33,6 +33,7 @@ #include "mnodeWrite.h" #include "mnodePeer.h" +int64_t tsUserRid = -1; static void * tsUserSdb = NULL; static int32_t tsUserUpdateSize = 0; static int32_t mnodeGetUserMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); @@ -123,7 +124,6 @@ static void mnodePrintUserAuth() { } fflush(fp); - sdbFreeIter(pIter); fclose(fp); } @@ -166,7 +166,8 @@ int32_t mnodeInitUsers() { .fpRestored = mnodeUserActionRestored }; - tsUserSdb = sdbOpenTable(&desc); + tsUserRid = sdbOpenTable(&desc); + tsUserSdb = sdbGetTableByRid(tsUserRid); if (tsUserSdb == NULL) { mError("table:%s, failed to create hash", desc.name); return -1; @@ -177,6 +178,8 @@ int32_t mnodeInitUsers() { mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_DROP_USER, mnodeProcessDropUserMsg); mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_USER, mnodeGetUserMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_USER, mnodeRetrieveUsers); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_USER, mnodeCancelGetNextUser); + mnodeAddPeerMsgHandle(TSDB_MSG_TYPE_DM_AUTH, mnodeProcessAuthMsg); mDebug("table:%s, hash is created", desc.name); @@ -184,7 +187,7 @@ int32_t mnodeInitUsers() { } void mnodeCleanupUsers() { - sdbCloseTable(tsUserSdb); + sdbCloseTable(tsUserRid); tsUserSdb = NULL; } @@ -196,6 +199,10 @@ void *mnodeGetNextUser(void *pIter, SUserObj **pUser) { return sdbFetchRow(tsUserSdb, pIter, (void **)pUser); } +void mnodeCancelGetNextUser(void *pIter) { + sdbFreeIter(tsUserSdb, pIter); +} + void mnodeIncUserRef(SUserObj *pUser) { return sdbIncRef(tsUserSdb, pUser); } @@ -574,8 +581,6 @@ void mnodeDropAllUsers(SAcctObj *pAcct) 
{ mnodeDecUserRef(pUser); } - sdbFreeIter(pIter); - mDebug("acct:%s, all users:%d is dropped from sdb", pAcct->user, numOfUsers); } diff --git a/src/mnode/src/mnodeVgroup.c b/src/mnode/src/mnodeVgroup.c index f9a49e5ec2..d3020de6bd 100644 --- a/src/mnode/src/mnodeVgroup.c +++ b/src/mnode/src/mnodeVgroup.c @@ -20,7 +20,7 @@ #include "tsocket.h" #include "tidpool.h" #include "tsync.h" -#include "tbalance.h" +#include "tbn.h" #include "tglobal.h" #include "tdataformat.h" #include "dnode.h" @@ -51,6 +51,7 @@ char* vgroupStatus[] = { "updating" }; +int64_t tsVgroupRid = -1; static void *tsVgroupSdb = NULL; static int32_t tsVgUpdateSize = 0; @@ -222,7 +223,8 @@ int32_t mnodeInitVgroups() { .fpRestored = mnodeVgroupActionRestored, }; - tsVgroupSdb = sdbOpenTable(&desc); + tsVgroupRid = sdbOpenTable(&desc); + tsVgroupSdb = sdbGetTableByRid(tsVgroupRid); if (tsVgroupSdb == NULL) { mError("failed to init vgroups data"); return -1; @@ -230,6 +232,7 @@ int32_t mnodeInitVgroups() { mnodeAddShowMetaHandle(TSDB_MGMT_TABLE_VGROUP, mnodeGetVgroupMeta); mnodeAddShowRetrieveHandle(TSDB_MGMT_TABLE_VGROUP, mnodeRetrieveVgroups); + mnodeAddShowFreeIterHandle(TSDB_MGMT_TABLE_VGROUP, mnodeCancelGetNextVgroup); mnodeAddPeerRspHandle(TSDB_MSG_TYPE_MD_CREATE_VNODE_RSP, mnodeProcessCreateVnodeRsp); mnodeAddPeerRspHandle(TSDB_MSG_TYPE_MD_ALTER_VNODE_RSP, mnodeProcessAlterVnodeRsp); mnodeAddPeerRspHandle(TSDB_MSG_TYPE_MD_DROP_VNODE_RSP, mnodeProcessDropVnodeRsp); @@ -304,7 +307,7 @@ void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t o mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); + mnodeCancelGetNextVgroup(pIter); } void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVload) { @@ -491,6 +494,10 @@ void *mnodeGetNextVgroup(void *pIter, SVgObj **pVgroup) { return sdbFetchRow(tsVgroupSdb, pIter, (void **)pVgroup); } +void mnodeCancelGetNextVgroup(void *pIter) { + sdbFreeIter(tsVgroupSdb, pIter); +} + static int32_t 
mnodeCreateVgroupFp(SMnodeMsg *pMsg) { SVgObj *pVgroup = pMsg->pVgroup; SDbObj *pDb = pMsg->pDb; @@ -556,7 +563,7 @@ int32_t mnodeCreateVgroup(SMnodeMsg *pMsg) { pVgroup->numOfVnodes = pDb->cfg.replications; pVgroup->createdTime = taosGetTimestampMs(); pVgroup->accessState = TSDB_VN_ALL_ACCCESS; - int32_t code = balanceAllocVnodes(pVgroup); + int32_t code = bnAllocVnodes(pVgroup); if (code != TSDB_CODE_SUCCESS) { mError("db:%s, no enough dnode to alloc %d vnodes to vgroup, reason:%s", pDb->name, pVgroup->numOfVnodes, tstrerror(code)); @@ -605,7 +612,7 @@ void mnodeDropVgroup(SVgObj *pVgroup, void *ahandle) { } void mnodeCleanupVgroups() { - sdbCloseTable(tsVgroupSdb); + sdbCloseTable(tsVgroupRid); tsVgroupSdb = NULL; } @@ -1095,8 +1102,6 @@ void mnodeDropAllDnodeVgroups(SDnodeObj *pDropDnode) { mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); - mInfo("dnode:%d, all vgroups:%d is dropped from sdb", pDropDnode->dnodeId, numOfVgroups); } @@ -1118,8 +1123,6 @@ void mnodeUpdateAllDbVgroups(SDbObj *pAlterDb) { mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); - mInfo("db:%s, all vgroups is updated in sdb", pAlterDb->name); } #endif @@ -1147,8 +1150,6 @@ void mnodeDropAllDbVgroups(SDbObj *pDropDb) { mnodeDecVgroupRef(pVgroup); } - sdbFreeIter(pIter); - mInfo("db:%s, all vgroups:%d is dropped from sdb", pDropDb->name, numOfVgroups); } @@ -1170,7 +1171,5 @@ void mnodeSendDropAllDbVgroupsMsg(SDbObj *pDropDb) { numOfVgroups++; } - sdbFreeIter(pIter); - mInfo("db:%s, all vgroups:%d drop msg is sent to dnode", pDropDb->name, numOfVgroups); } diff --git a/src/mnode/src/mnodeWrite.c b/src/mnode/src/mnodeWrite.c index 8893316ffc..53981238a7 100644 --- a/src/mnode/src/mnodeWrite.c +++ b/src/mnode/src/mnodeWrite.c @@ -17,7 +17,7 @@ #include "os.h" #include "taosdef.h" #include "tsched.h" -#include "tbalance.h" +#include "tbn.h" #include "tgrant.h" #include "tglobal.h" #include "trpc.h" @@ -54,18 +54,8 @@ int32_t mnodeProcessWrite(SMnodeMsg *pMsg) { rpcRsp->rsp = epSet; 
rpcRsp->len = sizeof(SRpcEpSet); - mDebug("msg:%p, app:%p type:%s in write queue, will be redirected, numOfEps:%d inUse:%d", pMsg, pMsg->rpcMsg.ahandle, - taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); - for (int32_t i = 0; i < epSet->numOfEps; ++i) { - if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort) { - epSet->inUse = (i + 1) % epSet->numOfEps; - mDebug("msg:%p, app:%p mnode index:%d ep:%s:%d, set inUse to %d", pMsg, pMsg->rpcMsg.ahandle, i, epSet->fqdn[i], - htons(epSet->port[i]), epSet->inUse); - } else { - mDebug("msg:%p, app:%p mnode index:%d ep:%s:%d", pMsg, pMsg->rpcMsg.ahandle, i, epSet->fqdn[i], - htons(epSet->port[i])); - } - } + mDebug("msg:%p, app:%p type:%s in write queue, is redirected, numOfEps:%d inUse:%d", pMsg, + pMsg->rpcMsg.ahandle, taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse); return TSDB_CODE_RPC_REDIRECT; } diff --git a/src/os/inc/osFile.h b/src/os/inc/osFile.h index 62e44d8eb0..c9b3b9cd76 100644 --- a/src/os/inc/osFile.h +++ b/src/os/inc/osFile.h @@ -20,17 +20,6 @@ extern "C" { #endif -#define tread(fd, buf, count) read(fd, buf, count) -#define twrite(fd, buf, count) write(fd, buf, count) -#define tlseek(fd, offset, whence) lseek(fd, offset, whence) -#define tclose(fd) \ - { \ - if (FD_VALID(fd)) { \ - close(fd); \ - fd = FD_INITIALIZER; \ - } \ - } - int64_t taosReadImp(int32_t fd, void *buf, int64_t count); int64_t taosWriteImp(int32_t fd, void *buf, int64_t count); int64_t taosLSeekImp(int32_t fd, int64_t offset, int32_t whence); @@ -39,7 +28,13 @@ int32_t taosRenameFile(char *fullPath, char *suffix, char delimiter, char **dstP #define taosRead(fd, buf, count) taosReadImp(fd, buf, count) #define taosWrite(fd, buf, count) taosWriteImp(fd, buf, count) #define taosLSeek(fd, offset, whence) taosLSeekImp(fd, offset, whence) -#define taosClose(x) tclose(x) +#define taosClose(fd) \ + { \ + if (FD_VALID(fd)) { \ + close(fd); \ + fd = FD_INITIALIZER; \ + } \ + } // 
TAOS_OS_FUNC_FILE_SENDIFLE int64_t taosSendFile(int32_t dfd, int32_t sfd, int64_t *offset, int64_t size); diff --git a/src/os/src/detail/osFile.c b/src/os/src/detail/osFile.c index 23fc88b8e1..2e6886aa21 100644 --- a/src/os/src/detail/osFile.c +++ b/src/os/src/detail/osFile.c @@ -116,7 +116,7 @@ int64_t taosWriteImp(int32_t fd, void *buf, int64_t n) { } int64_t taosLSeekImp(int32_t fd, int64_t offset, int32_t whence) { - return (int64_t)tlseek(fd, (long)offset, whence); + return (int64_t)lseek(fd, (long)offset, whence); } #ifndef TAOS_OS_FUNC_FILE_SENDIFLE diff --git a/src/plugins/monitor/src/monitorMain.c b/src/plugins/monitor/src/monMain.c similarity index 70% rename from src/plugins/monitor/src/monitorMain.c rename to src/plugins/monitor/src/monMain.c index 24998b54cd..9443b1ce12 100644 --- a/src/plugins/monitor/src/monitorMain.c +++ b/src/plugins/monitor/src/monMain.c @@ -27,12 +27,12 @@ #include "monitor.h" #include "taoserror.h" -#define mnFatal(...) { if (monitorDebugFlag & DEBUG_FATAL) { taosPrintLog("MON FATAL ", 255, __VA_ARGS__); }} -#define mnError(...) { if (monitorDebugFlag & DEBUG_ERROR) { taosPrintLog("MON ERROR ", 255, __VA_ARGS__); }} -#define mnWarn(...) { if (monitorDebugFlag & DEBUG_WARN) { taosPrintLog("MON WARN ", 255, __VA_ARGS__); }} -#define mnInfo(...) { if (monitorDebugFlag & DEBUG_INFO) { taosPrintLog("MON ", 255, __VA_ARGS__); }} -#define mnDebug(...) { if (monitorDebugFlag & DEBUG_DEBUG) { taosPrintLog("MON ", monitorDebugFlag, __VA_ARGS__); }} -#define mnTrace(...) { if (monitorDebugFlag & DEBUG_TRACE) { taosPrintLog("MON ", monitorDebugFlag, __VA_ARGS__); }} +#define monFatal(...) { if (monDebugFlag & DEBUG_FATAL) { taosPrintLog("MON FATAL ", 255, __VA_ARGS__); }} +#define monError(...) { if (monDebugFlag & DEBUG_ERROR) { taosPrintLog("MON ERROR ", 255, __VA_ARGS__); }} +#define monWarn(...) { if (monDebugFlag & DEBUG_WARN) { taosPrintLog("MON WARN ", 255, __VA_ARGS__); }} +#define monInfo(...) 
{ if (monDebugFlag & DEBUG_INFO) { taosPrintLog("MON ", 255, __VA_ARGS__); }} +#define monDebug(...) { if (monDebugFlag & DEBUG_DEBUG) { taosPrintLog("MON ", monDebugFlag, __VA_ARGS__); }} +#define monTrace(...) { if (monDebugFlag & DEBUG_TRACE) { taosPrintLog("MON ", monDebugFlag, __VA_ARGS__); }} #define SQL_LENGTH 1030 #define LOG_LEN_STR 100 @@ -48,12 +48,12 @@ typedef enum { MON_CMD_CREATE_TB_ACCT_ROOT, MON_CMD_CREATE_TB_SLOWQUERY, MON_CMD_MAX -} EMonitorCommand; +} EMonCmd; typedef enum { MON_STATE_NOT_INIT, MON_STATE_INITED -} EMonitorState; +} EMonState; typedef struct { pthread_t thread; @@ -64,17 +64,17 @@ typedef struct { int8_t start; // enable/disable by mnode int8_t quiting; // taosd is quiting char sql[SQL_LENGTH + 1]; -} SMonitorConn; +} SMonConn; -static SMonitorConn tsMonitor = {0}; -static void monitorSaveSystemInfo(); -static void *monitorThreadFunc(void *param); -static void monitorBuildMonitorSql(char *sql, int32_t cmd); -extern int32_t (*monitorStartSystemFp)(); -extern void (*monitorStopSystemFp)(); -extern void (*monitorExecuteSQLFp)(char *sql); +static SMonConn tsMonitor = {0}; +static void monSaveSystemInfo(); +static void *monThreadFunc(void *param); +static void monBuildMonitorSql(char *sql, int32_t cmd); +extern int32_t (*monStartSystemFp)(); +extern void (*monStopSystemFp)(); +extern void (*monExecuteSQLFp)(char *sql); -int32_t monitorInitSystem() { +int32_t monInitSystem() { if (tsMonitor.ep[0] == 0) { strcpy(tsMonitor.ep, tsLocalEp); } @@ -90,29 +90,29 @@ int32_t monitorInitSystem() { pthread_attr_init(&thAttr); pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); - if (pthread_create(&tsMonitor.thread, &thAttr, monitorThreadFunc, NULL)) { - mnError("failed to create thread to for monitor module, reason:%s", strerror(errno)); + if (pthread_create(&tsMonitor.thread, &thAttr, monThreadFunc, NULL)) { + monError("failed to create thread to for monitor module, reason:%s", strerror(errno)); return -1; } 
pthread_attr_destroy(&thAttr); - mnDebug("monitor thread is launched"); + monDebug("monitor thread is launched"); - monitorStartSystemFp = monitorStartSystem; - monitorStopSystemFp = monitorStopSystem; + monStartSystemFp = monStartSystem; + monStopSystemFp = monStopSystem; return 0; } -int32_t monitorStartSystem() { +int32_t monStartSystem() { taos_init(); tsMonitor.start = 1; - monitorExecuteSQLFp = monitorExecuteSQL; - mnInfo("monitor module start"); + monExecuteSQLFp = monExecuteSQL; + monInfo("monitor module start"); return 0; } -static void *monitorThreadFunc(void *param) { - mnDebug("starting to initialize monitor module ..."); +static void *monThreadFunc(void *param) { + monDebug("starting to initialize monitor module ..."); while (1) { static int32_t accessTimes = 0; @@ -121,7 +121,7 @@ static void *monitorThreadFunc(void *param) { if (tsMonitor.quiting) { tsMonitor.state = MON_STATE_NOT_INIT; - mnInfo("monitor thread will quit, for taosd is quiting"); + monInfo("monitor thread will quit, for taosd is quiting"); break; } else { taosGetDisk(); @@ -132,7 +132,7 @@ static void *monitorThreadFunc(void *param) { } if (dnodeGetDnodeId() <= 0) { - mnDebug("dnode not initialized, waiting for 3000 ms to start monitor module"); + monDebug("dnode not initialized, waiting for 3000 ms to start monitor module"); continue; } @@ -140,10 +140,10 @@ static void *monitorThreadFunc(void *param) { tsMonitor.state = MON_STATE_NOT_INIT; tsMonitor.conn = taos_connect(NULL, "monitor", tsInternalPass, "", 0); if (tsMonitor.conn == NULL) { - mnError("failed to connect to database, reason:%s", tstrerror(terrno)); + monError("failed to connect to database, reason:%s", tstrerror(terrno)); continue; } else { - mnDebug("connect to database success"); + monDebug("connect to database success"); } } @@ -151,16 +151,16 @@ static void *monitorThreadFunc(void *param) { int code = 0; for (; tsMonitor.cmdIndex < MON_CMD_MAX; ++tsMonitor.cmdIndex) { - monitorBuildMonitorSql(tsMonitor.sql, 
tsMonitor.cmdIndex); + monBuildMonitorSql(tsMonitor.sql, tsMonitor.cmdIndex); void *res = taos_query(tsMonitor.conn, tsMonitor.sql); code = taos_errno(res); taos_free_result(res); if (code != 0) { - mnError("failed to exec sql:%s, reason:%s", tsMonitor.sql, tstrerror(code)); + monError("failed to exec sql:%s, reason:%s", tsMonitor.sql, tstrerror(code)); break; } else { - mnDebug("successfully to exec sql:%s", tsMonitor.sql); + monDebug("successfully to exec sql:%s", tsMonitor.sql); } } @@ -171,16 +171,16 @@ static void *monitorThreadFunc(void *param) { if (tsMonitor.state == MON_STATE_INITED) { if (accessTimes % tsMonitorInterval == 0) { - monitorSaveSystemInfo(); + monSaveSystemInfo(); } } } - mnInfo("monitor thread is stopped"); + monInfo("monitor thread is stopped"); return NULL; } -static void monitorBuildMonitorSql(char *sql, int32_t cmd) { +static void monBuildMonitorSql(char *sql, int32_t cmd) { memset(sql, 0, SQL_LENGTH); if (cmd == MON_CMD_CREATE_DB) { @@ -236,47 +236,47 @@ static void monitorBuildMonitorSql(char *sql, int32_t cmd) { sql[SQL_LENGTH] = 0; } -void monitorStopSystem() { +void monStopSystem() { tsMonitor.start = 0; tsMonitor.state = MON_STATE_NOT_INIT; - monitorExecuteSQLFp = NULL; - mnInfo("monitor module stopped"); + monExecuteSQLFp = NULL; + monInfo("monitor module stopped"); } -void monitorCleanUpSystem() { +void monCleanupSystem() { tsMonitor.quiting = 1; - monitorStopSystem(); + monStopSystem(); pthread_join(tsMonitor.thread, NULL); if (tsMonitor.conn != NULL) { taos_close(tsMonitor.conn); tsMonitor.conn = NULL; } - mnInfo("monitor module is cleaned up"); + monInfo("monitor module is cleaned up"); } // unit is MB -static int32_t monitorBuildMemorySql(char *sql) { +static int32_t monBuildMemorySql(char *sql) { float sysMemoryUsedMB = 0; bool suc = taosGetSysMemory(&sysMemoryUsedMB); if (!suc) { - mnDebug("failed to get sys memory info"); + monDebug("failed to get sys memory info"); } float procMemoryUsedMB = 0; suc = 
taosGetProcMemory(&procMemoryUsedMB); if (!suc) { - mnDebug("failed to get proc memory info"); + monDebug("failed to get proc memory info"); } return sprintf(sql, ", %f, %f, %d", procMemoryUsedMB, sysMemoryUsedMB, tsTotalMemoryMB); } // unit is % -static int32_t monitorBuildCpuSql(char *sql) { +static int32_t monBuildCpuSql(char *sql) { float sysCpuUsage = 0, procCpuUsage = 0; bool suc = taosGetCpuUsage(&sysCpuUsage, &procCpuUsage); if (!suc) { - mnDebug("failed to get cpu usage"); + monDebug("failed to get cpu usage"); } if (sysCpuUsage <= procCpuUsage) { @@ -287,72 +287,72 @@ static int32_t monitorBuildCpuSql(char *sql) { } // unit is GB -static int32_t monitorBuildDiskSql(char *sql) { +static int32_t monBuildDiskSql(char *sql) { return sprintf(sql, ", %f, %d", (tsTotalDataDirGB - tsAvailDataDirGB), (int32_t)tsTotalDataDirGB); } // unit is Kb -static int32_t monitorBuildBandSql(char *sql) { +static int32_t monBuildBandSql(char *sql) { float bandSpeedKb = 0; bool suc = taosGetBandSpeed(&bandSpeedKb); if (!suc) { - mnDebug("failed to get bandwidth speed"); + monDebug("failed to get bandwidth speed"); } return sprintf(sql, ", %f", bandSpeedKb); } -static int32_t monitorBuildReqSql(char *sql) { +static int32_t monBuildReqSql(char *sql) { SStatisInfo info = dnodeGetStatisInfo(); return sprintf(sql, ", %d, %d, %d)", info.httpReqNum, info.queryReqNum, info.submitReqNum); } -static int32_t monitorBuildIoSql(char *sql) { +static int32_t monBuildIoSql(char *sql) { float readKB = 0, writeKB = 0; bool suc = taosGetProcIO(&readKB, &writeKB); if (!suc) { - mnDebug("failed to get io info"); + monDebug("failed to get io info"); } return sprintf(sql, ", %f, %f", readKB, writeKB); } -static void monitorSaveSystemInfo() { +static void monSaveSystemInfo() { int64_t ts = taosGetTimestampUs(); char * sql = tsMonitor.sql; int32_t pos = snprintf(sql, SQL_LENGTH, "insert into %s.dn%d values(%" PRId64, tsMonitorDbName, dnodeGetDnodeId(), ts); - pos += monitorBuildCpuSql(sql + pos); - pos 
+= monitorBuildMemorySql(sql + pos); - pos += monitorBuildDiskSql(sql + pos); - pos += monitorBuildBandSql(sql + pos); - pos += monitorBuildIoSql(sql + pos); - pos += monitorBuildReqSql(sql + pos); + pos += monBuildCpuSql(sql + pos); + pos += monBuildMemorySql(sql + pos); + pos += monBuildDiskSql(sql + pos); + pos += monBuildBandSql(sql + pos); + pos += monBuildIoSql(sql + pos); + pos += monBuildReqSql(sql + pos); void *res = taos_query(tsMonitor.conn, tsMonitor.sql); int code = taos_errno(res); taos_free_result(res); if (code != 0) { - mnError("failed to save system info, reason:%s, sql:%s", tstrerror(code), tsMonitor.sql); + monError("failed to save system info, reason:%s, sql:%s", tstrerror(code), tsMonitor.sql); } else { - mnDebug("successfully to save system info, sql:%s", tsMonitor.sql); + monDebug("successfully to save system info, sql:%s", tsMonitor.sql); } } -static void montiorExecSqlCb(void *param, TAOS_RES *result, int32_t code) { +static void monExecSqlCb(void *param, TAOS_RES *result, int32_t code) { int32_t c = taos_errno(result); if (c != TSDB_CODE_SUCCESS) { - mnError("save %s failed, reason:%s", (char *)param, tstrerror(c)); + monError("save %s failed, reason:%s", (char *)param, tstrerror(c)); } else { int32_t rows = taos_affected_rows(result); - mnDebug("save %s succ, rows:%d", (char *)param, rows); + monDebug("save %s succ, rows:%d", (char *)param, rows); } taos_free_result(result); } -void monitorSaveAcctLog(SAcctMonitorObj *pMon) { +void monSaveAcctLog(SAcctMonitorObj *pMon) { if (tsMonitor.state != MON_STATE_INITED) return; char sql[1024] = {0}; @@ -382,11 +382,11 @@ void monitorSaveAcctLog(SAcctMonitorObj *pMon) { pMon->totalConns, pMon->maxConns, pMon->accessState); - mnDebug("save account info, sql:%s", sql); - taos_query_a(tsMonitor.conn, sql, montiorExecSqlCb, "account info"); + monDebug("save account info, sql:%s", sql); + taos_query_a(tsMonitor.conn, sql, monExecSqlCb, "account info"); } -void monitorSaveLog(int32_t level, const char 
*const format, ...) { +void monSaveLog(int32_t level, const char *const format, ...) { if (tsMonitor.state != MON_STATE_INITED) return; va_list argpointer; @@ -403,13 +403,13 @@ void monitorSaveLog(int32_t level, const char *const format, ...) { len += sprintf(sql + len, "', '%s')", tsLocalEp); sql[len++] = 0; - mnDebug("save log, sql: %s", sql); - taos_query_a(tsMonitor.conn, sql, montiorExecSqlCb, "log"); + monDebug("save log, sql: %s", sql); + taos_query_a(tsMonitor.conn, sql, monExecSqlCb, "log"); } -void monitorExecuteSQL(char *sql) { +void monExecuteSQL(char *sql) { if (tsMonitor.state != MON_STATE_INITED) return; - mnDebug("execute sql:%s", sql); - taos_query_a(tsMonitor.conn, sql, montiorExecSqlCb, "sql"); + monDebug("execute sql:%s", sql); + taos_query_a(tsMonitor.conn, sql, monExecSqlCb, "sql"); } diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 895b414a56..b73f7ce3f5 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -63,9 +63,11 @@ typedef struct SSqlGroupbyExpr { typedef struct SResultRow { int32_t pageId; // pageId & rowId is the position of current result in disk-based output buffer - int32_t rowId:15; - bool closed:1; // this result status: closed or opened - uint16_t numOfRows; // number of rows of current time window + int32_t rowId:29; // row index in buffer page + bool startInterp; // the time window start timestamp has done the interpolation already. + bool endInterp; // the time window end timestamp has done the interpolation already. + bool closed; // this result status: closed or opened + uint32_t numOfRows; // number of rows of current time window SResultRowCellInfo* pCellInfo; // For each result column, there is a resultInfo union {STimeWindow win; char* key;}; // start key of current time window } SResultRow; @@ -81,16 +83,15 @@ typedef struct SResultRec { int32_t threshold; // result size threshold in rows. 
} SResultRec; -typedef struct SWindowResInfo { - SResultRow** pResult; // result list - int16_t type:8; // data type for hash key - int32_t size:24; // number of result set - int32_t threshold; // threshold to halt query and return the generated results. - int32_t capacity; // max capacity - int32_t curIndex; // current start active index - int64_t startTime; // start time of the first time window for sliding query - int64_t prevSKey; // previous (not completed) sliding window start key -} SWindowResInfo; +typedef struct SResultRowInfo { + SResultRow** pResult; // result list + int16_t type:8; // data type for hash key + int32_t size:24; // number of result set + int32_t capacity; // max capacity + int32_t curIndex; // current start active index + int64_t startTime; // start time of the first time window for sliding query + int64_t prevSKey; // previous (not completed) sliding window start key +} SResultRowInfo; typedef struct SColumnFilterElem { int16_t bytes; // column length @@ -113,7 +114,7 @@ typedef struct STableQueryInfo { STimeWindow win; STSCursor cur; void* pTable; // for retrieve the page id list - SWindowResInfo windowResInfo; + SResultRowInfo windowResInfo; } STableQueryInfo; typedef struct SQueryCostInfo { @@ -177,7 +178,7 @@ typedef struct SQueryRuntimeEnv { uint16_t* offset; uint16_t scanFlag; // denotes reversed scan of data or not SFillInfo* pFillInfo; - SWindowResInfo windowResInfo; + SResultRowInfo windowResInfo; STSBuf* pTSBuf; STSCursor cur; SQueryCostInfo summary; @@ -187,6 +188,8 @@ typedef struct SQueryRuntimeEnv { bool topBotQuery; // false bool groupbyNormalCol; // denote if this is a groupby normal column query bool hasTagResults; // if there are tag values in final result or not + bool timeWindowInterpo;// if the time window start/end required interpolation + bool queryWindowIdentical; // all query time windows are identical for all tables in one group int32_t interBufSize; // intermediate buffer sizse int32_t prevGroupId; // previous 
executed group id SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file @@ -195,6 +198,8 @@ typedef struct SQueryRuntimeEnv { SResultRowPool* pool; // window result object pool int32_t* rowCellInfoOffset;// offset value for each row result cell info + char** prevRow; + char** nextRow; } SQueryRuntimeEnv; enum { @@ -212,7 +217,8 @@ typedef struct SQInfo { STableGroupInfo tableGroupInfo; // table list SArray STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure SQueryRuntimeEnv runtimeEnv; - SArray* arrTableIdInfo; +// SArray* arrTableIdInfo; + SHashObj* arrTableIdInfo; int32_t groupIndex; /* diff --git a/src/query/inc/qUtil.h b/src/query/inc/qUtil.h index 8b84ac0182..fb71c8a5fe 100644 --- a/src/query/inc/qUtil.h +++ b/src/query/inc/qUtil.h @@ -30,19 +30,19 @@ void clearResultRow(SQueryRuntimeEnv* pRuntimeEnv, SResultRow* pRow, int16_t typ void copyResultRow(SQueryRuntimeEnv* pRuntimeEnv, SResultRow* dst, const SResultRow* src, int16_t type); SResultRowCellInfo* getResultCell(SQueryRuntimeEnv* pRuntimeEnv, const SResultRow* pRow, int32_t index); -int32_t initWindowResInfo(SWindowResInfo* pWindowResInfo, int32_t size, int32_t threshold, int16_t type); +int32_t initWindowResInfo(SResultRowInfo* pWindowResInfo, int32_t size, int16_t type); -void cleanupTimeWindowInfo(SWindowResInfo* pWindowResInfo); -void resetTimeWindowInfo(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo* pWindowResInfo); +void cleanupTimeWindowInfo(SResultRowInfo* pWindowResInfo); +void resetTimeWindowInfo(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pWindowResInfo); void clearFirstNWindowRes(SQueryRuntimeEnv *pRuntimeEnv, int32_t num); void clearClosedTimeWindow(SQueryRuntimeEnv* pRuntimeEnv); -int32_t numOfClosedTimeWindow(SWindowResInfo* pWindowResInfo); -void closeTimeWindow(SWindowResInfo* pWindowResInfo, int32_t slot); -void closeAllTimeWindow(SWindowResInfo* pWindowResInfo); -void removeRedundantWindow(SWindowResInfo 
*pWindowResInfo, TSKEY lastKey, int32_t order); +int32_t numOfClosedTimeWindow(SResultRowInfo* pWindowResInfo); +void closeTimeWindow(SResultRowInfo* pWindowResInfo, int32_t slot); +void closeAllTimeWindow(SResultRowInfo* pWindowResInfo); +void removeRedundantWindow(SResultRowInfo *pWindowResInfo, TSKEY lastKey, int32_t order); -static FORCE_INLINE SResultRow *getResultRow(SWindowResInfo *pWindowResInfo, int32_t slot) { +static FORCE_INLINE SResultRow *getResultRow(SResultRowInfo *pWindowResInfo, int32_t slot) { assert(pWindowResInfo != NULL && slot >= 0 && slot < pWindowResInfo->size); return pWindowResInfo->pResult[slot]; } @@ -50,7 +50,7 @@ static FORCE_INLINE SResultRow *getResultRow(SWindowResInfo *pWindowResInfo, int #define curTimeWindowIndex(_winres) ((_winres)->curIndex) #define GET_ROW_PARAM_FOR_MULTIOUTPUT(_q, tbq, sq) (((tbq) && (!sq))? (_q)->pExpr1[1].base.arg->argValue.i64:1) -bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot); +bool isWindowResClosed(SResultRowInfo *pWindowResInfo, int32_t slot); int32_t initResultRow(SResultRow *pResultRow); diff --git a/src/query/inc/tsqlfunction.h b/src/query/inc/tsqlfunction.h index 32cbb56c62..5a923db52c 100644 --- a/src/query/inc/tsqlfunction.h +++ b/src/query/inc/tsqlfunction.h @@ -152,6 +152,11 @@ typedef struct SResultRowCellInfo { uint32_t numOfRes; // num of output result in current buffer } SResultRowCellInfo; +typedef struct SPoint1 { + int64_t key; + double val; +} SPoint1; + #define GET_ROWCELL_INTERBUF(_c) ((void*) ((char*)(_c) + sizeof(SResultRowCellInfo))) struct SQLFunctionCtx; @@ -194,6 +199,8 @@ typedef struct SQLFunctionCtx { SResultRowCellInfo *resultInfo; SExtTagsInfo tagInfo; + SPoint1 start; + SPoint1 end; } SQLFunctionCtx; typedef struct SQLAggFuncElem { @@ -243,21 +250,11 @@ enum { }; typedef struct STwaInfo { - TSKEY lastKey; - int8_t hasResult; // flag to denote has value - int16_t type; // source data type - TSKEY SKey; - TSKEY EKey; - - union { - double dOutput; - 
int64_t iOutput; - }; - - union { - double dLastValue; - int64_t iLastValue; - }; + TSKEY lastKey; + int8_t hasResult; // flag to denote has value + double dOutput; + double lastValue; + STimeWindow win; } STwaInfo; /* global sql function array */ @@ -276,8 +273,6 @@ bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const cha (_r)->initialized = false; \ } while (0) -//void setResultInfoBuf(SResultRowCellInfo *pResInfo, char* buf); - static FORCE_INLINE void initResultInfo(SResultRowCellInfo *pResInfo, uint32_t bufLen) { pResInfo->initialized = true; // the this struct has been initialized flag diff --git a/src/query/src/qAst.c b/src/query/src/qAst.c index e813688d84..a65f4a6dc9 100644 --- a/src/query/src/qAst.c +++ b/src/query/src/qAst.c @@ -370,6 +370,66 @@ void tExprTreeTraverse(tExprNode *pExpr, SSkipList *pSkipList, SArray *result, S #endif } +static void reverseCopy(char* dest, const char* src, int16_t type, int32_t numOfRows) { + switch(type) { + case TSDB_DATA_TYPE_TINYINT: { + int8_t* p = (int8_t*) dest; + int8_t* pSrc = (int8_t*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + case TSDB_DATA_TYPE_SMALLINT: { + int16_t* p = (int16_t*) dest; + int16_t* pSrc = (int16_t*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + case TSDB_DATA_TYPE_INT: { + int32_t* p = (int32_t*) dest; + int32_t* pSrc = (int32_t*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + case TSDB_DATA_TYPE_BIGINT: { + int64_t* p = (int64_t*) dest; + int64_t* pSrc = (int64_t*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + case TSDB_DATA_TYPE_FLOAT: { + float* p = (float*) dest; + float* pSrc = (float*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + case TSDB_DATA_TYPE_DOUBLE: { + double* p = (double*) 
dest; + double* pSrc = (double*) src; + + for(int32_t i = 0; i < numOfRows; ++i) { + p[i] = pSrc[numOfRows - i - 1]; + } + break; + } + default: assert(0); + } +} + void tExprTreeCalcTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, void *param, int32_t order, char *(*getSourceDataBlock)(void *, const char*, int32_t)) { if (pExprs == NULL) { @@ -387,6 +447,8 @@ void tExprTreeCalcTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, /* the right output has result from the right child syntax tree */ char *pRightOutput = malloc(sizeof(int64_t) * numOfRows); + char *pdata = malloc(sizeof(int64_t) * numOfRows); + if (pRight->nodeType == TSQL_NODE_EXPR) { tExprTreeCalcTraverse(pRight, numOfRows, pRightOutput, param, order, getSourceDataBlock); } @@ -398,52 +460,75 @@ void tExprTreeCalcTraverse(tExprNode *pExprs, int32_t numOfRows, char *pOutput, * the type of returned value of one expression is always double float precious */ _bi_consumer_fn_t fp = tGetBiConsumerFn(TSDB_DATA_TYPE_DOUBLE, TSDB_DATA_TYPE_DOUBLE, pExprs->_node.optr); - fp(pLeftOutput, pRightOutput, numOfRows, numOfRows, pOutput, order); + fp(pLeftOutput, pRightOutput, numOfRows, numOfRows, pOutput, TSDB_ORDER_ASC); } else if (pRight->nodeType == TSQL_NODE_COL) { // exprLeft + columnRight _bi_consumer_fn_t fp = tGetBiConsumerFn(TSDB_DATA_TYPE_DOUBLE, pRight->pSchema->type, pExprs->_node.optr); + // set input buffer char *pInputData = getSourceDataBlock(param, pRight->pSchema->name, pRight->pSchema->colId); - fp(pLeftOutput, pInputData, numOfRows, numOfRows, pOutput, order); + if (order == TSDB_ORDER_DESC) { + reverseCopy(pdata, pInputData, pRight->pSchema->type, numOfRows); + fp(pLeftOutput, pdata, numOfRows, numOfRows, pOutput, TSDB_ORDER_ASC); + } else { + fp(pLeftOutput, pInputData, numOfRows, numOfRows, pOutput, TSDB_ORDER_ASC); + } } else if (pRight->nodeType == TSQL_NODE_VALUE) { // exprLeft + 12 _bi_consumer_fn_t fp = tGetBiConsumerFn(TSDB_DATA_TYPE_DOUBLE, pRight->pVal->nType, 
pExprs->_node.optr); - fp(pLeftOutput, &pRight->pVal->i64Key, numOfRows, 1, pOutput, order); + fp(pLeftOutput, &pRight->pVal->i64Key, numOfRows, 1, pOutput, TSDB_ORDER_ASC); } } else if (pLeft->nodeType == TSQL_NODE_COL) { // column data specified on left-hand-side char *pLeftInputData = getSourceDataBlock(param, pLeft->pSchema->name, pLeft->pSchema->colId); if (pRight->nodeType == TSQL_NODE_EXPR) { // columnLeft + expr2 _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pSchema->type, TSDB_DATA_TYPE_DOUBLE, pExprs->_node.optr); - fp(pLeftInputData, pRightOutput, numOfRows, numOfRows, pOutput, order); + + if (order == TSDB_ORDER_DESC) { + reverseCopy(pdata, pLeftInputData, pLeft->pSchema->type, numOfRows); + fp(pdata, pRightOutput, numOfRows, numOfRows, pOutput, TSDB_ORDER_ASC); + } else { + fp(pLeftInputData, pRightOutput, numOfRows, numOfRows, pOutput, TSDB_ORDER_ASC); + } } else if (pRight->nodeType == TSQL_NODE_COL) { // columnLeft + columnRight // column data specified on right-hand-side char *pRightInputData = getSourceDataBlock(param, pRight->pSchema->name, pRight->pSchema->colId); - _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pSchema->type, pRight->pSchema->type, pExprs->_node.optr); - fp(pLeftInputData, pRightInputData, numOfRows, numOfRows, pOutput, order); + // both columns are descending order, do not reverse the source data + fp(pLeftInputData, pRightInputData, numOfRows, numOfRows, pOutput, order); } else if (pRight->nodeType == TSQL_NODE_VALUE) { // columnLeft + 12 _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pSchema->type, pRight->pVal->nType, pExprs->_node.optr); - fp(pLeftInputData, &pRight->pVal->i64Key, numOfRows, 1, pOutput, order); + + if (order == TSDB_ORDER_DESC) { + reverseCopy(pdata, pLeftInputData, pLeft->pSchema->type, numOfRows); + fp(pdata, &pRight->pVal->i64Key, numOfRows, 1, pOutput, TSDB_ORDER_ASC); + } else { + fp(pLeftInputData, &pRight->pVal->i64Key, numOfRows, 1, pOutput, TSDB_ORDER_ASC); + } } } else { // column data specified 
on left-hand-side if (pRight->nodeType == TSQL_NODE_EXPR) { // 12 + expr2 _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pVal->nType, TSDB_DATA_TYPE_DOUBLE, pExprs->_node.optr); - fp(&pLeft->pVal->i64Key, pRightOutput, 1, numOfRows, pOutput, order); + fp(&pLeft->pVal->i64Key, pRightOutput, 1, numOfRows, pOutput, TSDB_ORDER_ASC); } else if (pRight->nodeType == TSQL_NODE_COL) { // 12 + columnRight // column data specified on right-hand-side char *pRightInputData = getSourceDataBlock(param, pRight->pSchema->name, pRight->pSchema->colId); - _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pVal->nType, pRight->pSchema->type, pExprs->_node.optr); - fp(&pLeft->pVal->i64Key, pRightInputData, 1, numOfRows, pOutput, order); + + if (order == TSDB_ORDER_DESC) { + reverseCopy(pdata, pRightInputData, pRight->pSchema->type, numOfRows); + fp(&pLeft->pVal->i64Key, pdata, numOfRows, 1, pOutput, TSDB_ORDER_ASC); + } else { + fp(&pLeft->pVal->i64Key, pRightInputData, 1, numOfRows, pOutput, TSDB_ORDER_ASC); + } } else if (pRight->nodeType == TSQL_NODE_VALUE) { // 12 + 12 _bi_consumer_fn_t fp = tGetBiConsumerFn(pLeft->pVal->nType, pRight->pVal->nType, pExprs->_node.optr); - fp(&pLeft->pVal->i64Key, &pRight->pVal->i64Key, 1, 1, pOutput, order); + fp(&pLeft->pVal->i64Key, &pRight->pVal->i64Key, 1, 1, pOutput, TSDB_ORDER_ASC); } } diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 826858d1dd..0c07149e8e 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -27,6 +27,7 @@ #include "query.h" #include "queryLog.h" #include "tlosertree.h" +#include "ttype.h" #define MAX_ROWS_PER_RESBUF_PAGE ((1u<<12) - 1) @@ -194,6 +195,10 @@ static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo * static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo); static int32_t checkForQueryBuf(size_t numOfTables); static void releaseQueryBuf(size_t numOfTables); +static int32_t binarySearchForKey(char *pValue, int 
num, TSKEY key, int order); +static void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type); +static STsdbQueryCond createTsdbQueryCond(SQuery* pQuery); +static STableIdInfo createTableIdInfo(SQuery* pQuery); bool doFilterData(SQuery *pQuery, int32_t elemPos) { for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { @@ -400,6 +405,17 @@ static bool isTopBottomQuery(SQuery *pQuery) { return false; } +static bool timeWindowInterpoRequired(SQuery *pQuery) { + for(int32_t i = 0; i < pQuery->numOfOutput; ++i) { + int32_t functionId = pQuery->pExpr1[i].base.functionId; + if (functionId == TSDB_FUNC_TWA) { + return true; + } + } + + return false; +} + static bool hasTagValOutput(SQuery* pQuery) { SExprInfo *pExprInfo = &pQuery->pExpr1[0]; if (pQuery->numOfOutput == 1 && pExprInfo->base.functionId == TSDB_FUNC_TS_COMP) { @@ -445,7 +461,7 @@ static bool hasNullValue(SColIndex* pColIndex, SDataStatis *pStatis, SDataStatis return true; } -static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData, +static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pWindowResInfo, char *pData, int16_t bytes, bool masterscan, uint64_t uid) { SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid); int32_t *p1 = @@ -457,6 +473,7 @@ static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWin return NULL; } + // TODO refactor // more than the capacity, reallocate the resources if (pWindowResInfo->size >= pWindowResInfo->capacity) { int64_t newCapacity = 0; @@ -501,7 +518,7 @@ static SResultRow *doPrepareResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWin } // get the correct time window according to the handled timestamp -static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) { +static STimeWindow 
getActiveTimeWindow(SResultRowInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) { STimeWindow w = {0}; if (pWindowResInfo->curIndex == -1) { // the first window, from the previous stored value @@ -594,15 +611,18 @@ static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf return 0; } -static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, SDataBlockInfo* pBockInfo, - STimeWindow *win, bool masterscan, bool* newWind) { +static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pWindowResInfo, SDataBlockInfo* pBockInfo, + STimeWindow *win, bool masterscan, bool* newWind, SResultRow** pResult) { assert(win->skey <= win->ekey); SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; - SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, pBockInfo->uid); + // todo refactor + int64_t uid = getResultInfoUId(pRuntimeEnv); + SResultRow *pResultRow = doPrepareResultRowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, uid); if (pResultRow == NULL) { *newWind = false; + // no master scan, no result generated means error occurs return masterscan? 
-1:0; } @@ -618,15 +638,40 @@ static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowRes // set time window for current result pResultRow->win = (*win); + *pResult = pResultRow; setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow); + return TSDB_CODE_SUCCESS; } -static bool getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) { +static bool getResultRowStatus(SResultRowInfo *pWindowResInfo, int32_t slot) { assert(slot >= 0 && slot < pWindowResInfo->size); return pWindowResInfo->pResult[slot]->closed; } +typedef enum SResultTsInterpType { + RESULT_ROW_START_INTERP = 1, + RESULT_ROW_END_INTERP = 2, +} SResultTsInterpType; + +static void setResultRowInterpo(SResultRow* pResult, SResultTsInterpType type) { + assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP)); + if (type == RESULT_ROW_START_INTERP) { + pResult->startInterp = true; + } else { + pResult->endInterp = true; + } +} + +static bool resultRowInterpolated(SResultRow* pResult, SResultTsInterpType type) { + assert(pResult != NULL && (type == RESULT_ROW_START_INTERP || type == RESULT_ROW_END_INTERP)); + if (type == RESULT_ROW_START_INTERP) { + return pResult->startInterp == true; + } else { + return pResult->endInterp == true; + } +} + static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos, int16_t order, int64_t *pData) { int32_t forwardStep = 0; @@ -658,7 +703,7 @@ static FORCE_INLINE int32_t getForwardStepsInBlock(int32_t numOfRows, __block_se /** * NOTE: the query status only set for the first scan of master scan. 
*/ -static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) { +static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SResultRowInfo *pWindowResInfo) { SQuery *pQuery = pRuntimeEnv->pQuery; if (pRuntimeEnv->scanFlag != MASTER_SCAN) { return pWindowResInfo->size; @@ -716,7 +761,7 @@ static int32_t doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKe pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex]->win.skey; // the number of completed slots are larger than the threshold, return current generated results to client. - if (numOfClosed > pWindowResInfo->threshold) { + if (numOfClosed > pQuery->rec.threshold) { qDebug("QInfo:%p total result window:%d closed:%d, reached the output threshold %d, return", GET_QINFO_ADDR(pRuntimeEnv), pWindowResInfo->size, numOfClosed, pQuery->rec.threshold); @@ -947,7 +992,7 @@ static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas if (functionId == TSDB_FUNC_ARITHM) { sas->pArithExpr = &pQuery->pExpr1[col]; - sas->offset = (QUERY_IS_ASC_QUERY(pQuery)) ? pQuery->pos : pQuery->pos - (size - 1); + sas->offset = (QUERY_IS_ASC_QUERY(pQuery))? 
pQuery->pos : pQuery->pos - (size - 1); sas->colList = pQuery->colList; sas->numOfCols = pQuery->numOfCols; sas->data = calloc(pQuery->numOfCols, POINTER_BYTES); @@ -990,6 +1035,117 @@ static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas return dataBlock; } +static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t type) { + if (type == RESULT_ROW_START_INTERP) { + for (int32_t k = 0; k < numOfOutput; ++k) { + pCtx[k].start.key = INT64_MIN; + } + } else { + for (int32_t k = 0; k < numOfOutput; ++k) { + pCtx[k].end.key = INT64_MIN; + } + } +} + +//static double getTSWindowInterpoVal(SColumnInfoData* pColInfo, int16_t srcColIndex, int16_t rowIndex, TSKEY key, char** prevRow, TSKEY* tsCols, int32_t step) { +// TSKEY start = tsCols[rowIndex]; +// TSKEY prevTs = (rowIndex == 0)? *(TSKEY *) prevRow[0] : tsCols[rowIndex - step]; +// +// double v1 = 0, v2 = 0, v = 0; +// char *prevVal = (rowIndex == 0)? prevRow[srcColIndex] : ((char*)pColInfo->pData) + (rowIndex - step) * pColInfo->info.bytes; +// +// GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)prevVal); +// GET_TYPED_DATA(v2, double, pColInfo->info.type, (char *)pColInfo->pData + rowIndex * pColInfo->info.bytes); +// +// SPoint point1 = (SPoint){.key = prevTs, .val = &v1}; +// SPoint point2 = (SPoint){.key = start, .val = &v2}; +// SPoint point = (SPoint){.key = key, .val = &v}; +// taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point); +// +// return v; +//} + +// window start key interpolation +static bool setTimeWindowInterpolationStartTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t pos, int32_t numOfRows, SArray* pDataBlock, TSKEY* tsCols, STimeWindow* win) { + SQuery* pQuery = pRuntimeEnv->pQuery; + + TSKEY curTs = tsCols[pos]; + TSKEY lastTs = *(TSKEY *) pRuntimeEnv->prevRow[0]; + + // lastTs == INT64_MIN and pos == 0 means this is the first time window, interpolation is not needed. 
+ // start exactly from this point, no need to do interpolation + TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? win->skey:win->ekey; + if (key == curTs) { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + return true; + } + + if (lastTs == INT64_MIN && ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))) { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + return true; + } + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + TSKEY prevTs = ((pos == 0 && QUERY_IS_ASC_QUERY(pQuery)) || (pos == (numOfRows - 1) && !QUERY_IS_ASC_QUERY(pQuery)))? + lastTs:tsCols[pos - step]; + + doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, pos - step, curTs, pos, key, RESULT_ROW_START_INTERP); + return true; +} + +static bool setTimeWindowInterpolationEndTs(SQueryRuntimeEnv* pRuntimeEnv, int32_t endRowIndex, SArray* pDataBlock, TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) { + SQuery* pQuery = pRuntimeEnv->pQuery; + TSKEY actualEndKey = tsCols[endRowIndex]; + + TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? 
win->ekey:win->skey; + + // not ended in current data block, do not invoke interpolation + if ((key > blockEkey && QUERY_IS_ASC_QUERY(pQuery)) || (key < blockEkey && !QUERY_IS_ASC_QUERY(pQuery))) { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP); + return false; + } + + // there is actual end point of current time window, no interpolation need + if (key == actualEndKey) { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP); + return true; + } + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + int32_t nextRowIndex = endRowIndex + step; + assert(nextRowIndex >= 0); + + TSKEY nextKey = tsCols[nextRowIndex]; + doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, actualEndKey, endRowIndex, nextKey, nextRowIndex, key, RESULT_ROW_END_INTERP); + return true; +} + +static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock) { + if (pDataBlock == NULL) { + return; + } + + SQuery* pQuery = pRuntimeEnv->pQuery; + int32_t rowIndex = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->rows-1:0; + for (int32_t k = 0; k < pQuery->numOfCols; ++k) { + SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, k); + memcpy(pRuntimeEnv->prevRow[k], ((char*)pColInfo->pData) + (pColInfo->info.bytes * rowIndex), pColInfo->info.bytes); + } +} + +static TSKEY getStartTsKey(SQuery* pQuery, SDataBlockInfo* pDataBlockInfo, TSKEY* tsCols, int32_t step) { + TSKEY ts = TSKEY_INITIAL_VAL; + + if (tsCols == NULL) { + ts = QUERY_IS_ASC_QUERY(pQuery) ? 
pDataBlockInfo->window.skey : pDataBlockInfo->window.ekey; + } else { + int32_t offset = GET_COL_DATA_POS(pQuery, 0, step); + ts = tsCols[offset]; + } + + return ts; +} + /** * todo set the last value for pQueryTableInfo as in rowwiseapplyfunctions * @param pRuntimeEnv @@ -1000,16 +1156,15 @@ static char *getDataBlock(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas * @return the incremental number of output value, so it maybe 0 for fixed number of query, * such as count/min/max etc. */ -static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, - SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo, - __block_search_fn_t searchFn, SArray *pDataBlock) { +static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo, + SResultRowInfo *pWindowResInfo, __block_search_fn_t searchFn, SArray *pDataBlock) { SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); SQuery *pQuery = pRuntimeEnv->pQuery; TSKEY *tsCols = NULL; if (pDataBlock != NULL) { - SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, 0); + SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, 0); tsCols = (TSKEY *)(pColInfo->pData); } @@ -1018,7 +1173,7 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } - SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv); + SQInfo *pQInfo = GET_QINFO_ADDR(pRuntimeEnv); for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock); setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId); @@ -1026,18 +1181,15 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); if 
(QUERY_IS_INTERVAL_QUERY(pQuery)) { - TSKEY ts = TSKEY_INITIAL_VAL; + int32_t prevIndex = curTimeWindowIndex(pWindowResInfo); - if (tsCols == NULL) { - ts = QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.skey:pDataBlockInfo->window.ekey; - } else { - int32_t offset = GET_COL_DATA_POS(pQuery, 0, step); - ts = tsCols[offset]; - } - - bool hasTimeWindow = false; + TSKEY ts = getStartTsKey(pQuery, pDataBlockInfo, tsCols, step); STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) { + + bool hasTimeWindow = false; + SResultRow* pResult = NULL; + int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult); + if (ret != TSDB_CODE_SUCCESS) { tfree(sasArray); return; } @@ -1045,11 +1197,59 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * int32_t forwardStep = 0; int32_t startPos = pQuery->pos; + // in case of repeat scan/reverse scan, no new time window added. if (hasTimeWindow) { TSKEY ekey = reviseWindowEkey(pQuery, &win); forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, pQuery->pos, ekey, searchFn, true); - bool pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + // prev time window not interpolation yet. + int32_t curIndex = curTimeWindowIndex(pWindowResInfo); + if (prevIndex != -1 && prevIndex < curIndex) { + for(int32_t j = prevIndex; j < curIndex; ++j) { + SResultRow *pRes = pWindowResInfo->pResult[j]; + + STimeWindow w = pRes->win; + ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &w, masterScan, &hasTimeWindow, &pResult); + assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP)); + + int32_t p = QUERY_IS_ASC_QUERY(pQuery)? 
0:pDataBlockInfo->rows-1; + doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, *(TSKEY*) pRuntimeEnv->prevRow[0], -1, tsCols[0], p, w.ekey, RESULT_ROW_END_INTERP); + setResultRowInterpo(pResult, RESULT_ROW_END_INTERP); + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + + bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + doBlockwiseApplyFunctions(pRuntimeEnv, closed, &w, startPos, 0, tsCols, pDataBlockInfo->rows); + } + + // restore current time window + ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult); + assert (ret == TSDB_CODE_SUCCESS); // null data, too many state code + } + + // window start key interpolation + if (pRuntimeEnv->timeWindowInterpo) { + bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP); + if (!done) { + int32_t startRowIndex = pQuery->pos; + bool interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &win); + if (interp) { + setResultRowInterpo(pResult, RESULT_ROW_START_INTERP); + } + } + + done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP); + if (!done) { + int32_t endRowIndex = pQuery->pos + (forwardStep - 1) * step; + + TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? 
pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey; + bool interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &win); + if (interp) { + setResultRowInterpo(pResult, RESULT_ROW_END_INTERP); + } + } + } + + bool pStatus = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, startPos, forwardStep, tsCols, pDataBlockInfo->rows); } @@ -1065,7 +1265,8 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * // null data, failed to allocate more memory buffer hasTimeWindow = false; - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) { + if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) != + TSDB_CODE_SUCCESS) { break; } @@ -1076,7 +1277,29 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * TSKEY ekey = reviseWindowEkey(pQuery, &nextWin); forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, tsCols, startPos, ekey, searchFn, true); - bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + // window start(end) key interpolation + if (pRuntimeEnv->timeWindowInterpo) { + bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP); + if (!done) { + int32_t startRowIndex = startPos; + bool interp = setTimeWindowInterpolationStartTs(pRuntimeEnv, startRowIndex, pDataBlockInfo->rows, pDataBlock, tsCols, &nextWin); + if (interp) { + setResultRowInterpo(pResult, RESULT_ROW_START_INTERP); + } + } + + done = resultRowInterpolated(pResult, RESULT_ROW_END_INTERP); + if (!done) { + int32_t endRowIndex = startPos + (forwardStep - 1)*step; + TSKEY endKey = QUERY_IS_ASC_QUERY(pQuery)? 
pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey; + bool interp = setTimeWindowInterpolationEndTs(pRuntimeEnv, endRowIndex, pDataBlock, tsCols, endKey, &nextWin); + if (interp) { + setResultRowInterpo(pResult, RESULT_ROW_END_INTERP); + } + } + } + + bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); doBlockwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, startPos, forwardStep, tsCols, pDataBlockInfo->rows); } @@ -1090,12 +1313,17 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { int32_t functionId = pQuery->pExpr1[k].base.functionId; if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { + pCtx[k].nStartQueryTimestamp = pDataBlockInfo->window.skey; aAggs[functionId].xFunction(&pCtx[k]); } } } - for(int32_t i = 0; i < pQuery->numOfOutput; ++i) { + if (pRuntimeEnv->timeWindowInterpo) { + saveDataBlockLastRow(pRuntimeEnv, pDataBlockInfo, pDataBlock); + } + + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { if (pQuery->pExpr1[i].base.functionId != TSDB_FUNC_ARITHM) { continue; } @@ -1270,8 +1498,84 @@ static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx return true; } +void doRowwiseTimeWindowInterpolation(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type) { + SQuery* pQuery = pRuntimeEnv->pQuery; + for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { + int32_t functionId = pQuery->pExpr1[k].base.functionId; + if (functionId != TSDB_FUNC_TWA) { + pRuntimeEnv->pCtx[k].start.key = INT64_MIN; + continue; + } + + SColIndex* pColIndex = &pQuery->pExpr1[k].base.colInfo; + int16_t index = pColIndex->colIndex; + SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, index); + + assert(pColInfo->info.colId == pColIndex->colId && curTs != windowKey); + double v1 = 0, v2 = 0, v = 0; + + if (prevRowIndex == -1) { + 
GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pRuntimeEnv->prevRow[k]); + } else { + GET_TYPED_DATA(v1, double, pColInfo->info.type, (char *)pColInfo->pData + prevRowIndex * pColInfo->info.bytes); + } + + GET_TYPED_DATA(v2, double, pColInfo->info.type, (char *)pColInfo->pData + curRowIndex * pColInfo->info.bytes); + + SPoint point1 = (SPoint){.key = prevTs, .val = &v1}; + SPoint point2 = (SPoint){.key = curTs, .val = &v2}; + SPoint point = (SPoint){.key = windowKey, .val = &v}; + taosGetLinearInterpolationVal(TSDB_DATA_TYPE_DOUBLE, &point1, &point2, &point); + + if (type == RESULT_ROW_START_INTERP) { + pRuntimeEnv->pCtx[k].start.key = point.key; + pRuntimeEnv->pCtx[k].start.val = v; + } else { + pRuntimeEnv->pCtx[k].end.key = point.key; + pRuntimeEnv->pCtx[k].end.val = v; + } + } +} + +static void setTimeWindowSKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) { + SQuery* pQuery = pRuntimeEnv->pQuery; + + bool done = resultRowInterpolated(pResult, RESULT_ROW_START_INTERP); + if (!done) { + TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? 
win->skey:win->ekey; + if (key == ts) { + setResultRowInterpo(pResult, RESULT_ROW_START_INTERP); + } else if (prevTs != INT64_MIN && ((QUERY_IS_ASC_QUERY(pQuery) && prevTs < key) || (!QUERY_IS_ASC_QUERY(pQuery) && prevTs > key))) { + doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, key, RESULT_ROW_START_INTERP); + setResultRowInterpo(pResult, RESULT_ROW_START_INTERP); + } else { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + } + + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_END_INTERP); + for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { + pRuntimeEnv->pCtx[k].size = 1; + } + } else { + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + } +} + +static void setTimeWindowEKeyInterp(SQueryRuntimeEnv* pRuntimeEnv, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY ts, int32_t offset, SResultRow* pResult, STimeWindow* win) { + SQuery* pQuery = pRuntimeEnv->pQuery; + + TSKEY key = QUERY_IS_ASC_QUERY(pQuery)? 
win->ekey:win->skey; + doRowwiseTimeWindowInterpolation(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, key, RESULT_ROW_END_INTERP); + setResultRowInterpo(pResult, RESULT_ROW_END_INTERP); + + setNotInterpoWindowKey(pRuntimeEnv->pCtx, pQuery->numOfOutput, RESULT_ROW_START_INTERP); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { + pRuntimeEnv->pCtx[i].size = 0; + } +} + static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo, - SWindowResInfo *pWindowResInfo, SArray *pDataBlock) { + SResultRowInfo *pWindowResInfo, SArray *pDataBlock) { SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); @@ -1300,6 +1604,7 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock); setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId); + pCtx[k].size = 1; } // set the input column data @@ -1318,20 +1623,21 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order); } - int32_t j = 0; int32_t offset = -1; + TSKEY prevTs = *(TSKEY*) pRuntimeEnv->prevRow[0]; + int32_t prevRowIndex = -1; - for (j = 0; j < pDataBlockInfo->rows; ++j) { + for (int32_t j = 0; j < pDataBlockInfo->rows; ++j) { offset = GET_COL_DATA_POS(pQuery, j, step); if (pRuntimeEnv->pTSBuf != NULL) { - int32_t r = doTSJoinFilter(pRuntimeEnv, offset); - if (r == TS_JOIN_TAG_NOT_EQUALS) { + int32_t ret = doTSJoinFilter(pRuntimeEnv, offset); + if (ret == TS_JOIN_TAG_NOT_EQUALS) { break; - } else if (r == TS_JOIN_TS_NOT_EQUALS) { + } else if (ret == TS_JOIN_TS_NOT_EQUALS) { continue; } else { - assert(r == TS_JOIN_TS_EQUAL); + assert(ret == TS_JOIN_TS_EQUAL); } } @@ -1341,11 +1647,14 @@ static void 
rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS // interval window query, decide the time window according to the primary timestamp if (QUERY_IS_INTERVAL_QUERY(pQuery)) { - int64_t ts = tsCols[offset]; + int32_t prevWindowIndex = curTimeWindowIndex(pWindowResInfo); + int64_t ts = tsCols[offset]; + STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); - bool hasTimeWindow = false; - int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow); + bool hasTimeWindow = false; + SResultRow* pResult = NULL; + int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, &hasTimeWindow, &pResult); if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code continue; } @@ -1354,7 +1663,35 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS continue; } - bool closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + // window start key interpolation + if (pRuntimeEnv->timeWindowInterpo) { + // check for the time window end time interpolation + int32_t curIndex = curTimeWindowIndex(pWindowResInfo); + if (prevWindowIndex != -1 && prevWindowIndex < curIndex) { + for (int32_t k = prevWindowIndex; k < curIndex; ++k) { + SResultRow *pRes = pWindowResInfo->pResult[k]; + + ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &pRes->win, masterScan, &hasTimeWindow, &pResult); + assert(ret == TSDB_CODE_SUCCESS && !resultRowInterpolated(pResult, RESULT_ROW_END_INTERP)); + + setTimeWindowEKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &pRes->win); + + bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + doRowwiseApplyFunctions(pRuntimeEnv, closed, &pRes->win, offset); + } + + // restore current time window + ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &win, masterScan, 
&hasTimeWindow, + &pResult); + if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code + continue; + } + } + + setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &win); + } + + bool closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); doRowwiseApplyFunctions(pRuntimeEnv, closed, &win, offset); STimeWindow nextWin = win; @@ -1373,12 +1710,13 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS // null data, failed to allocate more memory buffer hasTimeWindow = false; - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow) != TSDB_CODE_SUCCESS) { + if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pDataBlockInfo, &nextWin, masterScan, &hasTimeWindow, &pResult) != TSDB_CODE_SUCCESS) { break; } if (hasTimeWindow) { - closed = getTimeWindowResStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); + setTimeWindowSKeyInterp(pRuntimeEnv, pDataBlock, prevTs, prevRowIndex, ts, offset, pResult, &nextWin); + closed = getResultRowStatus(pWindowResInfo, curTimeWindowIndex(pWindowResInfo)); doRowwiseApplyFunctions(pRuntimeEnv, closed, &nextWin, offset); } } @@ -1403,6 +1741,9 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS } } + prevTs = tsCols[offset]; + prevRowIndex = offset; + if (pRuntimeEnv->pTSBuf != NULL) { // if timestamp filter list is empty, quit current query if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) { @@ -1440,7 +1781,7 @@ static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBl SQuery *pQuery = pRuntimeEnv->pQuery; STableQueryInfo* pTableQInfo = pQuery->current; - SWindowResInfo* pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo* pWindowResInfo = &pRuntimeEnv->windowResInfo; if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) { rowwiseApplyFunctions(pRuntimeEnv, pStatis, 
pDataBlockInfo, pWindowResInfo, pDataBlock); @@ -1528,10 +1869,10 @@ void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY * top/bottom values emerge, so does diff function */ if (functionId == TSDB_FUNC_TWA) { - SResultRowCellInfo* pInfo = GET_RES_INFO(pCtx); - STwaInfo *pTWAInfo = (STwaInfo*) GET_ROWCELL_INTERBUF(pInfo); - pTWAInfo->SKey = pQuery->window.skey; - pTWAInfo->EKey = pQuery->window.ekey; + pCtx->param[1].i64Key = pQuery->window.skey; + pCtx->param[1].nType = TSDB_DATA_TYPE_BIGINT; + pCtx->param[2].i64Key = pQuery->window.ekey; + pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT; } } else if (functionId == TSDB_FUNC_ARITHM) { @@ -1677,6 +2018,8 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order pCtx->functionId = pSqlFuncMsg->functionId; pCtx->stableQuery = pRuntimeEnv->stableQuery; pCtx->interBufBytes = pQuery->pExpr1[i].interBytes; + pCtx->start.key = INT64_MIN; + pCtx->end.key = INT64_MIN; pCtx->numOfParams = pSqlFuncMsg->numOfParams; for (int32_t j = 0; j < pCtx->numOfParams; ++j) { @@ -1711,6 +2054,8 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int16_t order } + *(int64_t*) pRuntimeEnv->prevRow[0] = INT64_MIN; + // if it is group by normal column, do not set output buffer, the output buffer is pResult // fixed output query/multi-output query for normal table if (!pRuntimeEnv->groupbyNormalCol && !pRuntimeEnv->stableQuery && !QUERY_IS_INTERVAL_QUERY(pRuntimeEnv->pQuery)) { @@ -1781,6 +2126,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { tfree(pRuntimeEnv->offset); tfree(pRuntimeEnv->keyBuf); tfree(pRuntimeEnv->rowCellInfoOffset); + tfree(pRuntimeEnv->prevRow); taosHashCleanup(pRuntimeEnv->pResultRowHashTable); pRuntimeEnv->pResultRowHashTable = NULL; @@ -2259,7 +2605,7 @@ static bool overlapWithTimeWindow(SQuery* pQuery, SDataBlockInfo* pBlockInfo) { return false; } -int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * 
pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) { +int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo * pWindowResInfo, void* pQueryHandle, SDataBlockInfo* pBlockInfo, SDataStatis **pStatis, SArray** pDataBlock, uint32_t* status) { SQuery *pQuery = pRuntimeEnv->pQuery; *status = BLK_DATA_NO_NEEDED; @@ -2279,12 +2625,14 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo * pW // the filter result may be incorrect. So in case of interval query, we need to set the correct time output buffer if (QUERY_IS_INTERVAL_QUERY(pQuery)) { bool hasTimeWindow = false; + SResultRow* pResult = NULL; + bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); TSKEY k = QUERY_IS_ASC_QUERY(pQuery)? pBlockInfo->window.skey:pBlockInfo->window.ekey; STimeWindow win = getActiveTimeWindow(pWindowResInfo, k, pQuery); - if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow) != + if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pBlockInfo, &win, masterScan, &hasTimeWindow, &pResult) != TSDB_CODE_SUCCESS) { // todo handle error in set result for timewindow } @@ -2435,6 +2783,7 @@ static void ensureOutputBufferSimple(SQueryRuntimeEnv* pRuntimeEnv, int32_t capa pQuery->rec.capacity = capacity; } +// TODO merge with enuserOutputBufferSimple static void ensureOutputBuffer(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pBlockInfo) { // in case of prj/diff query, ensure the output buffer is sufficient to accommodate the results of current block SQuery* pQuery = pRuntimeEnv->pQuery; @@ -2479,7 +2828,7 @@ static void doSetInitialTimewindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo if (QUERY_IS_INTERVAL_QUERY(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == TSKEY_INITIAL_VAL) { STimeWindow w = TSWINDOW_INITIALIZER; - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = 
&pRuntimeEnv->windowResInfo; if (QUERY_IS_ASC_QUERY(pQuery)) { getAlignQueryTimeWindow(pQuery, pBlockInfo->window.skey, pBlockInfo->window.skey, pQuery->window.ekey, &w); @@ -2843,14 +3192,14 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) return -1; } - SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo; + SResultRowInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo; SResultRow * pWindowRes1 = getResultRow(pWindowResInfo1, leftPos); tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pageId); char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1); TSKEY leftTimestamp = GET_INT64_VAL(b1); - SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo; + SResultRowInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo; SResultRow * pWindowRes2 = getResultRow(pWindowResInfo2, rightPos); tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pageId); @@ -3090,7 +3439,7 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { int32_t pos = pTree->pNode[0].index; - SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo; + SResultRowInfo *pWindowResInfo = &pTableList[pos]->windowResInfo; SResultRow *pWindowRes = getResultRow(pWindowResInfo, cs.position[pos]); tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pageId); @@ -3238,17 +3587,9 @@ static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo * // order has changed already int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); - - // TODO validate the assertion -// if (!QUERY_IS_ASC_QUERY(pQuery)) { -// assert(pTableQueryInfo->win.ekey >= pTableQueryInfo->lastKey + step); -// } else { -// assert(pTableQueryInfo->win.ekey <= pTableQueryInfo->lastKey + step); -// } - if (pTableQueryInfo->lastKey == pTableQueryInfo->win.skey) { // do nothing, no 
results - } else { + } else {// NOTE: even win.skey != lastKey, the results may not generated. pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step; } @@ -3262,11 +3603,11 @@ static void updateTableQueryInfoForReverseScan(SQuery *pQuery, STableQueryInfo * pTableQueryInfo->windowResInfo.curIndex = pTableQueryInfo->windowResInfo.size - 1; } -static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t order) { +static void disableFuncInReverseScanImpl(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo *pWindowResInfo, int32_t order) { SQuery* pQuery = pRuntimeEnv->pQuery; for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - bool closed = getTimeWindowResStatus(pWindowResInfo, i); + bool closed = getResultRowStatus(pWindowResInfo, i); if (!closed) { continue; } @@ -3294,7 +3635,7 @@ void disableFuncInReverseScan(SQInfo *pQInfo) { int32_t order = pQuery->order.order; // group by normal columns and interval query on normal table - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { disableFuncInReverseScanImpl(pRuntimeEnv, pWindowResInfo, order); } else { // for simple result of table query, @@ -3485,7 +3826,7 @@ bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) { bool toContinue = false; if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { // for each group result, call the finalize function for each column - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; for (int32_t i = 0; i < pWindowResInfo->size; ++i) { SResultRow *pResult = getResultRow(pWindowResInfo, i); @@ -3570,13 +3911,7 @@ static void setEnvBeforeReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatusI SET_REVERSE_SCAN_FLAG(pRuntimeEnv); - STsdbQueryCond cond = { - .order = pQuery->order.order, - .colList 
= pQuery->colList, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, pQuery->window); + STsdbQueryCond cond = createTsdbQueryCond(pQuery); setQueryStatus(pQuery, QUERY_NOT_COMPLETED); switchCtxOrder(pRuntimeEnv); @@ -3645,6 +3980,8 @@ void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) { // do nothing if no data blocks are found qualified during scan if (qstatus.lastKey != pTableQueryInfo->lastKey) { qstatus.curWindow.ekey = pTableQueryInfo->lastKey - step; + } else { // the lastkey does not increase, which means no data checked yet + qDebug("QInfo:%p no results generated in this scan", pQInfo); } qstatus.lastKey = pTableQueryInfo->lastKey; @@ -3659,18 +3996,11 @@ void scanOneTableDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, TSKEY start) { break; } - STsdbQueryCond cond = { - .order = pQuery->order.order, - .colList = pQuery->colList, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, qstatus.curWindow); - if (pRuntimeEnv->pSecQueryHandle != NULL) { tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle); } + STsdbQueryCond cond = createTsdbQueryCond(pQuery); restoreTimeWindow(&pQInfo->tableGroupInfo, &cond); pRuntimeEnv->pSecQueryHandle = tsdbQueryTables(pQInfo->tsdb, &cond, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef); if (pRuntimeEnv->pSecQueryHandle == NULL) { @@ -3708,7 +4038,7 @@ void finalizeQueryResult(SQueryRuntimeEnv *pRuntimeEnv) { if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { // for each group result, call the finalize function for each column - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; if (pRuntimeEnv->groupbyNormalCol) { closeAllTimeWindow(pWindowResInfo); } @@ -3764,9 +4094,8 @@ static STableQueryInfo *createTableQueryInfo(SQueryRuntimeEnv *pRuntimeEnv, void // set more initial size of interval/groupby query if (QUERY_IS_INTERVAL_QUERY(pQuery) || pRuntimeEnv->groupbyNormalCol) 
{ - int32_t initialSize = 16; - int32_t initialThreshold = 100; - int32_t code = initWindowResInfo(&pTableQueryInfo->windowResInfo, initialSize, initialThreshold, TSDB_DATA_TYPE_INT); + int32_t initialSize = 128; + int32_t code = initWindowResInfo(&pTableQueryInfo->windowResInfo, initialSize, TSDB_DATA_TYPE_INT); if (code != TSDB_CODE_SUCCESS) { return NULL; } @@ -3793,7 +4122,7 @@ void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) { void setExecutionContext(SQInfo *pQInfo, int32_t groupIndex, TSKEY nextKey) { SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; STableQueryInfo *pTableQueryInfo = pRuntimeEnv->pQuery->current; - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; // lastKey needs to be updated pTableQueryInfo->lastKey = nextKey; @@ -3961,7 +4290,7 @@ void setIntervalQueryRange(SQInfo *pQInfo, TSKEY key) { * operations involve. */ STimeWindow w = TSWINDOW_INITIALIZER; - SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo; + SResultRowInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo; TSKEY sk = MIN(win.skey, win.ekey); TSKEY ek = MAX(win.skey, win.ekey); @@ -4005,7 +4334,7 @@ bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) { return loadPrimaryTS; } -static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_t orderType) { +static int32_t doCopyToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo, int32_t orderType) { SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; SQuery * pQuery = pRuntimeEnv->pQuery; @@ -4082,7 +4411,7 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_ * @param pQInfo * @param result */ -void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo) { +void copyFromWindowResToSData(SQInfo *pQInfo, SResultRowInfo *pResultInfo) { SQuery *pQuery = pQInfo->runtimeEnv.pQuery; int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? 
pQuery->pGroupbyExpr->orderType : TSDB_ORDER_ASC; @@ -4121,7 +4450,7 @@ static void stableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBloc SQuery * pQuery = pRuntimeEnv->pQuery; STableQueryInfo* pTableQueryInfo = pQuery->current; - SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo; + SResultRowInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo; pQuery->pos = QUERY_IS_ASC_QUERY(pQuery)? 0 : pDataBlockInfo->rows - 1; if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->groupbyNormalCol) { @@ -4195,16 +4524,19 @@ static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data } } - int32_t numOfTables = (int32_t)taosArrayGetSize(pQInfo->arrTableIdInfo); + int32_t numOfTables = (int32_t) taosHashGetSize(pQInfo->arrTableIdInfo); *(int32_t*)data = htonl(numOfTables); data += sizeof(int32_t); - for(int32_t i = 0; i < numOfTables; i++) { - STableIdInfo* pSrc = taosArrayGet(pQInfo->arrTableIdInfo, i); + + STableIdInfo* item = taosHashIterate(pQInfo->arrTableIdInfo, NULL); + while(item) { STableIdInfo* pDst = (STableIdInfo*)data; - pDst->uid = htobe64(pSrc->uid); - pDst->tid = htonl(pSrc->tid); - pDst->key = htobe64(pSrc->key); + pDst->uid = htobe64(item->uid); + pDst->tid = htonl(item->tid); + pDst->key = htobe64(item->key); + data += sizeof(STableIdInfo); + item = taosHashIterate(pQInfo->arrTableIdInfo, item); } // Check if query is completed or not for stable query or normal table query respectively. 
@@ -4366,7 +4698,7 @@ void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) { static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) { SQuery *pQuery = pRuntimeEnv->pQuery; - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; assert(pQuery->limit.offset == 0); STimeWindow tw = *win; @@ -4416,7 +4748,23 @@ static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* w static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) { SQuery *pQuery = pRuntimeEnv->pQuery; - *start = pQuery->current->lastKey; + + // get the first unclosed time window + bool assign = false; + for(int32_t i = 0; i < pRuntimeEnv->windowResInfo.size; ++i) { + if (pRuntimeEnv->windowResInfo.pResult[i]->closed) { + continue; + } + + assign = true; + *start = pRuntimeEnv->windowResInfo.pResult[i]->win.skey; + } + + if (!assign) { + *start = pQuery->current->lastKey; + } + + assert(*start <= pQuery->current->lastKey); // if queried with value filter, do NOT forward query start position if (pQuery->limit.offset <= 0 || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || pRuntimeEnv->pFillInfo != NULL) { @@ -4432,7 +4780,7 @@ static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) { STimeWindow w = TSWINDOW_INITIALIZER; - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; STableQueryInfo *pTableQueryInfo = pQuery->current; SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER; @@ -4531,13 +4879,7 @@ static int32_t setupQueryHandle(void* tsdb, SQInfo* pQInfo, bool isSTableQuery) return TSDB_CODE_SUCCESS; } - STsdbQueryCond cond = { - .order = pQuery->order.order, - .colList = pQuery->colList, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, pQuery->window); + STsdbQueryCond cond = 
createTsdbQueryCond(pQuery); if (!isSTableQuery && (pQInfo->tableqinfoGroupInfo.numOfTables == 1) @@ -4617,6 +4959,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery); pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery); + pRuntimeEnv->timeWindowInterpo = timeWindowInterpoRequired(pQuery); setScanLimitationByResultBuffer(pQuery); @@ -4653,20 +4996,13 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo if (!QUERY_IS_INTERVAL_QUERY(pQuery)) { int16_t type = TSDB_DATA_TYPE_NULL; - int32_t threshold = 0; - if (pRuntimeEnv->groupbyNormalCol) { // group by columns not tags; type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); - threshold = 4000; } else { type = TSDB_DATA_TYPE_INT; // group id - threshold = (int32_t)(GET_NUM_OF_TABLEGROUP(pQInfo)); - if (threshold < 8) { - threshold = 8; - } } - code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, threshold, type); + code = initWindowResInfo(&pRuntimeEnv->windowResInfo, 8, type); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4686,7 +5022,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo type = TSDB_DATA_TYPE_TIMESTAMP; } - code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, 1024, type); + code = initWindowResInfo(&pRuntimeEnv->windowResInfo, numOfResultRows, type); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4744,6 +5080,20 @@ static FORCE_INLINE void setEnvForEachBlock(SQInfo* pQInfo, STableQueryInfo* pTa } } +static void doTableQueryInfoTimeWindowCheck(SQuery* pQuery, STableQueryInfo* pTableQueryInfo) { + if (QUERY_IS_ASC_QUERY(pQuery)) { + assert( + (pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey) && + (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey) && + (pTableQueryInfo->win.skey >= pQuery->window.skey && pTableQueryInfo->win.ekey <= pQuery->window.ekey)); + } else { + assert( + (pTableQueryInfo->win.skey >= 
pTableQueryInfo->win.ekey) && + (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey) && + (pTableQueryInfo->win.skey <= pQuery->window.skey && pTableQueryInfo->win.ekey >= pQuery->window.ekey)); + } +} + static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) { SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; SQuery* pQuery = pRuntimeEnv->pQuery; @@ -4770,17 +5120,7 @@ static int64_t scanMultiTableDataBlocks(SQInfo *pQInfo) { } pQuery->current = *pTableQueryInfo; - if (QUERY_IS_ASC_QUERY(pQuery)) { - assert( - ((*pTableQueryInfo)->win.skey <= (*pTableQueryInfo)->win.ekey) && - ((*pTableQueryInfo)->lastKey >= (*pTableQueryInfo)->win.skey) && - ((*pTableQueryInfo)->win.skey >= pQuery->window.skey && (*pTableQueryInfo)->win.ekey <= pQuery->window.ekey)); - } else { - assert( - ((*pTableQueryInfo)->win.skey >= (*pTableQueryInfo)->win.ekey) && - ((*pTableQueryInfo)->lastKey <= (*pTableQueryInfo)->win.skey) && - ((*pTableQueryInfo)->win.skey <= pQuery->window.skey && (*pTableQueryInfo)->win.ekey >= pQuery->window.ekey)); - } + doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo); if (!pRuntimeEnv->groupbyNormalCol) { setEnvForEachBlock(pQInfo, *pTableQueryInfo, &blockInfo); @@ -4929,6 +5269,41 @@ static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) { return true; } +STsdbQueryCond createTsdbQueryCond(SQuery* pQuery) { + STsdbQueryCond cond = { + .colList = pQuery->colList, + .order = pQuery->order.order, + .numOfCols = pQuery->numOfCols, + }; + + TIME_WINDOW_COPY(cond.twindow, pQuery->window); + return cond; +} + +static STableIdInfo createTableIdInfo(SQuery* pQuery) { + assert(pQuery != NULL && pQuery->current != NULL); + + STableIdInfo tidInfo; + STableId* id = TSDB_TABLEID(pQuery->current->pTable); + + tidInfo.uid = id->uid; + tidInfo.tid = id->tid; + tidInfo.key = pQuery->current->lastKey; + + return tidInfo; +} + +static void updateTableIdInfo(SQuery* pQuery, SHashObj* pTableIdInfo) { + STableIdInfo tidInfo = createTableIdInfo(pQuery); 
+ STableIdInfo* idinfo = taosHashGet(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid)); + if (idinfo != NULL) { + assert(idinfo->tid == tidInfo.tid && idinfo->uid == tidInfo.uid); + idinfo->key = tidInfo.key; + } else { + taosHashPut(pTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, sizeof(STableIdInfo)); + } +} + /** * super table query handler * 1. super table projection query, group-by on normal columns query, ts-comp query @@ -4948,18 +5323,11 @@ static void sequentialTableProcess(SQInfo *pQInfo) { assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0); while (pQInfo->groupIndex < numOfGroups) { - SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex); + SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex); - qDebug("QInfo:%p last_row query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, pQInfo->groupIndex, - numOfGroups, group); - - STsdbQueryCond cond = { - .colList = pQuery->colList, - .order = pQuery->order.order, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, pQuery->window); + qDebug("QInfo:%p point interpolation query on group:%d, total group:%" PRIzu ", current group:%p", pQInfo, + pQInfo->groupIndex, numOfGroups, group); + STsdbQueryCond cond = createTsdbQueryCond(pQuery); SArray *g1 = taosArrayInit(1, POINTER_BYTES); SArray *tx = taosArrayClone(group); @@ -4983,14 +5351,14 @@ static void sequentialTableProcess(SQInfo *pQInfo) { initCtxOutputBuf(pRuntimeEnv); - SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle); + SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle); assert(taosArrayGetSize(s) >= 1); setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb); taosArrayDestroy(s); // here we simply set the first table as current table - SArray* first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex); + SArray *first = GET_TABLEGROUP(pQInfo, pQInfo->groupIndex); pQuery->current = taosArrayGetP(first, 0); 
scanOneTableDataBlocks(pRuntimeEnv, pQuery->current->lastKey); @@ -5012,19 +5380,14 @@ static void sequentialTableProcess(SQInfo *pQInfo) { break; } } - } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query + } else if (pRuntimeEnv->groupbyNormalCol) { // group-by on normal columns query while (pQInfo->groupIndex < numOfGroups) { - SArray* group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex); + SArray *group = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, pQInfo->groupIndex); - qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, numOfGroups); + qDebug("QInfo:%p group by normal columns group:%d, total group:%" PRIzu "", pQInfo, pQInfo->groupIndex, + numOfGroups); - STsdbQueryCond cond = { - .colList = pQuery->colList, - .order = pQuery->order.order, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, pQuery->window); + STsdbQueryCond cond = createTsdbQueryCond(pQuery); SArray *g1 = taosArrayInit(1, POINTER_BYTES); SArray *tx = taosArrayClone(group); @@ -5047,7 +5410,7 @@ static void sequentialTableProcess(SQInfo *pQInfo) { longjmp(pRuntimeEnv->env, terrno); } - SArray* s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle); + SArray *s = tsdbGetQueriedTableList(pRuntimeEnv->pQueryHandle); assert(taosArrayGetSize(s) >= 1); setTagVal(pRuntimeEnv, taosArrayGetP(s, 0), pQInfo->tsdb); @@ -5056,26 +5419,26 @@ static void sequentialTableProcess(SQInfo *pQInfo) { scanMultiTableDataBlocks(pQInfo); pQInfo->groupIndex += 1; - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + SResultRowInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - // no results generated for current group, continue to try the next group + // no results generated for current group, continue to try the next group taosArrayDestroy(s); if (pWindowResInfo->size <= 0) { continue; } for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - pWindowResInfo->pResult[i]->closed 
= true; // enable return all results for group by normal columns + pWindowResInfo->pResult[i]->closed = true; // enable return all results for group by normal columns SResultRow *pResult = pWindowResInfo->pResult[i]; for (int32_t j = 0; j < pQuery->numOfOutput; ++j) { - SResultRowCellInfo* pCell = getResultCell(pRuntimeEnv, pResult, j); + SResultRowCellInfo *pCell = getResultCell(pRuntimeEnv, pResult, j); pResult->numOfRows = (uint16_t)(MAX(pResult->numOfRows, pCell->numOfRes)); } } qDebug("QInfo:%p generated groupby columns results %d rows for group %d completed", pQInfo, pWindowResInfo->size, - pQInfo->groupIndex); + pQInfo->groupIndex); int32_t currentGroupIndex = pQInfo->groupIndex; pQuery->rec.rows = 0; @@ -5084,16 +5447,109 @@ static void sequentialTableProcess(SQInfo *pQInfo) { ensureOutputBufferSimple(pRuntimeEnv, pWindowResInfo->size); copyFromWindowResToSData(pQInfo, pWindowResInfo); - pQInfo->groupIndex = currentGroupIndex; //restore the group index + pQInfo->groupIndex = currentGroupIndex; // restore the group index assert(pQuery->rec.rows == pWindowResInfo->size); clearClosedTimeWindow(pRuntimeEnv); break; } + } else if (pRuntimeEnv->queryWindowIdentical && pRuntimeEnv->pTSBuf == NULL) { + //super table projection query with identical query time range for all tables. 
+ SDataBlockInfo blockInfo = SDATA_BLOCK_INITIALIZER; + resetDefaultResInfoOutputBuf(pRuntimeEnv); + + SArray *group = GET_TABLEGROUP(pQInfo, 0); + assert(taosArrayGetSize(group) == pQInfo->tableqinfoGroupInfo.numOfTables && + 1 == taosArrayGetSize(pQInfo->tableqinfoGroupInfo.pGroupList)); + + void *pQueryHandle = pRuntimeEnv->pQueryHandle; + if (pQueryHandle == NULL) { + STsdbQueryCond con = createTsdbQueryCond(pQuery); + pRuntimeEnv->pQueryHandle = tsdbQueryTables(pQInfo->tsdb, &con, &pQInfo->tableGroupInfo, pQInfo, &pQInfo->memRef); + pQueryHandle = pRuntimeEnv->pQueryHandle; + } + + // skip blocks without load the actual data block from file if no filter condition present + // skipBlocks(&pQInfo->runtimeEnv); + // if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) { + // setQueryStatus(pQuery, QUERY_COMPLETED); + // return; + // } + + bool hasMoreBlock = true; + SQueryCostInfo *summary = &pRuntimeEnv->summary; + while ((hasMoreBlock = tsdbNextDataBlock(pQueryHandle)) == true) { + summary->totalBlocks += 1; + + if (IS_QUERY_KILLED(pQInfo)) { + longjmp(pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED); + } + + tsdbRetrieveDataBlockInfo(pQueryHandle, &blockInfo); + STableQueryInfo **pTableQueryInfo = + (STableQueryInfo **)taosHashGet(pQInfo->tableqinfoGroupInfo.map, &blockInfo.tid, sizeof(blockInfo.tid)); + if (pTableQueryInfo == NULL) { + break; + } + + pQuery->current = *pTableQueryInfo; + doTableQueryInfoTimeWindowCheck(pQuery, *pTableQueryInfo); + + if (pRuntimeEnv->hasTagResults) { + setTagVal(pRuntimeEnv, pQuery->current->pTable, pQInfo->tsdb); + } + + uint32_t status = 0; + SDataStatis *pStatis = NULL; + SArray *pDataBlock = NULL; + + int32_t ret = loadDataBlockOnDemand(pRuntimeEnv, &pQuery->current->windowResInfo, pQueryHandle, &blockInfo, + &pStatis, &pDataBlock, &status); + if (ret != TSDB_CODE_SUCCESS) { + break; + } + + assert(status != BLK_DATA_DISCARD); + ensureOutputBuffer(pRuntimeEnv, &blockInfo); + + pQuery->pos = 
QUERY_IS_ASC_QUERY(pQuery) ? 0 : blockInfo.rows - 1; + int32_t numOfRes = tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, pDataBlock); + + summary->totalRows += blockInfo.rows; + qDebug("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", numOfRows:%d, numOfRes:%d, lastKey:%" PRId64, + GET_QINFO_ADDR(pRuntimeEnv), blockInfo.window.skey, blockInfo.window.ekey, blockInfo.rows, numOfRes, + pQuery->current->lastKey); + + pQuery->rec.rows = getNumOfResult(pRuntimeEnv); + + // the flag may be set by tableApplyFunctionsOnBlock, clear it here + CLEAR_QUERY_STATUS(pQuery, QUERY_COMPLETED); + + updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo); + skipResults(pRuntimeEnv); + + // the limitation of output result is reached, set the query completed + if (limitResults(pRuntimeEnv)) { + setQueryStatus(pQuery, QUERY_COMPLETED); + SET_STABLE_QUERY_OVER(pQInfo); + break; + } + + // while the output buffer is full or limit/offset is applied, query may be paused here + if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL|QUERY_COMPLETED)) { + break; + } + } + + if (!hasMoreBlock) { + setQueryStatus(pQuery, QUERY_COMPLETED); + SET_STABLE_QUERY_OVER(pQInfo); + } } else { /* - * 1. super table projection query, 2. ts-comp query - * if the subgroup index is larger than 0, results generated by group by tbname,k is existed. + * the following two cases handled here. + * 1. ts-comp query, and 2. the super table projection query with different query time range for each table. + * If the subgroup index is larger than 0, results generated by group by tbname,k is existed. * we need to return it to client in the first place. */ if (pQInfo->groupIndex > 0) { @@ -5156,14 +5612,7 @@ static void sequentialTableProcess(SQInfo *pQInfo) { * to ensure that, we can reset the query range once query on a meter is completed. 
*/ pQInfo->tableIndex++; - - STableIdInfo tidInfo = {0}; - - STableId* id = TSDB_TABLEID(pQuery->current->pTable); - tidInfo.uid = id->uid; - tidInfo.tid = id->tid; - tidInfo.key = pQuery->current->lastKey; - taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo); + updateTableIdInfo(pQuery, pQInfo->arrTableIdInfo); // if the buffer is full or group by each table, we need to jump out of the loop if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) { @@ -5190,31 +5639,31 @@ static void sequentialTableProcess(SQInfo *pQInfo) { if (pQInfo->tableIndex >= pQInfo->tableqinfoGroupInfo.numOfTables) { setQueryStatus(pQuery, QUERY_COMPLETED); } - } - /* - * 1. super table projection query, group-by on normal columns query, ts-comp query - * 2. point interpolation query, last row query - * - * group-by on normal columns query and last_row query do NOT invoke the finalizer here, - * since the finalize stage will be done at the client side. - * - * projection query, point interpolation query do not need the finalizer. - * - * Only the ts-comp query requires the finalizer function to be executed here. - */ - if (isTSCompQuery(pQuery)) { - finalizeQueryResult(pRuntimeEnv); - } + /* + * 1. super table projection query, group-by on normal columns query, ts-comp query + * 2. point interpolation query, last row query + * + * group-by on normal columns query and last_row query do NOT invoke the finalizer here, + * since the finalize stage will be done at the client side. + * + * projection query, point interpolation query do not need the finalizer. + * + * Only the ts-comp query requires the finalizer function to be executed here. 
+ */ + if (isTSCompQuery(pQuery)) { + finalizeQueryResult(pRuntimeEnv); + } - if (pRuntimeEnv->pTSBuf != NULL) { - pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur; - } + if (pRuntimeEnv->pTSBuf != NULL) { + pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur; + } - qDebug( - "QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 " points returned, total:%" PRId64 ", offset:%" PRId64, - pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, pQuery->rec.total, - pQuery->limit.offset); + qDebug("QInfo %p numOfTables:%" PRIu64 ", index:%d, numOfGroups:%" PRIzu ", %" PRId64 + " points returned, total:%" PRId64 ", offset:%" PRId64, + pQInfo, (uint64_t)pQInfo->tableqinfoGroupInfo.numOfTables, pQInfo->tableIndex, numOfGroups, pQuery->rec.rows, + pQuery->rec.total, pQuery->limit.offset); + } } static void doSaveContext(SQInfo *pQInfo) { @@ -5229,13 +5678,7 @@ static void doSaveContext(SQInfo *pQInfo) { SWITCH_ORDER(pRuntimeEnv->pTSBuf->cur.order); } - STsdbQueryCond cond = { - .order = pQuery->order.order, - .colList = pQuery->colList, - .numOfCols = pQuery->numOfCols, - }; - - TIME_WINDOW_COPY(cond.twindow, pQuery->window); + STsdbQueryCond cond = createTsdbQueryCond(pQuery); // clean unused handle if (pRuntimeEnv->pSecQueryHandle != NULL) { @@ -5508,13 +5951,8 @@ static void tableMultiOutputProcess(SQInfo *pQInfo, STableQueryInfo* pTableInfo) qDebug("QInfo:%p query paused due to output limitation, next qrange:%" PRId64 "-%" PRId64, pQInfo, pQuery->current->lastKey, pQuery->window.ekey); } else if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED)) { - STableIdInfo tidInfo; - STableId* id = TSDB_TABLEID(pQuery->current->pTable); - - tidInfo.uid = id->uid; - tidInfo.tid = id->tid; - tidInfo.key = pQuery->current->lastKey; - taosArrayPush(pQInfo->arrTableIdInfo, &tidInfo); + STableIdInfo tidInfo = createTableIdInfo(pQuery); + taosHashPut(pQInfo->arrTableIdInfo, &tidInfo.tid, sizeof(tidInfo.tid), &tidInfo, 
sizeof(STableIdInfo)); } if (!isTSCompQuery(pQuery)) { @@ -5836,11 +6274,9 @@ static int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SArray **pTableIdList, pQueryMsg->interval.interval = htobe64(pQueryMsg->interval.interval); pQueryMsg->interval.sliding = htobe64(pQueryMsg->interval.sliding); pQueryMsg->interval.offset = htobe64(pQueryMsg->interval.offset); - // pQueryMsg->interval.intervalUnit = pQueryMsg->interval.intervalUnit; - // pQueryMsg->interval.slidingUnit = pQueryMsg->interval.slidingUnit; - // pQueryMsg->interval.offsetUnit = pQueryMsg->interval.offsetUnit; pQueryMsg->limit = htobe64(pQueryMsg->limit); pQueryMsg->offset = htobe64(pQueryMsg->offset); + pQueryMsg->tableLimit = htobe64(pQueryMsg->tableLimit); pQueryMsg->order = htons(pQueryMsg->order); pQueryMsg->orderColId = htons(pQueryMsg->orderColId); @@ -6447,9 +6883,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGrou goto _cleanup; } + int32_t srcSize = 0; for (int16_t i = 0; i < numOfCols; ++i) { pQuery->colList[i] = pQueryMsg->colList[i]; pQuery->colList[i].filters = tscFilterInfoClone(pQueryMsg->colList[i].filters, pQuery->colList[i].numOfFilters); + srcSize += pQuery->colList[i].bytes; } // calculate the result row size @@ -6510,14 +6948,20 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGrou taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK); } - int tableIndex = 0; - pQInfo->runtimeEnv.interBufSize = getOutputInterResultBufSize(pQuery); pQInfo->runtimeEnv.summary.tableInfoSize += (pTableGroupInfo->numOfTables * sizeof(STableQueryInfo)); pQInfo->runtimeEnv.pResultRowHashTable = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); pQInfo->runtimeEnv.keyBuf = malloc(TSDB_MAX_BYTES_PER_ROW); pQInfo->runtimeEnv.pool = initResultRowPool(getWindowResultSize(&pQInfo->runtimeEnv)); + pQInfo->runtimeEnv.prevRow = malloc(POINTER_BYTES * pQuery->numOfCols + 
srcSize); + + char* start = POINTER_BYTES * pQuery->numOfCols + (char*) pQInfo->runtimeEnv.prevRow; + pQInfo->runtimeEnv.prevRow[0] = start; + + for(int32_t i = 1; i < pQuery->numOfCols; ++i) { + pQInfo->runtimeEnv.prevRow[i] = pQInfo->runtimeEnv.prevRow[i - 1] + pQuery->colList[i-1].bytes; + } pQInfo->pBuf = calloc(pTableGroupInfo->numOfTables, sizeof(STableQueryInfo)); if (pQInfo->pBuf == NULL) { @@ -6525,7 +6969,7 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGrou } // NOTE: pTableCheckInfo need to update the query time range and the lastKey info - pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo)); + pQInfo->arrTableIdInfo = taosHashInit(pTableGroupInfo->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); pQInfo->dataReady = QUERY_RESULT_NOT_READY; pQInfo->rspContext = NULL; pthread_mutex_init(&pQInfo->lock, NULL); @@ -6535,10 +6979,10 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGrou pQuery->window = pQueryMsg->window; changeExecuteScanOrder(pQInfo, pQueryMsg, stableQuery); + pQInfo->runtimeEnv.queryWindowIdentical = true; STimeWindow window = pQuery->window; int32_t index = 0; - for(int32_t i = 0; i < numOfGroups; ++i) { SArray* pa = taosArrayGetP(pQInfo->tableGroupInfo.pGroupList, i); @@ -6553,9 +6997,12 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGrou for(int32_t j = 0; j < s; ++j) { STableKeyInfo* info = taosArrayGet(pa, j); - void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo); - window.skey = info->lastKey; + if (info->lastKey != pQuery->window.skey) { + pQInfo->runtimeEnv.queryWindowIdentical = false; + } + + void* buf = (char*) pQInfo->pBuf + index * sizeof(STableQueryInfo); STableQueryInfo* item = createTableQueryInfo(&pQInfo->runtimeEnv, info->pTable, window, buf); if (item == NULL) { goto _cleanup; @@ -6769,7 +7216,7 @@ static void freeQInfo(SQInfo *pQInfo) { tfree(pQInfo->pBuf); 
tsdbDestroyTableGroup(&pQInfo->tableGroupInfo); - taosArrayDestroy(pQInfo->arrTableIdInfo); + taosHashCleanup(pQInfo->arrTableIdInfo); pQInfo->signature = 0; @@ -7149,7 +7596,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co size_t size = getResultSize(pQInfo, &pQuery->rec.rows); size += sizeof(int32_t); - size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo); + size += sizeof(STableIdInfo) * taosHashGetSize(pQInfo->arrTableIdInfo); *contLen = (int32_t)(size + sizeof(SRetrieveTableRsp)); diff --git a/src/query/src/qParserImpl.c b/src/query/src/qParserImpl.c index 5cd9d3c77a..7d71d9f7f1 100644 --- a/src/query/src/qParserImpl.c +++ b/src/query/src/qParserImpl.c @@ -405,14 +405,29 @@ void tSQLSetColumnType(TAOS_FIELD *pField, SStrToken *type) { if (type->type == 0) { pField->bytes = 0; } else { - pField->bytes = (int16_t)(-(int32_t)type->type * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE); + int32_t bytes = -(int32_t)(type->type); + if (bytes > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { + // we have to postpone reporting the error because it cannot be done here + // as pField->bytes is int16_t, use 'TSDB_MAX_NCHAR_LEN + 1' to avoid overflow + bytes = TSDB_MAX_NCHAR_LEN + 1; + } else { + bytes = bytes * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE; + } + pField->bytes = (int16_t)bytes; } } else if (i == TSDB_DATA_TYPE_BINARY) { /* for binary, the TOKENTYPE is the length of binary */ if (type->type == 0) { pField->bytes = 0; } else { - pField->bytes = (int16_t) (-(int32_t) type->type + VARSTR_HEADER_SIZE); + int32_t bytes = -(int32_t)(type->type); + if (bytes > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { + // refer comment for NCHAR above + bytes = TSDB_MAX_BINARY_LEN + 1; + } else { + bytes += VARSTR_HEADER_SIZE; + } + pField->bytes = (int16_t)bytes; } } break; diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index 55a7aea53a..bc7243830d 100644 --- a/src/query/src/qResultbuf.c +++ 
b/src/query/src/qResultbuf.c @@ -423,9 +423,8 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { unlink(pResultBuf->path); tfree(pResultBuf->path); - SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->groupSet); - while(taosHashIterNext(iter)) { - SArray** p = (SArray**) taosHashIterGet(iter); + SArray** p = taosHashIterate(pResultBuf->groupSet, NULL); + while(p) { size_t n = taosArrayGetSize(*p); for(int32_t i = 0; i < n; ++i) { SPageInfo* pi = taosArrayGetP(*p, i); @@ -434,10 +433,9 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { } taosArrayDestroy(*p); + p = taosHashIterate(pResultBuf->groupSet, p); } - taosHashDestroyIter(iter); - tdListFree(pResultBuf->lruList); taosArrayDestroy(pResultBuf->emptyDummyIdList); taosHashCleanup(pResultBuf->groupSet); diff --git a/src/query/src/qUtil.c b/src/query/src/qUtil.c index 3f56366db8..6c845b012f 100644 --- a/src/query/src/qUtil.c +++ b/src/query/src/qUtil.c @@ -43,51 +43,48 @@ int32_t getOutputInterResultBufSize(SQuery* pQuery) { return size; } -int32_t initWindowResInfo(SWindowResInfo *pWindowResInfo, int32_t size, int32_t threshold, int16_t type) { - pWindowResInfo->capacity = size; - pWindowResInfo->threshold = threshold; - - pWindowResInfo->type = type; - pWindowResInfo->curIndex = -1; - pWindowResInfo->size = 0; - pWindowResInfo->prevSKey = TSKEY_INITIAL_VAL; +int32_t initWindowResInfo(SResultRowInfo *pResultRowInfo, int32_t size, int16_t type) { + pResultRowInfo->capacity = size; - pWindowResInfo->pResult = calloc(pWindowResInfo->capacity, POINTER_BYTES); - if (pWindowResInfo->pResult == NULL) { + pResultRowInfo->type = type; + pResultRowInfo->curIndex = -1; + pResultRowInfo->size = 0; + pResultRowInfo->prevSKey = TSKEY_INITIAL_VAL; + + pResultRowInfo->pResult = calloc(pResultRowInfo->capacity, POINTER_BYTES); + if (pResultRowInfo->pResult == NULL) { return TSDB_CODE_QRY_OUT_OF_MEMORY; } return TSDB_CODE_SUCCESS; } -void cleanupTimeWindowInfo(SWindowResInfo *pWindowResInfo) { - if 
(pWindowResInfo == NULL) { +void cleanupTimeWindowInfo(SResultRowInfo *pResultRowInfo) { + if (pResultRowInfo == NULL) { return; } - if (pWindowResInfo->capacity == 0) { - assert(pWindowResInfo->pResult == NULL); + if (pResultRowInfo->capacity == 0) { + assert(pResultRowInfo->pResult == NULL); return; } - if (pWindowResInfo->type == TSDB_DATA_TYPE_BINARY || pWindowResInfo->type == TSDB_DATA_TYPE_NCHAR) { - for(int32_t i = 0; i < pWindowResInfo->size; ++i) { - tfree(pWindowResInfo->pResult[i]->key); + if (pResultRowInfo->type == TSDB_DATA_TYPE_BINARY || pResultRowInfo->type == TSDB_DATA_TYPE_NCHAR) { + for(int32_t i = 0; i < pResultRowInfo->size; ++i) { + tfree(pResultRowInfo->pResult[i]->key); } } - tfree(pWindowResInfo->pResult); + tfree(pResultRowInfo->pResult); } -void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo) { - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0) { +void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo) { + if (pResultRowInfo == NULL || pResultRowInfo->capacity == 0) { return; } -// assert(pWindowResInfo->size == 1); - - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - SResultRow *pWindowRes = pWindowResInfo->pResult[i]; - clearResultRow(pRuntimeEnv, pWindowRes, pWindowResInfo->type); + for (int32_t i = 0; i < pResultRowInfo->size; ++i) { + SResultRow *pWindowRes = pResultRowInfo->pResult[i]; + clearResultRow(pRuntimeEnv, pWindowRes, pResultRowInfo->type); int32_t groupIndex = 0; int64_t uid = 0; @@ -96,30 +93,30 @@ void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowR taosHashRemove(pRuntimeEnv->pResultRowHashTable, (const char *)pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(sizeof(groupIndex))); } - pWindowResInfo->curIndex = -1; - pWindowResInfo->size = 0; + pResultRowInfo->curIndex = -1; + pResultRowInfo->size = 0; - pWindowResInfo->startTime = TSKEY_INITIAL_VAL; - pWindowResInfo->prevSKey = TSKEY_INITIAL_VAL; + 
pResultRowInfo->startTime = TSKEY_INITIAL_VAL; + pResultRowInfo->prevSKey = TSKEY_INITIAL_VAL; } void clearFirstNWindowRes(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0 || num == 0) { + SResultRowInfo *pResultRowInfo = &pRuntimeEnv->windowResInfo; + if (pResultRowInfo == NULL || pResultRowInfo->capacity == 0 || pResultRowInfo->size == 0 || num == 0) { return; } - int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); + int32_t numOfClosed = numOfClosedTimeWindow(pResultRowInfo); assert(num >= 0 && num <= numOfClosed); - int16_t type = pWindowResInfo->type; + int16_t type = pResultRowInfo->type; int64_t uid = getResultInfoUId(pRuntimeEnv); char *key = NULL; int16_t bytes = -1; for (int32_t i = 0; i < num; ++i) { - SResultRow *pResult = pWindowResInfo->pResult[i]; + SResultRow *pResult = pResultRowInfo->pResult[i]; if (pResult->closed) { // remove the window slot from hash table getResultRowKeyInfo(pResult, type, &key, &bytes); SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, key, bytes, uid); @@ -129,23 +126,23 @@ void clearFirstNWindowRes(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { } } - int32_t remain = pWindowResInfo->size - num; + int32_t remain = pResultRowInfo->size - num; // clear all the closed windows from the window list for (int32_t k = 0; k < remain; ++k) { - copyResultRow(pRuntimeEnv, pWindowResInfo->pResult[k], pWindowResInfo->pResult[num + k], type); + copyResultRow(pRuntimeEnv, pResultRowInfo->pResult[k], pResultRowInfo->pResult[num + k], type); } // move the unclosed window in the front of the window list - for (int32_t k = remain; k < pWindowResInfo->size; ++k) { - SResultRow *pWindowRes = pWindowResInfo->pResult[k]; - clearResultRow(pRuntimeEnv, pWindowRes, pWindowResInfo->type); + for (int32_t k = remain; k < pResultRowInfo->size; ++k) { + SResultRow *pWindowRes = pResultRowInfo->pResult[k]; + 
clearResultRow(pRuntimeEnv, pWindowRes, pResultRowInfo->type); } - pWindowResInfo->size = remain; + pResultRowInfo->size = remain; - for (int32_t k = 0; k < pWindowResInfo->size; ++k) { - SResultRow *pResult = pWindowResInfo->pResult[k]; + for (int32_t k = 0; k < pResultRowInfo->size; ++k) { + SResultRow *pResult = pResultRowInfo->pResult[k]; getResultRowKeyInfo(pResult, type, &key, &bytes); SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, key, bytes, uid); @@ -153,43 +150,43 @@ void clearFirstNWindowRes(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { assert(p != NULL); int32_t v = (*p - num); - assert(v >= 0 && v <= pWindowResInfo->size); + assert(v >= 0 && v <= pResultRowInfo->size); SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, key, bytes, uid); taosHashPut(pRuntimeEnv->pResultRowHashTable, pRuntimeEnv->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes), (char *)&v, sizeof(int32_t)); } - pWindowResInfo->curIndex = -1; + pResultRowInfo->curIndex = -1; } void clearClosedTimeWindow(SQueryRuntimeEnv *pRuntimeEnv) { - SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; - if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0) { + SResultRowInfo *pResultRowInfo = &pRuntimeEnv->windowResInfo; + if (pResultRowInfo == NULL || pResultRowInfo->capacity == 0 || pResultRowInfo->size == 0) { return; } - int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); + int32_t numOfClosed = numOfClosedTimeWindow(pResultRowInfo); clearFirstNWindowRes(pRuntimeEnv, numOfClosed); } -int32_t numOfClosedTimeWindow(SWindowResInfo *pWindowResInfo) { +int32_t numOfClosedTimeWindow(SResultRowInfo *pResultRowInfo) { int32_t i = 0; - while (i < pWindowResInfo->size && pWindowResInfo->pResult[i]->closed) { + while (i < pResultRowInfo->size && pResultRowInfo->pResult[i]->closed) { ++i; } return i; } -void closeAllTimeWindow(SWindowResInfo *pWindowResInfo) { - assert(pWindowResInfo->size >= 0 && pWindowResInfo->capacity >= pWindowResInfo->size); +void 
closeAllTimeWindow(SResultRowInfo *pResultRowInfo) { + assert(pResultRowInfo->size >= 0 && pResultRowInfo->capacity >= pResultRowInfo->size); - for (int32_t i = 0; i < pWindowResInfo->size; ++i) { - if (pWindowResInfo->pResult[i]->closed) { + for (int32_t i = 0; i < pResultRowInfo->size; ++i) { + if (pResultRowInfo->pResult[i]->closed) { continue; } - pWindowResInfo->pResult[i]->closed = true; + pResultRowInfo->pResult[i]->closed = true; } } @@ -198,41 +195,41 @@ void closeAllTimeWindow(SWindowResInfo *pWindowResInfo) { * the last qualified time stamp in case of sliding query, which the sliding time is not equalled to the interval time. * NOTE: remove redundant, only when the result set order equals to traverse order */ -void removeRedundantWindow(SWindowResInfo *pWindowResInfo, TSKEY lastKey, int32_t order) { - assert(pWindowResInfo->size >= 0 && pWindowResInfo->capacity >= pWindowResInfo->size); - if (pWindowResInfo->size <= 1) { +void removeRedundantWindow(SResultRowInfo *pResultRowInfo, TSKEY lastKey, int32_t order) { + assert(pResultRowInfo->size >= 0 && pResultRowInfo->capacity >= pResultRowInfo->size); + if (pResultRowInfo->size <= 1) { return; } // get the result order - int32_t resultOrder = (pWindowResInfo->pResult[0]->win.skey < pWindowResInfo->pResult[1]->win.skey)? 1:-1; + int32_t resultOrder = (pResultRowInfo->pResult[0]->win.skey < pResultRowInfo->pResult[1]->win.skey)? 
1:-1; if (order != resultOrder) { return; } int32_t i = 0; if (order == QUERY_ASC_FORWARD_STEP) { - TSKEY ekey = pWindowResInfo->pResult[i]->win.ekey; - while (i < pWindowResInfo->size && (ekey < lastKey)) { + TSKEY ekey = pResultRowInfo->pResult[i]->win.ekey; + while (i < pResultRowInfo->size && (ekey < lastKey)) { ++i; } } else if (order == QUERY_DESC_FORWARD_STEP) { - while (i < pWindowResInfo->size && (pWindowResInfo->pResult[i]->win.skey > lastKey)) { + while (i < pResultRowInfo->size && (pResultRowInfo->pResult[i]->win.skey > lastKey)) { ++i; } } - if (i < pWindowResInfo->size) { - pWindowResInfo->size = (i + 1); + if (i < pResultRowInfo->size) { + pResultRowInfo->size = (i + 1); } } -bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot) { - return (getResultRow(pWindowResInfo, slot)->closed == true); +bool isWindowResClosed(SResultRowInfo *pResultRowInfo, int32_t slot) { + return (getResultRow(pResultRowInfo, slot)->closed == true); } -void closeTimeWindow(SWindowResInfo *pWindowResInfo, int32_t slot) { - getResultRow(pWindowResInfo, slot)->closed = true; +void closeTimeWindow(SResultRowInfo *pResultRowInfo, int32_t slot) { + getResultRow(pResultRowInfo, slot)->closed = true; } void clearResultRow(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pWindowRes, int16_t type) { @@ -395,11 +392,10 @@ uint64_t getResultInfoUId(SQueryRuntimeEnv* pRuntimeEnv) { } SQuery* pQuery = pRuntimeEnv->pQuery; - if ((pQuery->checkBuffer == 1 && pQuery->interval.interval == 0) || isPointInterpoQuery(pQuery) || - pRuntimeEnv->groupbyNormalCol) { + if (pQuery->interval.interval == 0 || isPointInterpoQuery(pQuery) || pRuntimeEnv->groupbyNormalCol) { return 0; } - STableId* id = TSDB_TABLEID(pRuntimeEnv->pQuery->current); + STableId* id = TSDB_TABLEID(pRuntimeEnv->pQuery->current->pTable); return id->uid; } \ No newline at end of file diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index acceaf9d7a..00a97d7bc2 100644 --- a/src/rpc/src/rpcMain.c +++ 
b/src/rpc/src/rpcMain.c @@ -630,8 +630,16 @@ static void rpcReleaseConn(SRpcConn *pConn) { } else { // if there is an outgoing message, free it if (pConn->outType && pConn->pReqMsg) { - if (pConn->pContext) pConn->pContext->pConn = NULL; - taosRemoveRef(tsRpcRefId, pConn->pContext->rid); + SRpcReqContext *pContext = pConn->pContext; + if (pContext->pRsp) { + // for synchronous API, post semaphore to unblock app + pContext->pRsp->code = TSDB_CODE_RPC_APP_ERROR; + pContext->pRsp->pCont = NULL; + pContext->pRsp->contLen = 0; + tsem_post(pContext->pSem); + } + pContext->pConn = NULL; + taosRemoveRef(tsRpcRefId, pContext->rid); } } diff --git a/src/rpc/src/rpcTcp.c b/src/rpc/src/rpcTcp.c index 7b8cf3cda2..2850046d05 100644 --- a/src/rpc/src/rpcTcp.c +++ b/src/rpc/src/rpcTcp.c @@ -371,10 +371,13 @@ void taosCloseTcpConnection(void *chandle) { int taosSendTcpData(uint32_t ip, uint16_t port, void *data, int len, void *chandle) { SFdObj *pFdObj = chandle; - if (pFdObj == NULL || pFdObj->signature != pFdObj) return -1; + SThreadObj *pThreadObj = pFdObj->pThreadObj; - return taosWriteMsg(pFdObj->fd, data, len); + int ret = taosWriteMsg(pFdObj->fd, data, len); + tTrace("%s %p TCP data is sent, FD:%p fd:%d bytes:%d", pThreadObj->label, pFdObj->thandle, pFdObj, pFdObj->fd, ret); + + return ret; } static void taosReportBrokenLink(SFdObj *pFdObj) { @@ -409,7 +412,7 @@ static int taosReadTcpData(SFdObj *pFdObj, SRecvInfo *pInfo) { headLen = taosReadMsg(pFdObj->fd, &rpcHead, sizeof(SRpcHead)); if (headLen != sizeof(SRpcHead)) { - tDebug("%s %p read error, headLen:%d", pThreadObj->label, pFdObj->thandle, headLen); + tDebug("%s %p read error, FD:%p headLen:%d", pThreadObj->label, pFdObj->thandle, pFdObj, headLen); return -1; } @@ -420,7 +423,7 @@ static int taosReadTcpData(SFdObj *pFdObj, SRecvInfo *pInfo) { tError("%s %p TCP malloc(size:%d) fail", pThreadObj->label, pFdObj->thandle, msgLen); return -1; } else { - tTrace("TCP malloc mem:%p size:%d", buffer, size); + tTrace("%s %p read 
data, FD:%p fd:%d TCP malloc mem:%p", pThreadObj->label, pFdObj->thandle, pFdObj, pFdObj->fd, buffer); } msg = buffer + tsRpcOverhead; @@ -583,8 +586,8 @@ static void taosFreeFdObj(SFdObj *pFdObj) { pthread_mutex_unlock(&pThreadObj->mutex); - tDebug("%s %p TCP connection is closed, FD:%p numOfFds:%d", - pThreadObj->label, pFdObj->thandle, pFdObj, pThreadObj->numOfFds); + tDebug("%s %p TCP connection is closed, FD:%p fd:%d numOfFds:%d", + pThreadObj->label, pFdObj->thandle, pFdObj, pFdObj->fd, pThreadObj->numOfFds); tfree(pFdObj); } diff --git a/src/sync/inc/syncInt.h b/src/sync/inc/syncInt.h index 05b7adc5f4..6d0c52284f 100644 --- a/src/sync/inc/syncInt.h +++ b/src/sync/inc/syncInt.h @@ -139,15 +139,14 @@ typedef struct SsyncPeer { char id[TSDB_EP_LEN + 32]; // peer vgId + end point uint64_t version; uint64_t sversion; // track the peer version in retrieve process + uint64_t lastFileVer; // track the file version while retrieve + uint64_t lastWalVer; // track the wal version while retrieve int32_t syncFd; int32_t peerFd; // forward FD int32_t numOfRetrieves; // number of retrieves tried int32_t fileChanged; // a flag to indicate file is changed during retrieving process void * timer; void * pConn; - int32_t notifyFd; - int32_t watchNum; - int32_t *watchFd; int32_t refCount; // reference count struct SSyncNode *pSyncNode; } SSyncPeer; @@ -172,6 +171,7 @@ typedef struct SSyncNode { FNotifyRole notifyRole; FNotifyFlowCtrl notifyFlowCtrl; FNotifyFileSynced notifyFileSynced; + FGetVersion getVersion; pthread_mutex_t mutex; } SSyncNode; diff --git a/src/sync/src/syncMain.c b/src/sync/src/syncMain.c index d2d6d2d7fa..adac532f2d 100644 --- a/src/sync/src/syncMain.c +++ b/src/sync/src/syncMain.c @@ -196,6 +196,7 @@ int64_t syncStart(const SSyncInfo *pInfo) { pNode->confirmForward = pInfo->confirmForward; pNode->notifyFlowCtrl = pInfo->notifyFlowCtrl; pNode->notifyFileSynced = pInfo->notifyFileSynced; + pNode->getVersion = pInfo->getVersion; pNode->selfIndex = -1; 
pNode->vgId = pInfo->vgId; @@ -497,7 +498,6 @@ int32_t syncDecPeerRef(SSyncPeer *pPeer) { taosReleaseRef(tsSyncRefId, pPeer->pSyncNode->rid); sDebug("%s, resource is freed", pPeer->id); - tfree(pPeer->watchFd); tfree(pPeer); return 0; } @@ -540,7 +540,7 @@ static SSyncPeer *syncAddPeer(SSyncNode *pNode, const SNodeInfo *pInfo) { pPeer->ip = ip; pPeer->port = pInfo->nodePort; pPeer->fqdn[sizeof(pPeer->fqdn) - 1] = 0; - snprintf(pPeer->id, sizeof(pPeer->id), "vgId:%d, peer:%s:%u", pNode->vgId, pPeer->fqdn, pPeer->port); + snprintf(pPeer->id, sizeof(pPeer->id), "vgId:%d, nodeId:%d", pNode->vgId, pPeer->nodeId); pPeer->peerFd = -1; pPeer->syncFd = -1; @@ -1143,8 +1143,7 @@ static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) { pPeer->syncFd = connFd; syncCreateRestoreDataThread(pPeer); } else { - sDebug("%s, TCP connection is already up(pfd:%d), close one, new pfd:%d sfd:%d", pPeer->id, pPeer->peerFd, connFd, - pPeer->syncFd); + sDebug("%s, TCP connection is up, pfd:%d sfd:%d, old pfd:%d", pPeer->id, connFd, pPeer->syncFd, pPeer->peerFd); syncClosePeerConn(pPeer); pPeer->peerFd = connFd; pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd); diff --git a/src/sync/src/syncRestore.c b/src/sync/src/syncRestore.c index ed6b63c92d..d156c93865 100644 --- a/src/sync/src/syncRestore.c +++ b/src/sync/src/syncRestore.c @@ -52,12 +52,12 @@ static void syncRemoveExtraFile(SSyncPeer *pPeer, int32_t sindex, int32_t eindex static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) { SSyncNode *pNode = pPeer->pSyncNode; - SFileInfo minfo; memset(&minfo, 0, sizeof(minfo)); /* = {0}; */ // master file info - SFileInfo sinfo; memset(&sinfo, 0, sizeof(sinfo)); /* = {0}; */ // slave file info - SFileAck fileAck; + SFileInfo minfo; memset(&minfo, 0, sizeof(SFileInfo)); /* = {0}; */ + SFileInfo sinfo; memset(&sinfo, 0, sizeof(SFileInfo)); /* = {0}; */ + SFileAck fileAck = {0}; int32_t code = -1; char name[TSDB_FILENAME_LEN * 2] = {0}; - uint32_t 
pindex = 0; // index in last restore + uint32_t pindex = 0; // index in last restore bool fileChanged = false; *fversion = 0; @@ -134,7 +134,7 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) { // data file is changed, code shall be set to 1 *fversion = minfo.fversion; code = 1; - sDebug("%s, file changed while restore file", pPeer->id); + sDebug("%s, file changed after restore file, fver:%" PRIu64, pPeer->id, *fversion); } if (code < 0) { @@ -160,7 +160,7 @@ static int32_t syncRestoreWal(SSyncPeer *pPeer) { } if (pHead->len == 0) { - sDebug("%s, wal is synced over", pPeer->id); + sDebug("%s, wal is synced over, last wver:%" PRIu64, pPeer->id, lastVer); code = 0; break; } // wal sync over diff --git a/src/sync/src/syncRetrieve.c b/src/sync/src/syncRetrieve.c index 060badba9d..36b197dd46 100644 --- a/src/sync/src/syncRetrieve.c +++ b/src/sync/src/syncRetrieve.c @@ -16,6 +16,7 @@ #define _DEFAULT_SOURCE #include #include "os.h" +#include "taoserror.h" #include "tlog.h" #include "tutil.h" #include "tglobal.h" @@ -25,110 +26,102 @@ #include "tsync.h" #include "syncInt.h" -static int32_t syncAddIntoWatchList(SSyncPeer *pPeer, char *name) { - sDebug("%s, start to monitor:%s", pPeer->id, name); - - if (pPeer->notifyFd <= 0) { - pPeer->watchNum = 0; - pPeer->notifyFd = inotify_init1(IN_NONBLOCK); - if (pPeer->notifyFd < 0) { - sError("%s, failed to init inotify since %s", pPeer->id, strerror(errno)); - return -1; - } - - if (pPeer->watchFd == NULL) pPeer->watchFd = malloc(sizeof(int32_t) * tsMaxWatchFiles); - if (pPeer->watchFd == NULL) { - sError("%s, failed to allocate watchFd", pPeer->id); - return -1; - } - - memset(pPeer->watchFd, -1, sizeof(int32_t) * tsMaxWatchFiles); - } - - int32_t *wd = pPeer->watchFd + pPeer->watchNum; - - if (*wd >= 0) { - if (inotify_rm_watch(pPeer->notifyFd, *wd) < 0) { - sError("%s, failed to remove wd:%d since %s", pPeer->id, *wd, strerror(errno)); - return -1; - } - } - - *wd = inotify_add_watch(pPeer->notifyFd, 
name, IN_MODIFY | IN_DELETE); - if (*wd == -1) { - sError("%s, failed to add %s since %s", pPeer->id, name, strerror(errno)); +static int32_t syncGetWalVersion(SSyncNode *pNode, SSyncPeer *pPeer) { + uint64_t fver, wver; + int32_t code = (*pNode->getVersion)(pNode->vgId, &fver, &wver); + if (code != 0) { + sDebug("%s, vnode is commiting while retrieve, last wver:%" PRIu64, pPeer->id, pPeer->lastWalVer); return -1; - } else { - sDebug("%s, monitor %s, wd:%d watchNum:%d", pPeer->id, name, *wd, pPeer->watchNum); } - pPeer->watchNum = (pPeer->watchNum + 1) % tsMaxWatchFiles; - - return 0; + pPeer->lastWalVer = wver; + return code; } -static int32_t syncAreFilesModified(SSyncPeer *pPeer) { - if (pPeer->notifyFd <= 0) return 0; +static bool syncIsWalModified(SSyncNode *pNode, SSyncPeer *pPeer) { + uint64_t fver, wver; + int32_t code = (*pNode->getVersion)(pNode->vgId, &fver, &wver); + if (code != 0) { + sDebug("%s, vnode is commiting while retrieve, last wver:%" PRIu64, pPeer->id, pPeer->lastWalVer); + return true; + } - char buf[2048]; - int32_t len = read(pPeer->notifyFd, buf, sizeof(buf)); - if (len < 0 && errno != EAGAIN) { - sError("%s, failed to read notify FD since %s", pPeer->id, strerror(errno)); + if (wver != pPeer->lastWalVer) { + sDebug("%s, wal is modified while retrieve, wver:%" PRIu64 ", last:%" PRIu64, pPeer->id, wver, pPeer->lastWalVer); + return true; + } + + return false; +} + +static int32_t syncGetFileVersion(SSyncNode *pNode, SSyncPeer *pPeer) { + uint64_t fver, wver; + int32_t code = (*pNode->getVersion)(pNode->vgId, &fver, &wver); + if (code != 0) { + sDebug("%s, vnode is commiting while retrieve, last fver:%" PRIu64, pPeer->id, pPeer->lastFileVer); return -1; } - int32_t code = 0; - if (len > 0) { - const struct inotify_event *event; - char *ptr; - for (ptr = buf; ptr < buf + len; ptr += sizeof(struct inotify_event) + event->len) { - event = (const struct inotify_event *)ptr; - if ((event->mask & IN_MODIFY) || (event->mask & IN_DELETE)) { - 
sDebug("%s, processed file is changed", pPeer->id); - pPeer->fileChanged = 1; - code = 1; - break; - } - } + pPeer->lastFileVer = fver; + return code; +} + +static bool syncAreFilesModified(SSyncNode *pNode, SSyncPeer *pPeer) { + uint64_t fver, wver; + int32_t code = (*pNode->getVersion)(pNode->vgId, &fver, &wver); + if (code != 0) { + sDebug("%s, vnode is commiting while retrieve, last fver:%" PRIu64, pPeer->id, pPeer->lastFileVer); + pPeer->fileChanged = 1; + return true; } - return code; + if (fver != pPeer->lastFileVer) { + sDebug("%s, files are modified while retrieve, fver:%" PRIu64 ", last:%" PRIu64, pPeer->id, fver, pPeer->lastFileVer); + pPeer->fileChanged = 1; + return true; + } + + pPeer->fileChanged = 0; + return false; } static int32_t syncRetrieveFile(SSyncPeer *pPeer) { SSyncNode *pNode = pPeer->pSyncNode; - SFileInfo fileInfo; - SFileAck fileAck; + SFileInfo fileInfo; memset(&fileInfo, 0, sizeof(SFileInfo)); + SFileAck fileAck = {0}; int32_t code = -1; char name[TSDB_FILENAME_LEN * 2] = {0}; - memset(&fileInfo, 0, sizeof(fileInfo)); - memset(&fileAck, 0, sizeof(fileAck)); + if (syncGetFileVersion(pNode, pPeer) < 0) return -1; while (1) { // retrieve file info fileInfo.name[0] = 0; + fileInfo.size = 0; fileInfo.magic = (*pNode->getFileInfo)(pNode->vgId, fileInfo.name, &fileInfo.index, TAOS_SYNC_MAX_INDEX, &fileInfo.size, &fileInfo.fversion); // fileInfo.size = htonl(size); + sDebug("%s, file:%s info is sent, size:%" PRId64, pPeer->id, fileInfo.name, fileInfo.size); // send the file info int32_t ret = taosWriteMsg(pPeer->syncFd, &(fileInfo), sizeof(fileInfo)); if (ret < 0) { + code = -1; sError("%s, failed to write file:%s info while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); break; } // if no file anymore, break if (fileInfo.magic == 0 || fileInfo.name[0] == 0) { - sDebug("%s, no more files to sync", pPeer->id); code = 0; + sDebug("%s, no more files to sync", pPeer->id); break; } // wait for the ack from peer ret = 
taosReadMsg(pPeer->syncFd, &fileAck, sizeof(fileAck)); if (ret < 0) { + code = -1; sError("%s, failed to read file:%s ack while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); break; } @@ -136,15 +129,6 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) { // set the peer sync version pPeer->sversion = fileInfo.fversion; - // get the full path to file - snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name); - - // add the file into watch list - if (syncAddIntoWatchList(pPeer, name) < 0) { - sError("%s, failed to watch file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); - break; - } - // if sync is not required, continue if (fileAck.sync == 0) { fileInfo.index++; @@ -152,9 +136,13 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) { continue; } + // get the full path to file + snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name); + // send the file to peer int32_t sfd = open(name, O_RDONLY); if (sfd < 0) { + code = -1; sError("%s, failed to open file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); break; } @@ -162,138 +150,112 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) { ret = taosSendFile(pPeer->syncFd, sfd, NULL, fileInfo.size); close(sfd); if (ret < 0) { + code = -1; sError("%s, failed to send file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); break; } - sDebug("%s, %s is sent, size:%" PRId64, pPeer->id, name, fileInfo.size); + sDebug("%s, file:%s is sent, size:%" PRId64, pPeer->id, fileInfo.name, fileInfo.size); fileInfo.index++; // check if processed files are modified - if (syncAreFilesModified(pPeer) != 0) { - sInfo("%s, file:%s are modified while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno)); + if (syncAreFilesModified(pNode, pPeer)) { + code = -1; break; } } - if (code < 0) { - sError("%s, failed to retrieve file", pPeer->id); + if (code != TSDB_CODE_SUCCESS) { + sError("%s, failed to 
retrieve file, code:0x%x", pPeer->id, code); } return code; } -/* if only a partial record is read out, set the IN_MODIFY flag in event, - so upper layer will reload the file to get a complete record */ -static int32_t syncReadOneWalRecord(int32_t sfd, SWalHead *pHead, uint32_t *pEvent) { - int32_t ret; +// if only a partial record is read out, upper layer will reload the file to get a complete record +static int32_t syncReadOneWalRecord(int32_t sfd, SWalHead *pHead) { + int32_t ret = read(sfd, pHead, sizeof(SWalHead)); + if (ret < 0) { + sError("sfd:%d, failed to read wal head since %s, ret:%d", sfd, strerror(errno), ret); + return -1; + } - ret = read(sfd, pHead, sizeof(SWalHead)); - if (ret < 0) return -1; - if (ret == 0) return 0; + if (ret == 0) { + sTrace("sfd:%d, read to the end of file, ret:%d", sfd, ret); + return 0; + } if (ret != sizeof(SWalHead)) { // file is not at end yet, it shall be reloaded - *pEvent = *pEvent | IN_MODIFY; + sDebug("sfd:%d, a partial wal head is read out, ret:%d", sfd, ret); return 0; } assert(pHead->len <= TSDB_MAX_WAL_SIZE); ret = read(sfd, pHead->cont, pHead->len); - if (ret < 0) return -1; + if (ret < 0) { + sError("sfd:%d, failed to read wal content since %s, ret:%d", sfd, strerror(errno), ret); + return -1; + } if (ret != pHead->len) { // file is not at end yet, it shall be reloaded - *pEvent = *pEvent | IN_MODIFY; + sDebug("sfd:%d, a partial wal conetnt is read out, ret:%d", sfd, ret); return 0; } return sizeof(SWalHead) + pHead->len; } -static int32_t syncMonitorLastWal(SSyncPeer *pPeer, char *name) { - pPeer->watchNum = 0; - taosClose(pPeer->notifyFd); - pPeer->notifyFd = inotify_init1(IN_NONBLOCK); - if (pPeer->notifyFd < 0) { - sError("%s, failed to init inotify since %s", pPeer->id, strerror(errno)); - return -1; - } - - if (pPeer->watchFd == NULL) pPeer->watchFd = malloc(sizeof(int32_t) * tsMaxWatchFiles); - if (pPeer->watchFd == NULL) { - sError("%s, failed to allocate watchFd", pPeer->id); - return -1; - } - - 
memset(pPeer->watchFd, -1, sizeof(int32_t) * tsMaxWatchFiles); - int32_t *wd = pPeer->watchFd; - - *wd = inotify_add_watch(pPeer->notifyFd, name, IN_MODIFY | IN_CLOSE_WRITE); - if (*wd == -1) { - sError("%s, failed to watch last wal since %s", pPeer->id, strerror(errno)); - return -1; - } - - return 0; -} - -static int32_t syncCheckLastWalChanges(SSyncPeer *pPeer, uint32_t *pEvent) { - char buf[2048]; - int32_t len = read(pPeer->notifyFd, buf, sizeof(buf)); - if (len < 0 && errno != EAGAIN) { - sError("%s, failed to read notify FD since %s", pPeer->id, strerror(errno)); - return -1; - } - - if (len == 0) return 0; - - struct inotify_event *event; - for (char *ptr = buf; ptr < buf + len; ptr += sizeof(struct inotify_event) + event->len) { - event = (struct inotify_event *)ptr; - if (event->mask & IN_MODIFY) *pEvent = *pEvent | IN_MODIFY; - if (event->mask & IN_CLOSE_WRITE) *pEvent = *pEvent | IN_CLOSE_WRITE; - } - - if (pEvent != 0) sDebug("%s, last wal event:0x%x", pPeer->id, *pEvent); - - return 0; -} - -static int32_t syncRetrieveLastWal(SSyncPeer *pPeer, char *name, uint64_t fversion, int64_t offset, uint32_t *pEvent) { - SWalHead *pHead = malloc(SYNC_MAX_SIZE); - int32_t code = -1; - int32_t bytes = 0; - int32_t sfd; - - sfd = open(name, O_RDONLY); +static int32_t syncRetrieveLastWal(SSyncPeer *pPeer, char *name, uint64_t fversion, int64_t offset) { + int32_t sfd = open(name, O_RDONLY); if (sfd < 0) { - free(pHead); + sError("%s, failed to open wal:%s for retrieve since:%s", pPeer->id, name, tstrerror(errno)); return -1; } - (void)lseek(sfd, offset, SEEK_SET); - sDebug("%s, retrieve last wal, offset:%" PRId64 " fver:%" PRIu64, pPeer->id, offset, fversion); + int32_t code = taosLSeek(sfd, offset, SEEK_SET); + if (code < 0) { + sError("%s, failed to seek %" PRId64 " in wal:%s for retrieve since:%s", pPeer->id, offset, name, tstrerror(errno)); + close(sfd); + return -1; + } + + sDebug("%s, retrieve last wal:%s, offset:%" PRId64 " fver:%" PRIu64, pPeer->id, name, 
offset, fversion); + + SWalHead *pHead = malloc(SYNC_MAX_SIZE); + int32_t bytes = 0; while (1) { - int32_t wsize = syncReadOneWalRecord(sfd, pHead, pEvent); - if (wsize < 0) break; - if (wsize == 0) { - code = 0; + code = syncReadOneWalRecord(sfd, pHead); + if (code < 0) { + sError("%s, failed to read one record from wal:%s", pPeer->id, name); + break; + } + + if (code == 0) { + code = bytes; + sDebug("%s, read to the end of wal, bytes:%d", pPeer->id, bytes); break; } sTrace("%s, last wal is forwarded, hver:%" PRIu64, pPeer->id, pHead->version); - int32_t ret = taosWriteMsg(pPeer->syncFd, pHead, wsize); - if (ret != wsize) break; - pPeer->sversion = pHead->version; + int32_t wsize = code; + int32_t ret = taosWriteMsg(pPeer->syncFd, pHead, wsize); + if (ret != wsize) { + code = -1; + sError("%s, failed to forward wal since %s, hver:%" PRIu64, pPeer->id, strerror(errno), pHead->version); + break; + } + + pPeer->sversion = pHead->version; bytes += wsize; if (pHead->version >= fversion && fversion > 0) { code = 0; - bytes = 0; + sDebug("%s, retrieve wal finished, hver:%" PRIu64 " fver:%" PRIu64, pPeer->id, pHead->version, fversion); break; } } @@ -301,92 +263,62 @@ static int32_t syncRetrieveLastWal(SSyncPeer *pPeer, char *name, uint64_t fversi free(pHead); close(sfd); - if (code == 0) return bytes; - return -1; + return code; } static int32_t syncProcessLastWal(SSyncPeer *pPeer, char *wname, int64_t index) { SSyncNode *pNode = pPeer->pSyncNode; - int32_t code = -1; - char fname[TSDB_FILENAME_LEN * 2]; // full path to wal file + int32_t once = 0; // last WAL has once ever been processed + int64_t offset = 0; + uint64_t fversion = 0; + char fname[TSDB_FILENAME_LEN * 2] = {0}; // full path to wal file - if (syncAreFilesModified(pPeer) != 0) return -1; + // get full path to wal file + snprintf(fname, sizeof(fname), "%s/%s", pNode->path, wname); + sDebug("%s, start to retrieve last wal:%s", pPeer->id, fname); while (1) { - int32_t once = 0; // last WAL has once ever been 
processed - int64_t offset = 0; - uint64_t fversion = 0; - uint32_t event = 0; + if (syncAreFilesModified(pNode, pPeer)) return -1; + if (syncGetWalVersion(pNode, pPeer) < 0) return -1; - // get full path to wal file - snprintf(fname, sizeof(fname), "%s/%s", pNode->path, wname); - sDebug("%s, start to retrieve last wal:%s", pPeer->id, fname); - - // monitor last wal - if (syncMonitorLastWal(pPeer, fname) < 0) break; - - while (1) { - int32_t bytes = syncRetrieveLastWal(pPeer, fname, fversion, offset, &event); - if (bytes < 0) break; - - // check file changes - if (syncCheckLastWalChanges(pPeer, &event) < 0) break; - - // if file is not updated or updated once, set the fversion and sstatus - if (((event & IN_MODIFY) == 0) || once) { - if (fversion == 0) { - pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt - sDebug("%s, fversion is 0 then set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); - fversion = nodeVersion; // must read data to fversion - } - } - - // if all data up to fversion is read out, it is over - if (pPeer->sversion >= fversion && fversion > 0) { - code = 0; - sDebug("%s, data up to fver:%" PRIu64 " has been read out, bytes:%d", pPeer->id, fversion, bytes); - break; - } - - // if all data are read out, and no update - if ((bytes == 0) && ((event & IN_MODIFY) == 0)) { - // wal file is closed, break - if (event & IN_CLOSE_WRITE) { - code = 0; - sDebug("%s, current wal is closed", pPeer->id); - break; - } - - // wal not closed, it means some data not flushed to disk, wait for a while - usleep(10000); - } - - // if bytes>0, file is updated, or fversion is not reached but file still open, read again - once = 1; - offset += bytes; - sDebug("%s, retrieve last wal, bytes:%d", pPeer->id, bytes); - event = event & (~IN_MODIFY); // clear IN_MODIFY flag + int32_t bytes = syncRetrieveLastWal(pPeer, fname, fversion, offset); + if (bytes < 0) { + sDebug("%s, failed to retrieve last wal", pPeer->id); + return bytes; } - if (code < 0) break; - if 
(pPeer->sversion >= fversion && fversion > 0) break; + // check file changes + bool walModified = syncIsWalModified(pNode, pPeer); - index++; - wname[0] = 0; - code = (*pNode->getWalInfo)(pNode->vgId, wname, &index); - if (code < 0) break; - if (wname[0] == 0) { - code = 0; - break; + // if file is not updated or updated once, set the fversion and sstatus + if (!walModified || once) { + if (fversion == 0) { + pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt + fversion = nodeVersion; // must read data to fversion + sDebug("%s, set sstatus:%s and fver:%" PRIu64, pPeer->id, syncStatus[pPeer->sstatus], fversion); + } } - // current last wal is closed, there is a new one - sDebug("%s, last wal is closed, try new one", pPeer->id); + // if all data up to fversion is read out, it is over + if (pPeer->sversion >= fversion && fversion > 0) { + sDebug("%s, data up to fver:%" PRIu64 " has been read out, bytes:%d sver:%" PRIu64, pPeer->id, fversion, bytes, + pPeer->sversion); + return 0; + } + + // if all data are read out, and no update + if (bytes == 0 && !walModified) { + // wal not closed, it means some data not flushed to disk, wait for a while + usleep(10000); + } + + // if bytes > 0, file is updated, or fversion is not reached but file still open, read again + once = 1; + offset += bytes; + sDebug("%s, continue retrieve last wal, bytes:%d offset:%" PRId64, pPeer->id, bytes, offset); } - taosClose(pPeer->notifyFd); - - return code; + return -1; } static int32_t syncRetrieveWal(SSyncPeer *pPeer) { @@ -394,7 +326,6 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) { char fname[TSDB_FILENAME_LEN * 3]; char wname[TSDB_FILENAME_LEN * 2]; int32_t size; - struct stat fstat; int32_t code = -1; int64_t index = 0; @@ -402,9 +333,14 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) { // retrieve wal info wname[0] = 0; code = (*pNode->getWalInfo)(pNode->vgId, wname, &index); - if (code < 0) break; // error + if (code < 0) { + sError("%s, failed to get wal info 
since:%s, code:0x%x", pPeer->id, strerror(errno), code); + break; + } + if (wname[0] == 0) { // no wal file - sDebug("%s, no wal file", pPeer->id); + code = 0; + sDebug("%s, no wal file anymore", pPeer->id); break; } @@ -416,22 +352,35 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) { // get the full path to wal file snprintf(fname, sizeof(fname), "%s/%s", pNode->path, wname); - // send wal file, - // inotify is not required, old wal file won't be modified, even remove is ok - if (stat(fname, &fstat) < 0) break; - size = fstat.st_size; + // send wal file, old wal file won't be modified, even remove is ok + struct stat fstat; + if (stat(fname, &fstat) < 0) { + code = -1; + sDebug("%s, failed to stat wal:%s for retrieve since %s, code:0x%x", pPeer->id, fname, strerror(errno), code); + break; + } + size = fstat.st_size; sDebug("%s, retrieve wal:%s size:%d", pPeer->id, fname, size); + int32_t sfd = open(fname, O_RDONLY); - if (sfd < 0) break; + if (sfd < 0) { + code = -1; + sError("%s, failed to open wal:%s for retrieve since %s, code:0x%x", pPeer->id, fname, strerror(errno), code); + break; + } code = taosSendFile(pPeer->syncFd, sfd, NULL, size); close(sfd); - if (code < 0) break; + if (code < 0) { + sError("%s, failed to send wal:%s for retrieve since %s, code:0x%x", pPeer->id, fname, strerror(errno), code); + break; + } - index++; - - if (syncAreFilesModified(pPeer) != 0) break; + if (syncAreFilesModified(pNode, pPeer)) { + code = -1; + break; + } } if (code == 0) { @@ -440,9 +389,9 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) { SWalHead walHead; memset(&walHead, 0, sizeof(walHead)); - code = taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead)); + taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead)); } else { - sError("%s, failed to send wal since %s", pPeer->id, strerror(errno)); + sError("%s, failed to send wal since %s, code:0x%x", pPeer->id, strerror(errno), code); } return code; @@ -481,18 +430,19 @@ static int32_t 
syncRetrieveDataStepByStep(SSyncPeer *pPeer) { pPeer->sversion = 0; pPeer->sstatus = TAOS_SYNC_STATUS_FILE; - sInfo("%s, start to retrieve file, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); - if (syncRetrieveFile(pPeer) < 0) { - sError("%s, failed to retrieve file", pPeer->id); + sInfo("%s, start to retrieve files, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]); + if (syncRetrieveFile(pPeer) != 0) { + sError("%s, failed to retrieve files", pPeer->id); return -1; } // if no files are synced, there must be wal to sync, sversion must be larger than one if (pPeer->sversion == 0) pPeer->sversion = 1; - sInfo("%s, start to retrieve wal", pPeer->id); - if (syncRetrieveWal(pPeer) < 0) { - sError("%s, failed to retrieve wal", pPeer->id); + sInfo("%s, start to retrieve wals", pPeer->id); + int32_t code = syncRetrieveWal(pPeer); + if (code != 0) { + sError("%s, failed to retrieve wals, code:0x%x", pPeer->id, code); return -1; } @@ -506,7 +456,6 @@ void *syncRetrieveData(void *param) { if (pNode->notifyFlowCtrl) (*pNode->notifyFlowCtrl)(pNode->vgId, pPeer->numOfRetrieves); - pPeer->fileChanged = 0; pPeer->syncFd = taosOpenTcpClientSocket(pPeer->ip, pPeer->port, 0); if (pPeer->syncFd < 0) { sError("%s, failed to open socket to sync", pPeer->id); @@ -529,7 +478,6 @@ void *syncRetrieveData(void *param) { } pPeer->fileChanged = 0; - taosClose(pPeer->notifyFd); taosClose(pPeer->syncFd); syncDecPeerRef(pPeer); diff --git a/src/tsdb/src/tsdbFile.c b/src/tsdb/src/tsdbFile.c index 03c50d42f7..5d8933d141 100644 --- a/src/tsdb/src/tsdbFile.c +++ b/src/tsdb/src/tsdbFile.c @@ -248,8 +248,13 @@ SFileGroup *tsdbCreateFGroupIfNeed(STsdbRepo *pRepo, char *dataDir, int fid) { if (pGroup == NULL) { // if not exists, create one pFGroup->fileId = fid; for (int type = 0; type < TSDB_FILE_TYPE_MAX; type++) { - if (tsdbCreateFile(&pFGroup->files[type], pRepo, fid, type) < 0) - goto _err; + if (tsdbCreateFile(&pFGroup->files[type], pRepo, fid, type) < 0) { + for (int i = type; i 
>= 0; i--) { + remove(pFGroup->files[i].fname); + } + + return NULL; + } } pthread_rwlock_wrlock(&pFileH->fhlock); @@ -261,10 +266,6 @@ SFileGroup *tsdbCreateFGroupIfNeed(STsdbRepo *pRepo, char *dataDir, int fid) { } return pGroup; - -_err: - for (int type = 0; type < TSDB_FILE_TYPE_MAX; type++) tsdbDestroyFile(&pGroup->files[type]); - return NULL; } void tsdbInitFileGroupIter(STsdbFileH *pFileH, SFileGroupIter *pIter, int direction) { diff --git a/src/tsdb/src/tsdbRWHelper.c b/src/tsdb/src/tsdbRWHelper.c index 5b65b2185a..8e57066d27 100644 --- a/src/tsdb/src/tsdbRWHelper.c +++ b/src/tsdb/src/tsdbRWHelper.c @@ -917,6 +917,8 @@ static int tsdbInsertSuperBlock(SRWHelper *pHelper, SCompBlock *pCompBlock, int ASSERT(pHelper->pCompInfo->blocks[0].keyLast < pHelper->pCompInfo->blocks[1].keyFirst); } + ASSERT((blkIdx == pIdx->numOfBlocks -1) || (!pCompBlock->last)); + tsdbDebug("vgId:%d tid:%d a super block is inserted at index %d", REPO_ID(pHelper->pRepo), pHelper->tableInfo.tid, blkIdx); @@ -1042,6 +1044,8 @@ static int tsdbUpdateSuperBlock(SRWHelper *pHelper, SCompBlock *pCompBlock, int pIdx->maxKey = blockAtIdx(pHelper, pIdx->numOfBlocks - 1)->keyLast; pIdx->hasLast = (uint32_t)blockAtIdx(pHelper, pIdx->numOfBlocks - 1)->last; + ASSERT((blkIdx == pIdx->numOfBlocks-1) || (!pCompBlock->last)); + tsdbDebug("vgId:%d tid:%d a super block is updated at index %d", REPO_ID(pHelper->pRepo), pHelper->tableInfo.tid, blkIdx); @@ -1622,11 +1626,7 @@ static int tsdbProcessMergeCommit(SRWHelper *pHelper, SCommitIter *pCommitIter, pCfg->update); if (pDataCols->numOfRows == 0) break; - if (tblkIdx == pIdx->numOfBlocks - 1) { - if (tsdbWriteBlockToProperFile(pHelper, pDataCols, &compBlock) < 0) return -1; - } else { - if (tsdbWriteBlockToFile(pHelper, helperDataF(pHelper), pDataCols, &compBlock, false, true) < 0) return -1; - } + if (tsdbWriteBlockToFile(pHelper, helperDataF(pHelper), pDataCols, &compBlock, false, true) < 0) return -1; if (round == 0) { if (oBlock.last && 
pHelper->hasOldLastBlock) pHelper->hasOldLastBlock = false; diff --git a/src/util/inc/hash.h b/src/util/inc/hash.h index 42bc136584..b0319d3e13 100644 --- a/src/util/inc/hash.h +++ b/src/util/inc/hash.h @@ -31,16 +31,18 @@ extern "C" { typedef void (*_hash_free_fn_t)(void *param); typedef struct SHashNode { -// char *key; struct SHashNode *next; uint32_t hashVal; // the hash value of key uint32_t keyLen; // length of the key -// char *data; + size_t dataLen; // length of data + int8_t count; // reference count + int8_t removed; // flag to indicate removed + char data[]; } SHashNode; -#define GET_HASH_NODE_KEY(_n) ((char*)(_n) + sizeof(SHashNode)) -#define GET_HASH_NODE_DATA(_n) ((char*)(_n) + sizeof(SHashNode) + (_n)->keyLen) - +#define GET_HASH_NODE_KEY(_n) ((char*)(_n) + sizeof(SHashNode) + (_n)->dataLen) +#define GET_HASH_NODE_DATA(_n) ((char*)(_n) + sizeof(SHashNode)) +#define GET_HASH_PNODE(_n) ((char*)(_n) - sizeof(SHashNode)); typedef enum SHashLockTypeE { HASH_NO_LOCK = 0, HASH_ENTRY_LOCK = 1, @@ -65,15 +67,6 @@ typedef struct SHashObj { SArray *pMemBlock; // memory block allocated for SHashEntry } SHashObj; -typedef struct SHashMutableIterator { - SHashObj *pHashObj; - int32_t entryIndex; - SHashNode *pCur; - SHashNode *pNext; // current node can be deleted for mutable iterator, so keep the next one before return current - size_t numOfChecked; // already check number of elements in hash table - size_t numOfEntries; // number of entries while the iterator is created -} SHashMutableIterator; - /** * init the hash table * @@ -142,33 +135,9 @@ int32_t taosHashCondTraverse(SHashObj *pHashObj, bool (*fp)(void *, void *), voi */ void taosHashCleanup(SHashObj *pHashObj); -/** - * - * @param pHashObj - * @return - */ -SHashMutableIterator* taosHashCreateIter(SHashObj *pHashObj); - -/** - * - * @param iter - * @return - */ -bool taosHashIterNext(SHashMutableIterator *iter); - -/** - * - * @param iter - * @return - */ -void *taosHashIterGet(SHashMutableIterator 
*iter); - -/** - * - * @param iter - * @return - */ -void* taosHashDestroyIter(SHashMutableIterator* iter); +/* +void *SHashMutableIterator* taosHashCreateIter(SHashObj *pHashObj, void *); +*/ /** * @@ -179,6 +148,9 @@ int32_t taosHashGetMaxOverflowLinkLength(const SHashObj *pHashObj); size_t taosHashGetMemSize(const SHashObj *pHashObj); +void *taosHashIterate(SHashObj *pHashObj, void *p); +void taosHashCancelIterate(SHashObj *pHashObj, void *p); + #ifdef __cplusplus } #endif diff --git a/src/util/inc/tfile.h b/src/util/inc/tfile.h index 10b7c1df35..066040170e 100644 --- a/src/util/inc/tfile.h +++ b/src/util/inc/tfile.h @@ -20,23 +20,26 @@ extern "C" { #endif -#include - // init taos file module -int32_t tfinit(); +int32_t tfInit(); // clean up taos file module -void tfcleanup(); +void tfCleanup(); // the same syntax as UNIX standard open/close/read/write // but FD is int64_t and will never be reused -int64_t tfopen(const char *pathname, int32_t flags); -int64_t tfclose(int64_t tfd); -int64_t tfwrite(int64_t tfd, void *buf, int64_t count); -int64_t tfread(int64_t tfd, void *buf, int64_t count); +int64_t tfOpen(const char *pathname, int32_t flags); +int64_t tfOpenM(const char *pathname, int32_t flags, mode_t mode); +int64_t tfClose(int64_t tfd); +int64_t tfWrite(int64_t tfd, void *buf, int64_t count); +int64_t tfRead(int64_t tfd, void *buf, int64_t count); +int32_t tfFsync(int64_t tfd); +bool tfValid(int64_t tfd); +int64_t tfLseek(int64_t tfd, int64_t offset, int32_t whence); +int32_t tfFtruncate(int64_t tfd, int64_t length); #ifdef __cplusplus } #endif -#endif // TDENGINE_TREF_H +#endif // TDENGINE_TFILE_H diff --git a/src/util/src/hash.c b/src/util/src/hash.c index 03a7342497..0e3e0d3e24 100644 --- a/src/util/src/hash.c +++ b/src/util/src/hash.c @@ -76,7 +76,7 @@ static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { static FORCE_INLINE SHashNode *doSearchInEntryList(SHashEntry *pe, const void *key, size_t keyLen, uint32_t hashVal) { SHashNode *pNode = 
pe->next; while (pNode) { - if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0)) { + if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0) && pNode->removed == 0) { assert(pNode->hashVal == hashVal); break; } @@ -114,15 +114,25 @@ static SHashNode *doCreateHashNode(const void *key, size_t keyLen, const void *p * @param dsize size of actual data * @return hash node */ -static FORCE_INLINE SHashNode *doUpdateHashNode(SHashEntry* pe, SHashNode* prev, SHashNode *pNode, SHashNode *pNewNode) { +static FORCE_INLINE SHashNode *doUpdateHashNode(SHashObj *pHashObj, SHashEntry* pe, SHashNode* prev, SHashNode *pNode, SHashNode *pNewNode) { assert(pNode->keyLen == pNewNode->keyLen); + + pNode->count--; if (prev != NULL) { prev->next = pNewNode; } else { pe->next = pNewNode; } - pNewNode->next = pNode->next; + if (pNode->count <= 0) { + pNewNode->next = pNode->next; + DO_FREE_HASH_NODE(pNode); + } else { + pNewNode->next = pNode; + pe->num++; + atomic_add_fetch_64(&pHashObj->size, 1); + } + return pNewNode; } @@ -139,11 +149,11 @@ static void pushfrontNodeInEntryList(SHashEntry *pEntry, SHashNode *pNode); * @param pIter * @return */ -static SHashNode *getNextHashNode(SHashMutableIterator *pIter); SHashObj *taosHashInit(size_t capacity, _hash_fn_t fn, bool update, SHashLockTypeE type) { - if (capacity == 0 || fn == NULL) { - return NULL; + assert(fn != NULL); + if (capacity == 0) { + capacity = 4; } SHashObj *pHashObj = (SHashObj *)calloc(1, sizeof(SHashObj)); @@ -213,7 +223,7 @@ int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *da SHashNode* prev = NULL; while (pNode) { - if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0)) { + if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0) && pNode->removed == 0) { assert(pNode->hashVal == hashVal); break; } @@ -244,8 +254,7 @@ int32_t taosHashPut(SHashObj *pHashObj, const void 
*key, size_t keyLen, void *da } else { // not support the update operation, return error if (pHashObj->enableUpdate) { - doUpdateHashNode(pe, prev, pNode, pNewNode); - DO_FREE_HASH_NODE(pNode); + doUpdateHashNode(pHashObj, pe, prev, pNode, pNewNode); } else { DO_FREE_HASH_NODE(pNewNode); } @@ -335,22 +344,10 @@ int32_t taosHashRemoveWithData(SHashObj *pHashObj, const void *key, size_t keyLe int32_t slot = HASH_INDEX(hashVal, pHashObj->capacity); SHashEntry *pe = pHashObj->hashList[slot]; - // no data, return directly - if (pe->num == 0) { - __rd_unlock(&pHashObj->lock, pHashObj->type); - return -1; - } - if (pHashObj->type == HASH_ENTRY_LOCK) { taosWLockLatch(&pe->latch); } - if (pe->num == 0) { - assert(pe->next == NULL); - } else { - assert(pe->next != NULL); - } - // double check after locked if (pe->num == 0) { assert(pe->next == NULL); @@ -360,37 +357,37 @@ int32_t taosHashRemoveWithData(SHashObj *pHashObj, const void *key, size_t keyLe return -1; } + int code = -1; SHashNode *pNode = pe->next; - SHashNode *pRes = NULL; + SHashNode *prevNode = NULL; - // remove it - if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0)) { - pe->num -= 1; - pRes = pNode; - pe->next = pNode->next; - } else { - while (pNode->next != NULL) { - if (((pNode->next)->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY((pNode->next)), key, keyLen) == 0)) { - assert((pNode->next)->hashVal == hashVal); - break; + while (pNode) { + if ((pNode->keyLen == keyLen) && (memcmp(GET_HASH_NODE_KEY(pNode), key, keyLen) == 0) && pNode->removed == 0) + break; + + prevNode = pNode; + pNode = pNode->next; + } + + if (pNode) { + code = 0; // it is found + + pNode->count--; + pNode->removed = 1; + if (pNode->count <= 0) { + if (prevNode) { + prevNode->next = pNode->next; + } else { + pe->next = pNode->next; } + + if (data) memcpy(data, GET_HASH_NODE_DATA(pNode), dsize); - pNode = pNode->next; + pe->num--; + atomic_sub_fetch_64(&pHashObj->size, 1); + FREE_HASH_NODE(pHashObj, 
pNode); } - - - if (pNode->next != NULL) { - pe->num -= 1; - pRes = pNode->next; - pNode->next = pNode->next->next; - } - } - - if (pe->num == 0) { - assert(pe->next == NULL); - } else { - assert(pe->next != NULL); - } + } if (pHashObj->type == HASH_ENTRY_LOCK) { taosWUnLockLatch(&pe->latch); @@ -398,17 +395,7 @@ int32_t taosHashRemoveWithData(SHashObj *pHashObj, const void *key, size_t keyLe __rd_unlock(&pHashObj->lock, pHashObj->type); - if (data != NULL && pRes != NULL) { - memcpy(data, GET_HASH_NODE_DATA(pRes), dsize); - } - - if (pRes != NULL) { - atomic_sub_fetch_64(&pHashObj->size, 1); - FREE_HASH_NODE(pHashObj, pRes); - return 0; - } else { - return -1; - } + return code; } int32_t taosHashCondTraverse(SHashObj *pHashObj, bool (*fp)(void *, void *), void *param) { @@ -531,98 +518,6 @@ void taosHashCleanup(SHashObj *pHashObj) { free(pHashObj); } -SHashMutableIterator *taosHashCreateIter(SHashObj *pHashObj) { - SHashMutableIterator *pIter = calloc(1, sizeof(SHashMutableIterator)); - if (pIter == NULL) { - return NULL; - } - - pIter->pHashObj = pHashObj; - - // keep it in local variable, in case the resize operation expand the size - pIter->numOfEntries = pHashObj->capacity; - return pIter; -} - -bool taosHashIterNext(SHashMutableIterator *pIter) { - if (pIter == NULL) { - return false; - } - - size_t size = taosHashGetSize(pIter->pHashObj); - if (size == 0) { - return false; - } - - // check the first one - if (pIter->numOfChecked == 0) { - assert(pIter->pCur == NULL && pIter->pNext == NULL); - - while (1) { - SHashEntry *pEntry = pIter->pHashObj->hashList[pIter->entryIndex]; - if (pEntry->num == 0) { - assert(pEntry->next == NULL); - - pIter->entryIndex++; - continue; - } - - if (pIter->pHashObj->type == HASH_ENTRY_LOCK) { - taosRLockLatch(&pEntry->latch); - } - - pIter->pCur = pEntry->next; - - if (pIter->pCur->next) { - pIter->pNext = pIter->pCur->next; - - if (pIter->pHashObj->type == HASH_ENTRY_LOCK) { - taosRUnLockLatch(&pEntry->latch); - } - } else { - 
if (pIter->pHashObj->type == HASH_ENTRY_LOCK) { - taosRUnLockLatch(&pEntry->latch); - } - - pIter->pNext = getNextHashNode(pIter); - } - - break; - } - - pIter->numOfChecked++; - return true; - } else { - assert(pIter->pCur != NULL); - if (pIter->pNext) { - pIter->pCur = pIter->pNext; - } else { // no more data in the hash list - return false; - } - - pIter->numOfChecked++; - - if (pIter->pCur->next) { - pIter->pNext = pIter->pCur->next; - } else { - pIter->pNext = getNextHashNode(pIter); - } - - return true; - } -} - -void *taosHashIterGet(SHashMutableIterator *iter) { return (iter == NULL) ? NULL : GET_HASH_NODE_DATA(iter->pCur); } - -void *taosHashDestroyIter(SHashMutableIterator *iter) { - if (iter == NULL) { - return NULL; - } - - free(iter); - return NULL; -} - // for profile only int32_t taosHashGetMaxOverflowLinkLength(const SHashObj *pHashObj) { if (pHashObj == NULL || pHashObj->size == 0) { @@ -759,6 +654,8 @@ SHashNode *doCreateHashNode(const void *key, size_t keyLen, const void *pData, s pNewNode->keyLen = (uint32_t)keyLen; pNewNode->hashVal = hashVal; + pNewNode->dataLen = dsize; + pNewNode->count = 1; memcpy(GET_HASH_NODE_DATA(pNewNode), pData, dsize); memcpy(GET_HASH_NODE_KEY(pNewNode), key, keyLen); @@ -775,35 +672,6 @@ void pushfrontNodeInEntryList(SHashEntry *pEntry, SHashNode *pNode) { pEntry->num += 1; } -SHashNode *getNextHashNode(SHashMutableIterator *pIter) { - assert(pIter != NULL); - - pIter->entryIndex++; - SHashNode *p = NULL; - - while (pIter->entryIndex < pIter->numOfEntries) { - SHashEntry *pEntry = pIter->pHashObj->hashList[pIter->entryIndex]; - if (pEntry->num == 0) { - pIter->entryIndex++; - continue; - } - - if (pIter->pHashObj->type == HASH_ENTRY_LOCK) { - taosRLockLatch(&pEntry->latch); - } - - p = pEntry->next; - - if (pIter->pHashObj->type == HASH_ENTRY_LOCK) { - taosRUnLockLatch(&pEntry->latch); - } - - return p; - } - - return NULL; -} - size_t taosHashGetMemSize(const SHashObj *pHashObj) { if (pHashObj == NULL) { return 0; 
@@ -811,3 +679,129 @@ size_t taosHashGetMemSize(const SHashObj *pHashObj) { return (pHashObj->capacity * (sizeof(SHashEntry) + POINTER_BYTES)) + sizeof(SHashNode) * taosHashGetSize(pHashObj) + sizeof(SHashObj); } + +// release the pNode, return next pNode, and lock the current entry +static void *taosHashReleaseNode(SHashObj *pHashObj, void *p, int *slot) { + + SHashNode *pOld = (SHashNode *)GET_HASH_PNODE(p); + SHashNode *prevNode = NULL; + + *slot = HASH_INDEX(pOld->hashVal, pHashObj->capacity); + SHashEntry *pe = pHashObj->hashList[*slot]; + + // lock entry + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWLockLatch(&pe->latch); + } + + SHashNode *pNode = pe->next; + + while (pNode) { + if (pNode == pOld) + break; + + prevNode = pNode; + pNode = pNode->next; + } + + if (pNode) { + pNode = pNode->next; + while (pNode) { + if (pNode->removed == 0) break; + pNode = pNode->next; + } + + pOld->count--; + if (pOld->count <=0) { + if (prevNode) { + prevNode->next = pOld->next; + } else { + pe->next = pOld->next; + } + + pe->num--; + atomic_sub_fetch_64(&pHashObj->size, 1); + FREE_HASH_NODE(pHashObj, pOld); + } + } else { + uError("pNode:%p data:%p is not there!!!", pNode, p); + } + + return pNode; +} + +void *taosHashIterate(SHashObj *pHashObj, void *p) { + if (pHashObj == NULL) return NULL; + + int slot = 0; + char *data = NULL; + + // only add the read lock to disable the resize process + __rd_lock(&pHashObj->lock, pHashObj->type); + + SHashNode *pNode = NULL; + if (p) { + pNode = taosHashReleaseNode(pHashObj, p, &slot); + if (pNode == NULL) { + SHashEntry *pe = pHashObj->hashList[slot]; + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWUnLockLatch(&pe->latch); + } + + slot = slot + 1; + } + } + + if (pNode == NULL) { + for (; slot < pHashObj->capacity; ++slot) { + SHashEntry *pe = pHashObj->hashList[slot]; + + // lock entry + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWLockLatch(&pe->latch); + } + + pNode = pe->next; + while (pNode) { + if (pNode->removed == 0) 
break; + pNode = pNode->next; + } + + if (pNode) break; + + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWUnLockLatch(&pe->latch); + } + } + } + + if (pNode) { + SHashEntry *pe = pHashObj->hashList[slot]; + pNode->count++; + data = GET_HASH_NODE_DATA(pNode); + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWUnLockLatch(&pe->latch); + } + } + + __rd_unlock(&pHashObj->lock, pHashObj->type); + return data; + +} + +void taosHashCancelIterate(SHashObj *pHashObj, void *p) { + if (pHashObj == NULL || p == NULL) return; + + // only add the read lock to disable the resize process + __rd_lock(&pHashObj->lock, pHashObj->type); + + int slot; + taosHashReleaseNode(pHashObj, p, &slot); + + SHashEntry *pe = pHashObj->hashList[slot]; + if (pHashObj->type == HASH_ENTRY_LOCK) { + taosWUnLockLatch(&pe->latch); + } + + __rd_unlock(&pHashObj->lock, pHashObj->type); +} diff --git a/src/util/src/tfile.c b/src/util/src/tfile.c index 27ba30fe81..64fea2843b 100644 --- a/src/util/src/tfile.c +++ b/src/util/src/tfile.c @@ -13,6 +13,7 @@ * along with this program. If not, see . 
*/ +#define _DEFAULT_SOURCE #include "os.h" #include "taoserror.h" #include "tulog.h" @@ -21,40 +22,52 @@ static int32_t tsFileRsetId = -1; -static void taosCloseFile(void *p) { +static void tfCloseFile(void *p) { close((int32_t)(uintptr_t)p); } -int32_t tfinit() { - tsFileRsetId = taosOpenRef(2000, taosCloseFile); - return tsFileRsetId; +int32_t tfInit() { + tsFileRsetId = taosOpenRef(2000, tfCloseFile); + if (tsFileRsetId > 0) { + return 0; + } else { + return -1; + } } -void tfcleanup() { +void tfCleanup() { if (tsFileRsetId >= 0) taosCloseRef(tsFileRsetId); tsFileRsetId = -1; } -int64_t tfopen(const char *pathname, int32_t flags) { - int32_t fd = open(pathname, flags); - +static int64_t tfOpenImp(int32_t fd) { if (fd < 0) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; - } + } - void *p = (void *)(int64_t)fd; + void * p = (void *)(int64_t)fd; int64_t rid = taosAddRef(tsFileRsetId, p); if (rid < 0) close(fd); return rid; } -int64_t tfclose(int64_t tfd) { +int64_t tfOpen(const char *pathname, int32_t flags) { + int32_t fd = open(pathname, flags); + return tfOpenImp(fd); +} + +int64_t tfOpenM(const char *pathname, int32_t flags, mode_t mode) { + int32_t fd = open(pathname, flags, mode); + return tfOpenImp(fd); +} + +int64_t tfClose(int64_t tfd) { return taosRemoveRef(tsFileRsetId, tfd); } -int64_t tfwrite(int64_t tfd, void *buf, int64_t count) { +int64_t tfWrite(int64_t tfd, void *buf, int64_t count) { void *p = taosAcquireRef(tsFileRsetId, tfd); if (p == NULL) return -1; @@ -67,7 +80,7 @@ int64_t tfwrite(int64_t tfd, void *buf, int64_t count) { return ret; } -int64_t tfread(int64_t tfd, void *buf, int64_t count) { +int64_t tfRead(int64_t tfd, void *buf, int64_t count) { void *p = taosAcquireRef(tsFileRsetId, tfd); if (p == NULL) return -1; @@ -79,3 +92,44 @@ int64_t tfread(int64_t tfd, void *buf, int64_t count) { taosReleaseRef(tsFileRsetId, tfd); return ret; } + +int32_t tfFsync(int64_t tfd) { + void *p = taosAcquireRef(tsFileRsetId, tfd); + if (p == NULL) 
return -1; + + int32_t fd = (int32_t)(uintptr_t)p; + int32_t code = fsync(fd); + + taosReleaseRef(tsFileRsetId, tfd); + return code; +} + +bool tfValid(int64_t tfd) { + void *p = taosAcquireRef(tsFileRsetId, tfd); + if (p == NULL) return false; + + taosReleaseRef(tsFileRsetId, tfd); + return true; +} + +int64_t tfLseek(int64_t tfd, int64_t offset, int32_t whence) { + void *p = taosAcquireRef(tsFileRsetId, tfd); + if (p == NULL) return -1; + + int32_t fd = (int32_t)(uintptr_t)p; + int64_t ret = taosLSeek(fd, offset, whence); + + taosReleaseRef(tsFileRsetId, tfd); + return ret; +} + +int32_t tfFtruncate(int64_t tfd, int64_t length) { + void *p = taosAcquireRef(tsFileRsetId, tfd); + if (p == NULL) return -1; + + int32_t fd = (int32_t)(uintptr_t)p; + int32_t code = taosFtruncate(fd, length); + + taosReleaseRef(tsFileRsetId, tfd); + return code; +} diff --git a/src/util/src/tkvstore.c b/src/util/src/tkvstore.c index 31641ac9a7..2b1d13c78b 100644 --- a/src/util/src/tkvstore.c +++ b/src/util/src/tkvstore.c @@ -529,7 +529,7 @@ static int tdRestoreKVStore(SKVStore *pStore) { void * buf = NULL; int64_t maxBufSize = 0; SKVRecord rInfo = {0}; - SHashMutableIterator *pIter = NULL; + SKVRecord *pRecord = NULL; ASSERT(TD_KVSTORE_HEADER_SIZE == lseek(pStore->fd, 0, SEEK_CUR)); ASSERT(pStore->info.size == TD_KVSTORE_HEADER_SIZE); @@ -582,16 +582,8 @@ static int tdRestoreKVStore(SKVStore *pStore) { goto _err; } - pIter = taosHashCreateIter(pStore->map); - if (pIter == NULL) { - uError("failed to create hash iter while opening KV store %s", pStore->fname); - terrno = TSDB_CODE_COM_OUT_OF_MEMORY; - goto _err; - } - - while (taosHashIterNext(pIter)) { - SKVRecord *pRecord = taosHashIterGet(pIter); - + pRecord = taosHashIterate(pStore->map, NULL); + while (pRecord) { if (lseek(pStore->fd, (off_t)(pRecord->offset + sizeof(SKVRecord)), SEEK_SET) < 0) { uError("failed to lseek file %s since %s, offset %" PRId64, pStore->fname, strerror(errno), pRecord->offset); terrno = 
TAOS_SYSTEM_ERROR(errno); @@ -613,16 +605,17 @@ static int tdRestoreKVStore(SKVStore *pStore) { goto _err; } } + + pRecord = taosHashIterate(pStore->map, pRecord); } if (pStore->aFunc) (*pStore->aFunc)(pStore->appH); - taosHashDestroyIter(pIter); tfree(buf); return 0; _err: - taosHashDestroyIter(pIter); + taosHashCancelIterate(pStore->map, pRecord); tfree(buf); return -1; } diff --git a/src/util/src/tref.c b/src/util/src/tref.c index 4c1a87c960..1f83abcb84 100644 --- a/src/util/src/tref.c +++ b/src/util/src/tref.c @@ -329,7 +329,7 @@ void *taosIterateRef(int rsetId, int64_t rid) { pNode->count++; // acquire it newP = pNode->p; taosUnlockList(pSet->lockedBy+hash); - uTrace("rsetId:%d p:%p rid:%" PRId64 " is returned", rsetId, newP, rid); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is returned", rsetId, newP, rid); } else { uTrace("rsetId:%d the list is over", rsetId); } @@ -423,24 +423,25 @@ static int taosDecRefCount(int rsetId, int64_t rid, int remove) { if (pNode->next) { pNode->next->prev = pNode->prev; } - - (*pSet->fp)(pNode->p); - - uTrace("rsetId:%d p:%p rid:%" PRId64 " is removed, count:%d, free mem: %p", rsetId, pNode->p, rid, pSet->count, pNode); - free(pNode); released = 1; } else { - uTrace("rsetId:%d p:%p rid:%" PRId64 " is released, count:%d", rsetId, pNode->p, rid, pNode->count); + uTrace("rsetId:%d p:%p rid:%" PRId64 " is released", rsetId, pNode->p, rid); } } else { - uTrace("rsetId:%d rid:%" PRId64 " is not there, failed to release/remove", rsetId, rid); + uTrace("rsetId:%d rid:%" PRId64 " is not there, failed to release/remove", rsetId, rid); terrno = TSDB_CODE_REF_NOT_EXIST; code = -1; } taosUnlockList(pSet->lockedBy+hash); - if (released) taosDecRsetCount(pSet); + if (released) { + uTrace("rsetId:%d p:%p rid:%" PRId64 " is removed, count:%d, free mem: %p", rsetId, pNode->p, rid, pSet->count, pNode); + (*pSet->fp)(pNode->p); + free(pNode); + + taosDecRsetCount(pSet); + } return code; } diff --git a/src/vnode/inc/vnodeInt.h 
b/src/vnode/inc/vnodeInt.h index 7fc9b100ef..e468c2e83e 100644 --- a/src/vnode/inc/vnodeInt.h +++ b/src/vnode/inc/vnodeInt.h @@ -44,6 +44,7 @@ typedef struct { int8_t role; int8_t accessState; int8_t isFull; + int8_t isCommiting; uint64_t version; // current version uint64_t fversion; // version on saved data file void *wqueue; diff --git a/src/vnode/src/vnodeMain.c b/src/vnode/src/vnodeMain.c index cd6d2ea7c0..b516c9d90e 100644 --- a/src/vnode/src/vnodeMain.c +++ b/src/vnode/src/vnodeMain.c @@ -38,6 +38,7 @@ static void vnodeCtrlFlow(int32_t vgId, int32_t level); static int32_t vnodeNotifyFileSynced(int32_t vgId, uint64_t fversion); static void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code); static int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam); +static int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver); #ifndef _SYNC int64_t syncStart(const SSyncInfo *info) { return NULL; } @@ -266,16 +267,18 @@ int32_t vnodeOpen(int32_t vnode, char *rootDir) { return terrno; } - SCqCfg cqCfg = {0}; - sprintf(cqCfg.user, "_root"); - strcpy(cqCfg.pass, tsInternalPass); - strcpy(cqCfg.db, pVnode->db); - cqCfg.vgId = vnode; - cqCfg.cqWrite = vnodeWriteToWQueue; - pVnode->cq = cqOpen(pVnode, &cqCfg); - if (pVnode->cq == NULL) { - vnodeCleanUp(pVnode); - return terrno; + if (tsEnableStream) { + SCqCfg cqCfg = {0}; + sprintf(cqCfg.user, "_root"); + strcpy(cqCfg.pass, tsInternalPass); + strcpy(cqCfg.db, pVnode->db); + cqCfg.vgId = vnode; + cqCfg.cqWrite = vnodeWriteToCache; + pVnode->cq = cqOpen(pVnode, &cqCfg); + if (pVnode->cq == NULL) { + vnodeCleanUp(pVnode); + return terrno; + } } STsdbAppH appH = {0}; @@ -352,6 +355,7 @@ int32_t vnodeOpen(int32_t vnode, char *rootDir) { syncInfo.notifyRole = vnodeNotifyRole; syncInfo.notifyFlowCtrl = vnodeCtrlFlow; syncInfo.notifyFileSynced = vnodeNotifyFileSynced; + syncInfo.getVersion = vnodeGetVersion; pVnode->sync = syncStart(&syncInfo); #ifndef _SYNC @@ -520,11 +524,10 @@ 
static void vnodeBuildVloadMsg(SVnodeObj *pVnode, SStatusMsg *pStatus) { } int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) { - SHashMutableIterator *pIter = taosHashCreateIter(tsVnodesHash); - while (taosHashIterNext(pIter)) { - SVnodeObj **pVnode = taosHashIterGet(pIter); - if (pVnode == NULL) continue; - if (*pVnode == NULL) continue; + void *pIter = taosHashIterate(tsVnodesHash, NULL); + while (pIter) { + SVnodeObj **pVnode = pIter; + if (*pVnode) { (*numOfVnodes)++; if (*numOfVnodes >= TSDB_MAX_VNODES) { @@ -533,25 +536,25 @@ int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) { } else { vnodeList[*numOfVnodes - 1] = (*pVnode)->vgId; } - } - taosHashDestroyIter(pIter); + } + + pIter = taosHashIterate(tsVnodesHash, pIter); + } return TSDB_CODE_SUCCESS; } void vnodeBuildStatusMsg(void *param) { SStatusMsg *pStatus = param; - SHashMutableIterator *pIter = taosHashCreateIter(tsVnodesHash); - while (taosHashIterNext(pIter)) { - SVnodeObj **pVnode = taosHashIterGet(pIter); - if (pVnode == NULL) continue; - if (*pVnode == NULL) continue; - - vnodeBuildVloadMsg(*pVnode, pStatus); + void *pIter = taosHashIterate(tsVnodesHash, NULL); + while (pIter) { + SVnodeObj **pVnode = pIter; + if (*pVnode) { + vnodeBuildVloadMsg(*pVnode, pStatus); + } + pIter = taosHashIterate(tsVnodesHash, pIter); } - - taosHashDestroyIter(pIter); } void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes) { @@ -597,18 +600,19 @@ static void vnodeCleanUp(SVnodeObj *pVnode) { vnodeRelease(pVnode); } -// TODO: this is a simple implement static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno) { SVnodeObj *pVnode = arg; if (eno != TSDB_CODE_SUCCESS) { vError("vgId:%d, failed to commit since %s, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, tstrerror(eno), pVnode->fversion, pVnode->version); + pVnode->isCommiting = 0; pVnode->isFull = 1; return 0; } if (status == TSDB_STATUS_COMMIT_START) { + pVnode->isCommiting = 1; pVnode->fversion = 
pVnode->version; vDebug("vgId:%d, start commit, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, pVnode->fversion, pVnode->version); if (pVnode->status != TAOS_VN_STATUS_INIT) { @@ -619,6 +623,7 @@ static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno) { if (status == TSDB_STATUS_COMMIT_OVER) { vDebug("vgId:%d, commit over, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, pVnode->fversion, pVnode->version); + pVnode->isCommiting = 0; pVnode->isFull = 0; if (pVnode->status != TAOS_VN_STATUS_INIT) { walRemoveOneOldFile(pVnode->wal); @@ -684,8 +689,10 @@ static void vnodeCtrlFlow(int32_t vgId, int32_t level) { return; } - pVnode->flowctrlLevel = level; - vDebug("vgId:%d, set flowctrl level:%d", pVnode->vgId, level); + if (pVnode->flowctrlLevel != level) { + vDebug("vgId:%d, set flowctrl level from %d to %d", pVnode->vgId, pVnode->flowctrlLevel, level); + pVnode->flowctrlLevel = level; + } vnodeRelease(pVnode); } @@ -765,3 +772,23 @@ static int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void vnodeRelease(pVnode); return code; } + +static int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver) { + SVnodeObj *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + vError("vgId:%d, vnode not found while write to cache", vgId); + return -1; + } + + int32_t code = 0; + if (pVnode->isCommiting) { + vDebug("vgId:%d, vnode is commiting while get version", vgId); + code = -1; + } else { + *fver = pVnode->fversion; + *wver = pVnode->version; + } + + vnodeRelease(pVnode); + return code; +} diff --git a/src/vnode/src/vnodeWrite.c b/src/vnode/src/vnodeWrite.c index 57bd407cd1..268d1fb53b 100644 --- a/src/vnode/src/vnodeWrite.c +++ b/src/vnode/src/vnodeWrite.c @@ -282,13 +282,15 @@ static void vnodeFlowCtrlMsgToWQueue(void *param, void *tmrId) { pWrite->processedCount++; if (pWrite->processedCount > 100) { - vError("vgId:%d, msg:%p, failed to process since %s", pVnode->vgId, pWrite, tstrerror(code)); + vError("vgId:%d, msg:%p, 
failed to process since %s, retry:%d", pVnode->vgId, pWrite, tstrerror(code), + pWrite->processedCount); pWrite->processedCount = 1; dnodeSendRpcVWriteRsp(pWrite->pVnode, pWrite, code); } else { code = vnodePerformFlowCtrl(pWrite); if (code == 0) { - vTrace("vgId:%d, write into vwqueue after flowctrl", pVnode->vgId); + vDebug("vgId:%d, msg:%p, write into vwqueue after flowctrl, retry:%d", pVnode->vgId, pWrite, + pWrite->processedCount); pWrite->processedCount = 0; taosWriteQitem(pVnode->wqueue, pWrite->qtype, pWrite); } @@ -310,7 +312,7 @@ static int32_t vnodePerformFlowCtrl(SVWriteMsg *pWrite) { void *unUsed = NULL; taosTmrReset(vnodeFlowCtrlMsgToWQueue, 100, pWrite, tsDnodeTmr, &unUsed); - vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl, count:%d", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle, + vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl, retry:%d", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle, pWrite->processedCount); return TSDB_CODE_VND_ACTION_IN_PROGRESS; } diff --git a/src/wal/inc/walInt.h b/src/wal/inc/walInt.h index b0edabfbd8..06748d885f 100644 --- a/src/wal/inc/walInt.h +++ b/src/wal/inc/walInt.h @@ -44,8 +44,8 @@ typedef struct { uint64_t version; int64_t fileId; int64_t rid; + int64_t tfd; int32_t vgId; - int32_t fd; int32_t keep; int32_t level; int32_t fsyncPeriod; diff --git a/src/wal/src/walMgmt.c b/src/wal/src/walMgmt.c index 36c190be3e..72ea239817 100644 --- a/src/wal/src/walMgmt.c +++ b/src/wal/src/walMgmt.c @@ -17,6 +17,7 @@ #include "os.h" #include "taoserror.h" #include "tref.h" +#include "tfile.h" #include "twal.h" #include "walInt.h" @@ -61,7 +62,7 @@ void *walOpen(char *path, SWalCfg *pCfg) { } pWal->vgId = pCfg->vgId; - pWal->fd = -1; + pWal->tfd = -1; pWal->fileId = -1; pWal->level = pCfg->walLevel; pWal->keep = pCfg->keep; @@ -124,7 +125,7 @@ void walClose(void *handle) { SWal *pWal = handle; pthread_mutex_lock(&pWal->mutex); - taosClose(pWal->fd); + tfClose(pWal->tfd); pthread_mutex_unlock(&pWal->mutex); 
taosRemoveRef(tsWal.refId, pWal->rid); } @@ -143,7 +144,7 @@ static void walFreeObj(void *wal) { SWal *pWal = wal; wDebug("vgId:%d, wal:%p is freed", pWal->vgId, pWal); - taosClose(pWal->fd); + tfClose(pWal->tfd); pthread_mutex_destroy(&pWal->mutex); tfree(pWal); } @@ -172,7 +173,7 @@ static void walFsyncAll() { while (pWal) { if (walNeedFsync(pWal)) { wTrace("vgId:%d, do fsync, level:%d seq:%d rseq:%d", pWal->vgId, pWal->level, pWal->fsyncSeq, tsWal.seq); - int32_t code = fsync(pWal->fd); + int32_t code = tfFsync(pWal->tfd); if (code != 0) { wError("vgId:%d, file:%s, failed to fsync since %s", pWal->vgId, pWal->name, strerror(code)); } diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index 36b3dba165..10e1b4dd61 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -18,6 +18,7 @@ #include "os.h" #include "taoserror.h" #include "tchecksum.h" +#include "tfile.h" #include "twal.h" #include "walInt.h" @@ -36,8 +37,8 @@ int32_t walRenew(void *handle) { pthread_mutex_lock(&pWal->mutex); - if (pWal->fd >= 0) { - tclose(pWal->fd); + if (tfValid(pWal->tfd)) { + tfClose(pWal->tfd); wDebug("vgId:%d, file:%s, it is closed", pWal->vgId, pWal->name); } @@ -49,9 +50,9 @@ int32_t walRenew(void *handle) { } snprintf(pWal->name, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->fileId); - pWal->fd = open(pWal->name, O_WRONLY | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO); + pWal->tfd = tfOpenM(pWal->name, O_WRONLY | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO); - if (pWal->fd < 0) { + if (!tfValid(pWal->tfd)) { code = TAOS_SYSTEM_ERROR(errno); wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->name, strerror(errno)); } else { @@ -67,7 +68,7 @@ void walRemoveOneOldFile(void *handle) { SWal *pWal = handle; if (pWal == NULL) return; if (pWal->keep == TAOS_WAL_KEEP) return; - if (pWal->fd <= 0) return; + if (!tfValid(pWal->tfd)) return; pthread_mutex_lock(&pWal->mutex); @@ -113,7 +114,7 @@ int32_t walWrite(void *handle, SWalHead *pHead) { 
int32_t code = 0; // no wal - if (pWal->fd <= 0) return 0; + if (!tfValid(pWal->tfd)) return 0; if (pWal->level == TAOS_WAL_NOLOG) return 0; if (pHead->version <= pWal->version) return 0; @@ -123,12 +124,12 @@ int32_t walWrite(void *handle, SWalHead *pHead) { pthread_mutex_lock(&pWal->mutex); - if (taosWrite(pWal->fd, pHead, contLen) != contLen) { + if (tfWrite(pWal->tfd, pHead, contLen) != contLen) { code = TAOS_SYSTEM_ERROR(errno); wError("vgId:%d, file:%s, failed to write since %s", pWal->vgId, pWal->name, strerror(errno)); } else { - wTrace("vgId:%d, write wal, fileId:%" PRId64 " fd:%d hver:%" PRId64 " wver:%" PRIu64 " len:%d", pWal->vgId, - pWal->fileId, pWal->fd, pHead->version, pWal->version, pHead->len); + wTrace("vgId:%d, write wal, fileId:%" PRId64 " tfd:%" PRId64 " hver:%" PRId64 " wver:%" PRIu64 " len:%d", pWal->vgId, + pWal->fileId, pWal->tfd, pHead->version, pWal->version, pHead->len); pWal->version = pHead->version; } @@ -141,11 +142,11 @@ int32_t walWrite(void *handle, SWalHead *pHead) { void walFsync(void *handle, bool forceFsync) { SWal *pWal = handle; - if (pWal == NULL || pWal->fd < 0) return; + if (pWal == NULL || !tfValid(pWal->tfd)) return; if (forceFsync || (pWal->level == TAOS_WAL_FSYNC && pWal->fsyncPeriod == 0)) { wTrace("vgId:%d, fileId:%" PRId64 ", do fsync", pWal->vgId, pWal->fileId); - if (fsync(pWal->fd) < 0) { + if (tfFsync(pWal->tfd) < 0) { wError("vgId:%d, fileId:%" PRId64 ", fsync failed since %s", pWal->vgId, pWal->fileId, strerror(errno)); } } @@ -186,8 +187,8 @@ int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) { // open the existing WAL file in append mode pWal->fileId = 0; snprintf(pWal->name, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->fileId); - pWal->fd = open(pWal->name, O_WRONLY | O_CREAT | O_APPEND, S_IRWXU | S_IRWXG | S_IRWXO); - if (pWal->fd < 0) { + pWal->tfd = tfOpenM(pWal->name, O_WRONLY | O_CREAT | O_APPEND, S_IRWXU | S_IRWXG | S_IRWXO); + if (!tfValid(pWal->tfd)) { 
wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->name, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } @@ -211,28 +212,28 @@ int32_t walGetWalFile(void *handle, char *fileName, int64_t *fileId) { code = (*fileId == pWal->fileId) ? 0 : 1; } - wTrace("vgId:%d, get wal file, code:%d curId:%" PRId64 " outId:%" PRId64, pWal->vgId, code, pWal->fileId, *fileId); + wDebug("vgId:%d, get wal file, code:%d curId:%" PRId64 " outId:%" PRId64, pWal->vgId, code, pWal->fileId, *fileId); pthread_mutex_unlock(&(pWal->mutex)); return code; } -static void walFtruncate(SWal *pWal, int32_t fd, int64_t offset) { - taosFtruncate(fd, offset); - fsync(fd); +static void walFtruncate(SWal *pWal, int64_t tfd, int64_t offset) { + tfFtruncate(tfd, offset); + tfFsync(tfd); } -static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int32_t fd, int64_t *offset) { +static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, int64_t *offset) { int64_t pos = *offset; while (1) { pos++; - if (lseek(fd, pos, SEEK_SET) < 0) { + if (tfLseek(tfd, pos, SEEK_SET) < 0) { wError("vgId:%d, failed to seek from corrupted wal file since %s", pWal->vgId, strerror(errno)); return TSDB_CODE_WAL_FILE_CORRUPTED; } - if (taosRead(fd, pHead, sizeof(SWalHead)) <= 0) { + if (tfRead(tfd, pHead, sizeof(SWalHead)) <= 0) { wError("vgId:%d, read to end of corrupted wal file, offset:%" PRId64, pWal->vgId, pos); return TSDB_CODE_WAL_FILE_CORRUPTED; } @@ -259,8 +260,8 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch return TAOS_SYSTEM_ERROR(errno); } - int32_t fd = open(name, O_RDWR); - if (fd < 0) { + int64_t tfd = tfOpen(name, O_RDWR); + if (!tfValid(tfd)) { wError("vgId:%d, file:%s, failed to open for restore since %s", pWal->vgId, name, strerror(errno)); tfree(buffer); return TAOS_SYSTEM_ERROR(errno); @@ -273,7 +274,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch SWalHead *pHead = buffer; while (1) 
{ - int32_t ret = taosRead(fd, pHead, sizeof(SWalHead)); + int32_t ret = tfRead(tfd, pHead, sizeof(SWalHead)); if (ret == 0) break; if (ret < 0) { @@ -284,16 +285,16 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch if (ret < sizeof(SWalHead)) { wError("vgId:%d, file:%s, failed to read wal head, ret is %d", pWal->vgId, name, ret); - walFtruncate(pWal, fd, offset); + walFtruncate(pWal, tfd, offset); break; } if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) { wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, pHead->version, pHead->len, offset); - code = walSkipCorruptedRecord(pWal, pHead, fd, &offset); + code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); if (code != TSDB_CODE_SUCCESS) { - walFtruncate(pWal, fd, offset); + walFtruncate(pWal, tfd, offset); break; } } @@ -310,7 +311,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch pHead = buffer; } - ret = taosRead(fd, pHead->cont, pHead->len); + ret = tfRead(tfd, pHead->cont, pHead->len); if (ret < 0) { wError("vgId:%d, file:%s, failed to read wal body since %s", pWal->vgId, name, strerror(errno)); code = TAOS_SYSTEM_ERROR(errno); @@ -325,14 +326,14 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch offset = offset + sizeof(SWalHead) + pHead->len; - wTrace("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d", pWal->vgId, + wDebug("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d", pWal->vgId, fileId, pHead->version, pWal->version, pHead->len); pWal->version = pHead->version; (*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL, NULL); } - tclose(fd); + tfClose(tfd); tfree(buffer); return code; diff --git a/tests/examples/JDBC/JDBCDemo/pom.xml b/tests/examples/JDBC/JDBCDemo/pom.xml index 121a3b5cd6..98f908b77e 100644 --- a/tests/examples/JDBC/JDBCDemo/pom.xml +++ 
b/tests/examples/JDBC/JDBCDemo/pom.xml @@ -56,7 +56,7 @@ com.taosdata.jdbc taos-jdbcdriver - 2.0.8 + 2.0.12 log4j diff --git a/tests/examples/JDBC/JDBCDemo/readme.md b/tests/examples/JDBC/JDBCDemo/readme.md index 9b8790adad..e348e458fe 100644 --- a/tests/examples/JDBC/JDBCDemo/readme.md +++ b/tests/examples/JDBC/JDBCDemo/readme.md @@ -6,10 +6,24 @@ TDengine's JDBC demo project is organized in a Maven way so that users can easil Make sure you have already installed a tdengine client on your current develop environment. Download the tdengine package on our website: ``https://www.taosdata.com/cn/all-downloads/`` and install the client. +## How to run jdbcChecker +
mvn clean compile exec:java -Dexec.mainClass="com.taosdata.example.JdbcChecker" -Dexec.args="-host localhost"
+ +## How to run jdbcTaosDemo +run command: +
 mvn clean compile exec:java -Dexec.mainClass="com.taosdata.example.jdbcTaosdemo.JdbcTaosdemo"
+and run with your custom args +
mvn clean compile exec:java -Dexec.mainClass="com.taosdata.example.jdbcTaosdemo.JdbcTaosdemo" -Dexec.args="-host localhost"
+ ## Compile the Demo Code and Run It + To compile the demo project, go to the source directory ``TDengine/tests/examples/JDBC/JDBCDemo`` and execute -
mvn clean package assembly:single
+ +
+mvn clean package assembly:single
+
+ The ``pom.xml`` is configured to package all the dependencies into one executable jar file. To run it, go to ``examples/JDBC/JDBCDemo/target`` and execute -
java -jar jdbcChecker-SNAPSHOT-jar-with-dependencies.jar -host localhost
\ No newline at end of file +
java -jar jdbcChecker-SNAPSHOT-jar-with-dependencies.jar -host localhost
diff --git a/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/domain/JdbcTaosdemoConfig.java b/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/domain/JdbcTaosdemoConfig.java index 82613037db..36745a9394 100644 --- a/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/domain/JdbcTaosdemoConfig.java +++ b/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/domain/JdbcTaosdemoConfig.java @@ -14,9 +14,9 @@ public final class JdbcTaosdemoConfig { //Destination database. Default is 'test' private String dbName = "test"; //keep - private int keep = 3650; + private int keep = 36500; //days - private int days = 10; + private int days = 120; //Super table Name. Default is 'meters' private String stbName = "meters"; diff --git a/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/task/InsertTableTask.java b/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/task/InsertTableTask.java index a35628bb58..644de52dd3 100644 --- a/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/task/InsertTableTask.java +++ b/tests/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/jdbcTaosdemo/task/InsertTableTask.java @@ -41,7 +41,7 @@ public class InsertTableTask implements Runnable { long ts = start.toEpochMilli() + (j * timeGap); // insert data into echo table for (int i = startTbIndex; i < startTbIndex + tableNumber; i++) { - String sql = SqlSpeller.insertOneRowSQL(config.getDbName(), config.getTbPrefix(), i + 1, ts); + String sql = SqlSpeller.insertBatchSizeRowsSQL(config.getDbName(), config.getTbPrefix(), i + 1, ts, config.getNumberOfRecordsPerRequest()); logger.info(Thread.currentThread().getName() + ">>> " + sql); Statement statement = connection.createStatement(); statement.execute(sql); diff --git a/tests/pytest/client/alterDatabase.py b/tests/pytest/client/alterDatabase.py index 
fa397d16c5..8191312cc0 100644 --- a/tests/pytest/client/alterDatabase.py +++ b/tests/pytest/client/alterDatabase.py @@ -32,9 +32,9 @@ class TDTestCase: tdSql.query("show databases") tdSql.checkData(0, 14, 2) - tdSql.execute("alter database db keep 365") + tdSql.execute("alter database db keep 365,365,365") tdSql.query("show databases") - tdSql.checkData(0, 7, "3650,3650,365") + tdSql.checkData(0, 7, "365,365,365") tdSql.execute("alter database db quorum 2") tdSql.query("show databases") diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 8d68457ec8..e2ce4b26fa 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -352,6 +352,12 @@ class ThreadCoordinator: self._execStats.registerFailure("Broken DB Connection") # continue # don't do that, need to tap all threads at # end, and maybe signal them to stop + if isinstance(err, CrashGenError): # our own transition failure + Logging.info("State transition error") + traceback.print_stack() + transitionFailed = True + self._te = None # Not running any more + self._execStats.registerFailure("State transition error") else: raise # return transitionFailed # Why did we have this??!! 
@@ -388,12 +394,20 @@ class ThreadCoordinator: self._syncAtBarrier() # For now just cross the barrier Progress.emit(Progress.END_THREAD_STEP) except threading.BrokenBarrierError as err: - Logging.info("Main loop aborted, caused by worker thread(s) time-out") self._execStats.registerFailure("Aborted due to worker thread timeout") - print("\n\nWorker Thread time-out detected, TAOS related threads are:") + Logging.error("\n") + Logging.error("Main loop aborted, caused by worker thread(s) time-out of {} seconds".format( + ThreadCoordinator.WORKER_THREAD_TIMEOUT)) + Logging.error("TAOS related threads blocked at (stack frames top-to-bottom):") ts = ThreadStacks() ts.print(filterInternal=True) workerTimeout = True + + # Enable below for deadlock debugging, using gdb to attach to process + # while True: + # Logging.error("Deadlock detected") + # time.sleep(60.0) + break # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate" @@ -701,7 +715,7 @@ class AnyState: # task.logDebug("Task success found") sCnt += 1 if (sCnt >= 2): - raise RuntimeError( + raise CrashGenError( "Unexpected more than 1 success with task: {}".format(cls)) def assertIfExistThenSuccess(self, tasks, cls): @@ -714,7 +728,7 @@ class AnyState: if task.isSuccess(): sCnt += 1 if (exists and sCnt <= 0): - raise RuntimeError("Unexpected zero success for task type: {}, from tasks: {}" + raise CrashGenError("Unexpected zero success for task type: {}, from tasks: {}" .format(cls, tasks)) def assertNoTask(self, tasks, cls): @@ -727,7 +741,7 @@ class AnyState: for task in tasks: if isinstance(task, cls): if task.isSuccess(): - raise RuntimeError( + raise CrashGenError( "Unexpected successful task: {}".format(cls)) def hasSuccess(self, tasks, cls): @@ -926,8 +940,9 @@ class StateMechine: Logging.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time())) return StateDbOnly() + # For sure we have tables, which means we must have the super table. 
# TODO: are we sure? sTable = self._db.getFixedSuperTable() - if sTable.hasRegTables(dbc, dbName): # no regular tables + if sTable.hasRegTables(dbc): # no regular tables Logging.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time())) return StateSuperTableOnly() else: # has actual tables @@ -1050,9 +1065,8 @@ class Database: def getFixedSuperTableName(cls): return "fs_table" - @classmethod - def getFixedSuperTable(cls) -> TdSuperTable: - return TdSuperTable(cls.getFixedSuperTableName()) + def getFixedSuperTable(self) -> TdSuperTable: + return TdSuperTable(self.getFixedSuperTableName(), self.getName()) # We aim to create a starting time tick, such that, whenever we run our test here once # We should be able to safely create 100,000 records, which will not have any repeated time stamp @@ -1107,6 +1121,11 @@ class Database: # print("Float obtained: {}".format(ret)) return ret + ALL_COLORS = ['red', 'white', 'blue', 'green', 'purple'] + + def getNextColor(self): + return random.choice(self.ALL_COLORS) + class TaskExecutor(): class BoundedList: @@ -1240,7 +1259,7 @@ class Task(): if errno in [ 0x05, # TSDB_CODE_RPC_NOT_READY 0x0B, # Unable to establish connection, more details in TD-1648 - 0x200, # invalid SQL, TODO: re-examine with TD-934 + # 0x200, # invalid SQL, TODO: re-examine with TD-934 0x20F, # query terminated, possibly due to vnoding being dropped, see TD-1776 0x213, # "Disconnected from service", result of "kill connection ???" 
0x217, # "db not selected", client side defined error code @@ -1569,8 +1588,8 @@ class TaskCreateSuperTable(StateTransitionTask): sTable = self._db.getFixedSuperTable() # type: TdSuperTable # wt.execSql("use db") # should always be in place - sTable.create(wt.getDbConn(), self._db.getName(), - {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'}, + sTable.create(wt.getDbConn(), + {'ts':'TIMESTAMP', 'speed':'INT', 'color':'BINARY(16)'}, {'b':'BINARY(200)', 'f':'FLOAT'}, dropIfExists = True ) # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName)) @@ -1579,30 +1598,33 @@ class TaskCreateSuperTable(StateTransitionTask): class TdSuperTable: - def __init__(self, stName): + def __init__(self, stName, dbName): self._stName = stName + self._dbName = dbName def getName(self): return self._stName - def drop(self, dbc, dbName, skipCheck = False): - if self.exists(dbc, dbName) : # if myself exists + def drop(self, dbc, skipCheck = False): + dbName = self._dbName + if self.exists(dbc) : # if myself exists fullTableName = dbName + '.' + self._stName dbc.execute("DROP TABLE {}".format(fullTableName)) else: if not skipCheck: raise CrashGenError("Cannot drop non-existant super table: {}".format(self._stName)) - def exists(self, dbc, dbName): - dbc.execute("USE " + dbName) + def exists(self, dbc): + dbc.execute("USE " + self._dbName) return dbc.existsSuperTable(self._stName) # TODO: odd semantic, create() method is usually static? - def create(self, dbc, dbName, cols: dict, tags: dict, + def create(self, dbc, cols: dict, tags: dict, dropIfExists = False ): - '''Creating a super table''' + + dbName = self._dbName dbc.execute("USE " + dbName) fullTableName = dbName + '.' 
+ self._stName if dbc.existsSuperTable(self._stName): @@ -1623,7 +1645,8 @@ class TdSuperTable: ) dbc.execute(sql) - def getRegTables(self, dbc: DbConn, dbName: str): + def getRegTables(self, dbc: DbConn): + dbName = self._dbName try: dbc.query("select TBNAME from {}.{}".format(dbName, self._stName)) # TODO: analyze result set later except taos.error.ProgrammingError as err: @@ -1634,10 +1657,11 @@ class TdSuperTable: qr = dbc.getQueryResult() return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation - def hasRegTables(self, dbc: DbConn, dbName: str): - return dbc.query("SELECT * FROM {}.{}".format(dbName, self._stName)) > 0 + def hasRegTables(self, dbc: DbConn): + return dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) > 0 - def ensureTable(self, task: Task, dbc: DbConn, dbName: str, regTableName: str): + def ensureTable(self, task: Task, dbc: DbConn, regTableName: str): + dbName = self._dbName sql = "select tbname from {}.{} where tbname in ('{}')".format(dbName, self._stName, regTableName) if dbc.query(sql) >= 1 : # reg table exists already return @@ -1650,15 +1674,15 @@ class TdSuperTable: # print("(" + fullTableName[-3:] + ")", end="", flush=True) try: sql = "CREATE TABLE {} USING {}.{} tags ({})".format( - fullTableName, dbName, self._stName, self._getTagStrForSql(dbc, dbName) + fullTableName, dbName, self._stName, self._getTagStrForSql(dbc) ) dbc.execute(sql) finally: if task is not None: task.unlockTable(fullTableName) # no matter what - def _getTagStrForSql(self, dbc, dbName: str) : - tags = self._getTags(dbc, dbName) + def _getTagStrForSql(self, dbc) : + tags = self._getTags(dbc) tagStrs = [] for tagName in tags: tagType = tags[tagName] @@ -1672,36 +1696,86 @@ class TdSuperTable: raise RuntimeError("Unexpected tag type: {}".format(tagType)) return ", ".join(tagStrs) - def _getTags(self, dbc, dbName) -> dict: - dbc.query("DESCRIBE {}.{}".format(dbName, self._stName)) + def 
_getTags(self, dbc) -> dict: + dbc.query("DESCRIBE {}.{}".format(self._dbName, self._stName)) stCols = dbc.getQueryResult() # print(stCols) ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type # print("Tags retrieved: {}".format(ret)) return ret - def addTag(self, dbc, dbName, tagName, tagType): - if tagName in self._getTags(dbc, dbName): # already + def addTag(self, dbc, tagName, tagType): + if tagName in self._getTags(dbc): # already return # sTable.addTag("extraTag", "int") - sql = "alter table {}.{} add tag {} {}".format(dbName, self._stName, tagName, tagType) + sql = "alter table {}.{} add tag {} {}".format( + self._dbName, self._stName, tagName, tagType) dbc.execute(sql) - def dropTag(self, dbc, dbName, tagName): - if not tagName in self._getTags(dbc, dbName): # don't have this tag + def dropTag(self, dbc, tagName): + if not tagName in self._getTags(dbc): # don't have this tag return - sql = "alter table {}.{} drop tag {}".format(dbName, self._stName, tagName) + sql = "alter table {}.{} drop tag {}".format(self._dbName, self._stName, tagName) dbc.execute(sql) - def changeTag(self, dbc, dbName, oldTag, newTag): - tags = self._getTags(dbc, dbName) + def changeTag(self, dbc, oldTag, newTag): + tags = self._getTags(dbc) if not oldTag in tags: # don't have this tag return if newTag in tags: # already have this tag return - sql = "alter table {}.{} change tag {} {}".format(dbName, self._stName, oldTag, newTag) + sql = "alter table {}.{} change tag {} {}".format(self._dbName, self._stName, oldTag, newTag) dbc.execute(sql) + def generateQueries(self, dbc: DbConn) -> List[SqlQuery]: + ''' Generate queries to test/exercise this super table ''' + ret = [] # type: List[SqlQuery] + + for rTbName in self.getRegTables(dbc): # regular tables + + filterExpr = Dice.choice([ # TODO: add various kind of WHERE conditions + None + ]) + + # Run the query against the regular table first + doAggr = (Dice.throw(2) == 0) # 1 in 2 chance + if not doAggr: # don't do 
aggregate query, just simple one + ret.append(SqlQuery( # reg table + "select {} from {}.{}".format('*', self._dbName, rTbName))) + ret.append(SqlQuery( # super table + "select {} from {}.{}".format('*', self._dbName, self.getName()))) + else: # Aggregate query + aggExpr = Dice.choice([ + 'count(*)', + 'avg(speed)', + # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable + 'sum(speed)', + 'stddev(speed)', + # SELECTOR functions + 'min(speed)', + 'max(speed)', + 'first(speed)', + 'last(speed)', + 'top(speed, 50)', # TODO: not supported? + 'bottom(speed, 50)', # TODO: not supported? + 'apercentile(speed, 10)', # TODO: TD-1316 + 'last_row(speed)', + # Transformation Functions + # 'diff(speed)', # TODO: no supported?! + 'spread(speed)' + ]) # TODO: add more from 'top' + + + if aggExpr not in ['stddev(speed)']: #TODO: STDDEV not valid for super tables?! + sql = "select {} from {}.{}".format(aggExpr, self._dbName, self.getName()) + if Dice.throw(3) == 0: # 1 in X chance + sql = sql + ' GROUP BY color' + Progress.emit(Progress.QUERY_GROUP_BY) + # Logging.info("Executing GROUP-BY query: " + sql) + ret.append(SqlQuery(sql)) + + return ret + class TaskReadData(StateTransitionTask): @classmethod def getEndState(cls): @@ -1716,10 +1790,8 @@ class TaskReadData(StateTransitionTask): # return True # always # return gSvcMgr.isActive() # only if it's running TODO: race condition here - def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): - sTable = self._db.getFixedSuperTable() - - # 1 in 5 chance, simulate a broken connection, only if service stable (not restarting) + def _reconnectIfNeeded(self, wt): + # 1 in 20 chance, simulate a broken connection, only if service stable (not restarting) if random.randrange(20)==0: # and self._canRestartService(): # TODO: break connection in all situations # Logging.info("Attempting to reconnect to server") # TODO: change to DEBUG Progress.emit(Progress.SERVICE_RECONNECT_START) @@ -1744,43 +1816,36 @@ class 
TaskReadData(StateTransitionTask): return # TODO: fix server restart status race condtion + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + self._reconnectIfNeeded(wt) + dbc = wt.getDbConn() - dbName = self._db.getName() - for rTbName in sTable.getRegTables(dbc, dbName): # regular tables - aggExpr = Dice.choice([ - '*', - 'count(*)', - 'avg(speed)', - # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable - 'sum(speed)', - 'stddev(speed)', - # SELECTOR functions - 'min(speed)', - 'max(speed)', - 'first(speed)', - 'last(speed)', - 'top(speed, 50)', # TODO: not supported? - 'bottom(speed, 50)', # TODO: not supported? - 'apercentile(speed, 10)', # TODO: TD-1316 - 'last_row(speed)', - # Transformation Functions - # 'diff(speed)', # TODO: no supported?! - 'spread(speed)' - ]) # TODO: add more from 'top' - filterExpr = Dice.choice([ # TODO: add various kind of WHERE conditions - None - ]) + sTable = self._db.getFixedSuperTable() + + for q in sTable.generateQueries(dbc): # regular tables try: - # Run the query against the regular table first - dbc.execute("select {} from {}.{}".format(aggExpr, dbName, rTbName)) - # Then run it against the super table - if aggExpr not in ['stddev(speed)']: #TODO: STDDEV not valid for super tables?! 
- dbc.execute("select {} from {}.{}".format(aggExpr, dbName, sTable.getName())) + sql = q.getSql() + # if 'GROUP BY' in sql: + # Logging.info("Executing GROUP-BY query: " + sql) + dbc.execute(sql) except taos.error.ProgrammingError as err: errno2 = Helper.convertErrno(err.errno) Logging.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql())) raise +class SqlQuery: + @classmethod + def buildRandom(cls, db: Database): + '''Build a random query against a certain database''' + + dbName = db.getName() + + def __init__(self, sql:str = None): + self._sql = sql + + def getSql(self): + return self._sql + class TaskDropSuperTable(StateTransitionTask): @classmethod def getEndState(cls): @@ -1837,19 +1902,18 @@ class TaskAlterTags(StateTransitionTask): # tblName = self._dbManager.getFixedSuperTableName() dbc = wt.getDbConn() sTable = self._db.getFixedSuperTable() - dbName = self._db.getName() dice = Dice.throw(4) if dice == 0: - sTable.addTag(dbc, dbName, "extraTag", "int") + sTable.addTag(dbc, "extraTag", "int") # sql = "alter table db.{} add tag extraTag int".format(tblName) elif dice == 1: - sTable.dropTag(dbc, dbName, "extraTag") + sTable.dropTag(dbc, "extraTag") # sql = "alter table db.{} drop tag extraTag".format(tblName) elif dice == 2: - sTable.dropTag(dbc, dbName, "newTag") + sTable.dropTag(dbc, "newTag") # sql = "alter table db.{} drop tag newTag".format(tblName) else: # dice == 3 - sTable.changeTag(dbc, dbName, "extraTag", "newTag") + sTable.changeTag(dbc, "extraTag", "newTag") # sql = "alter table db.{} change tag extraTag newTag".format(tblName) class TaskRestartService(StateTransitionTask): @@ -1920,15 +1984,17 @@ class TaskAddData(StateTransitionTask): for j in range(numRecords): # number of records per table nextInt = db.getNextInt() nextTick = db.getNextTick() - sql += "('{}', {});".format(nextTick, nextInt) + nextColor = db.getNextColor() + sql += "('{}', {}, '{}');".format(nextTick, nextInt, nextColor) dbc.execute(sql) 
- def _addData(self, db, dbc, regTableName, te: TaskExecutor): # implied: NOT in batches + def _addData(self, db: Database, dbc, regTableName, te: TaskExecutor): # implied: NOT in batches numRecords = self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS for j in range(numRecords): # number of records per table nextInt = db.getNextInt() nextTick = db.getNextTick() + nextColor = db.getNextColor() if gConfig.record_ops: self.prepToRecordOps() self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName)) @@ -1942,11 +2008,11 @@ class TaskAddData(StateTransitionTask): # print("_w" + str(nextInt % 100), end="", flush=True) # Trace what was written try: - sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {}) + sql = "insert into {} values ('{}', {}, '{}');".format( # removed: tags ('{}', {}) fullTableName, # ds.getFixedSuperTableName(), # ds.getNextBinary(), ds.getNextFloat(), - nextTick, nextInt) + nextTick, nextInt, nextColor) dbc.execute(sql) except: # Any exception at all if gConfig.verify_data: @@ -1964,10 +2030,10 @@ class TaskAddData(StateTransitionTask): .format(nextInt, readBack), 0x999) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) - if errno in [0x991, 0x992] : # not a single result + if errno in [CrashGenError.INVALID_EMPTY_RESULT, CrashGenError.INVALID_MULTIPLE_RESULT] : # not a single result raise taos.error.ProgrammingError( "Failed to read back same data for tick: {}, wrote: {}, read: {}" - .format(nextTick, nextInt, "Empty Result" if errno==0x991 else "Multiple Result"), + .format(nextTick, nextInt, "Empty Result" if errno == CrashGenError.INVALID_EMPTY_RESULT else "Multiple Result"), errno) elif errno in [0x218, 0x362]: # table doesn't exist # do nothing @@ -2000,11 +2066,12 @@ class TaskAddData(StateTransitionTask): else: self.activeTable.add(i) # marking it active + dbName = db.getName() sTable = db.getFixedSuperTable() regTableName = 
self.getRegTableName(i) # "db.reg_table_{}".format(i) - fullTableName = db.getName() + '.' + regTableName + fullTableName = dbName + '.' + regTableName # self._lockTable(fullTableName) # "create table" below. Stop it if the table is "locked" - sTable.ensureTable(self, wt.getDbConn(), db.getName(), regTableName) # Ensure the table exists + sTable.ensureTable(self, wt.getDbConn(), regTableName) # Ensure the table exists # self._unlockTable(fullTableName) if Dice.throw(1) == 0: # 1 in 2 chance @@ -2024,7 +2091,7 @@ class ThreadStacks: # stack info for all threads self._allStacks[th.native_id] = stack def print(self, filteredEndName = None, filterInternal = False): - for thNid, stack in self._allStacks.items(): # for each thread + for thNid, stack in self._allStacks.items(): # for each thread, stack frames top to bottom lastFrame = stack[-1] if filteredEndName: # we need to filter out stacks that match this name if lastFrame.name == filteredEndName : # end did not match @@ -2036,9 +2103,9 @@ class ThreadStacks: # stack info for all threads '__init__']: # the thread that extracted the stack continue # ignore # Now print - print("\n<----- Thread Info for LWP/ID: {} (Execution stopped at Bottom Frame) <-----".format(thNid)) + print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(thNid)) stackFrame = 0 - for frame in stack: + for frame in stack: # was using: reversed(stack) # print(frame) print("[{sf}] File {filename}, line {lineno}, in {name}".format( sf=stackFrame, filename=frame.filename, lineno=frame.lineno, name=frame.name)) diff --git a/tests/pytest/crash_gen/db.py b/tests/pytest/crash_gen/db.py index 2a4b362f82..855e18be55 100644 --- a/tests/pytest/crash_gen/db.py +++ b/tests/pytest/crash_gen/db.py @@ -78,7 +78,7 @@ class DbConn: if nRows != 1: raise taos.error.ProgrammingError( "Unexpected result for query: {}, rows = {}".format(sql, nRows), - (0x991 if nRows==0 else 0x992) + (CrashGenError.INVALID_EMPTY_RESULT if nRows==0 else 
CrashGenError.INVALID_MULTIPLE_RESULT) ) if self.getResultRows() != 1 or self.getResultCols() != 1: raise RuntimeError("Unexpected result set for query: {}".format(sql)) @@ -349,7 +349,8 @@ class DbConnNative(DbConn): def execute(self, sql): if (not self.isOpen): - raise RuntimeError("Cannot execute database commands until connection is open") + raise CrashGenError( + "Cannot exec SQL unless db connection is open", CrashGenError.DB_CONNECTION_NOT_OPEN) Logging.debug("[SQL] Executing SQL: {}".format(sql)) self._lastSql = sql nRows = self._tdSql.execute(sql) @@ -360,8 +361,8 @@ class DbConnNative(DbConn): def query(self, sql): # return rows affected if (not self.isOpen): - raise RuntimeError( - "Cannot query database until connection is open") + raise CrashGenError( + "Cannot query database until connection is open, restarting?", CrashGenError.DB_CONNECTION_NOT_OPEN) Logging.debug("[SQL] Executing SQL: {}".format(sql)) self._lastSql = sql nRows = self._tdSql.query(sql) diff --git a/tests/pytest/crash_gen/misc.py b/tests/pytest/crash_gen/misc.py index 2d2ce99d95..a374ed943b 100644 --- a/tests/pytest/crash_gen/misc.py +++ b/tests/pytest/crash_gen/misc.py @@ -3,14 +3,20 @@ import random import logging import os +import taos -class CrashGenError(Exception): - def __init__(self, msg=None, errno=None): - self.msg = msg - self.errno = errno - def __str__(self): - return self.msg +class CrashGenError(taos.error.ProgrammingError): + INVALID_EMPTY_RESULT = 0x991 + INVALID_MULTIPLE_RESULT = 0x992 + DB_CONNECTION_NOT_OPEN = 0x993 + # def __init__(self, msg=None, errno=None): + # self.msg = msg + # self.errno = errno + + # def __str__(self): + # return self.msg + pass class LoggingFilter(logging.Filter): @@ -168,6 +174,7 @@ class Progress: SERVICE_RECONNECT_FAILURE = 6 SERVICE_START_NAP = 7 CREATE_TABLE_ATTEMPT = 8 + QUERY_GROUP_BY = 9 tokens = { STEP_BOUNDARY: '.', @@ -178,7 +185,8 @@ class Progress: SERVICE_RECONNECT_SUCCESS: '.r>', SERVICE_RECONNECT_FAILURE: '.xr>', 
SERVICE_START_NAP: '_zz', - CREATE_TABLE_ATTEMPT: '_c', + CREATE_TABLE_ATTEMPT: 'c', + QUERY_GROUP_BY: 'g', } @classmethod diff --git a/tests/pytest/crash_gen/service_manager.py b/tests/pytest/crash_gen/service_manager.py index d249abc439..ae6f8d5d3a 100644 --- a/tests/pytest/crash_gen/service_manager.py +++ b/tests/pytest/crash_gen/service_manager.py @@ -51,10 +51,12 @@ class TdeInstance(): def prepareGcovEnv(cls, env): # Ref: https://gcc.gnu.org/onlinedocs/gcc/Cross-profiling.html bPath = cls._getBuildPath() # build PATH - numSegments = len(bPath.split('/')) - 1 # "/x/TDengine/build" should yield 3 - numSegments = numSegments - 1 # DEBUG only - env['GCOV_PREFIX'] = bPath + '/svc_gcov' + numSegments = len(bPath.split('/')) # "/x/TDengine/build" should yield 3 + # numSegments += 2 # cover "/src" after build + # numSegments = numSegments - 1 # DEBUG only + env['GCOV_PREFIX'] = bPath + '/src_s' # Server side source env['GCOV_PREFIX_STRIP'] = str(numSegments) # Strip every element, plus, ENV needs strings + # VERY VERY important note: GCOV data collection NOT effective upon SIG_KILL Logging.info("Preparing GCOV environement to strip {} elements and use path: {}".format( numSegments, env['GCOV_PREFIX'] )) @@ -258,14 +260,15 @@ class TdeSubProcess: TdeInstance.prepareGcovEnv(myEnv) # print(myEnv) - # print(myEnv.items()) + # print("Starting TDengine with env: ", myEnv.items()) # print("Starting TDengine via Shell: {}".format(cmdLineStr)) useShell = True self.subProcess = subprocess.Popen( - ' '.join(cmdLine) if useShell else cmdLine, - shell=useShell, - # svcCmdSingle, shell=True, # capture core dump? 
+ # ' '.join(cmdLine) if useShell else cmdLine, + # shell=useShell, + ' '.join(cmdLine), + shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, # bufsize=1, # not supported in binary mode @@ -273,7 +276,8 @@ class TdeSubProcess: env=myEnv ) # had text=True, which interferred with reading EOF - STOP_SIGNAL = signal.SIGKILL # What signal to use (in kill) to stop a taosd process? + STOP_SIGNAL = signal.SIGKILL # signal.SIGKILL/SIGINT # What signal to use (in kill) to stop a taosd process? + SIG_KILL_RETCODE = 137 # ref: https://stackoverflow.com/questions/43268156/process-finished-with-exit-code-137-in-pycharm def stop(self): """ @@ -320,8 +324,12 @@ class TdeSubProcess: retCode = self.subProcess.returncode # should always be there # May throw subprocess.TimeoutExpired exception above, therefore # The process is guranteed to have ended by now - self.subProcess = None - if retCode != 0: # != (- signal.SIGINT): + self.subProcess = None + if retCode == self.SIG_KILL_RETCODE: + Logging.info("TSP.stop(): sub proc KILLED, as expected") + elif retCode == (- self.STOP_SIGNAL): + Logging.info("TSP.stop(), sub process STOPPED, as expected") + elif retCode != 0: # != (- signal.SIGINT): Logging.error("TSP.stop(): Failed to stop sub proc properly w/ SIG {}, retCode={}".format( self.STOP_SIGNAL, retCode)) else: diff --git a/tests/pytest/fulltest.sh b/tests/pytest/fulltest.sh index 0e3b482e3d..042fd826e8 100755 --- a/tests/pytest/fulltest.sh +++ b/tests/pytest/fulltest.sh @@ -19,6 +19,7 @@ python3 ./test.py -f insert/randomNullCommit.py python3 insert/retentionpolicy.py python3 ./test.py -f insert/alterTableAndInsert.py python3 ./test.py -f insert/insertIntoTwoTables.py +python3 ./test.py -f insert/before_1970.py python3 ./test.py -f table/alter_wal0.py python3 ./test.py -f table/column_name.py @@ -228,6 +229,7 @@ python3 ./test.py -f update/merge_commit_data2.py python3 ./test.py -f update/merge_commit_data2_update0.py python3 ./test.py -f update/merge_commit_last-0.py 
python3 ./test.py -f update/merge_commit_last.py +python3 ./test.py -f update/bug_td2279.py # wal python3 ./test.py -f wal/addOldWalTest.py \ No newline at end of file diff --git a/tests/pytest/hivemq-extension-test.py b/tests/pytest/hivemq-extension-test.py new file mode 100644 index 0000000000..3d0b1ef83f --- /dev/null +++ b/tests/pytest/hivemq-extension-test.py @@ -0,0 +1,249 @@ +#!/usr/bin/python3 +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### +# install pip +# pip install src/connector/python/linux/python2/ +import sys +import os +import os.path +import time +import glob +import getopt +import subprocess +from shutil import which +from multipledispatch import dispatch + + +@dispatch(str, str) +def v_print(msg: str, arg: str): + if verbose: + print(msg % arg) + + +@dispatch(str, int) +def v_print(msg: str, arg: int): + if verbose: + print(msg % int(arg)) + + +@dispatch(str, int, int) +def v_print(msg: str, arg1: int, arg2: int): + if verbose: + print(msg % (int(arg1), int(arg2))) + + +@dispatch(str, int, int, int) +def v_print(msg: str, arg1: int, arg2: int, arg3: int): + if verbose: + print(msg % (int(arg1), int(arg2), int(arg3))) + + +@dispatch(str, int, int, int, int) +def v_print(msg: str, arg1: int, arg2: int, arg3: int, arg4: int): + if verbose: + print(msg % (int(arg1), int(arg2), int(arg3), int(arg4))) + + +def isHiveMQInstalled(): + v_print("%s", "Check if HiveMQ installed") + defaultHiveMQPath = "/opt/hivemq*" + hiveMQDir = glob.glob(defaultHiveMQPath) + if (len(hiveMQDir) == 0): + v_print("%s", "ERROR: HiveMQ NOT found") + return 
False + else: + v_print("HiveMQ installed at %s", hiveMQDir[0]) + return True + + +def isMosquittoInstalled(): + v_print("%s", "Check if mosquitto installed") + if not which('mosquitto_pub'): + v_print("%s", "ERROR: mosquitto is NOT installed") + return False + else: + return True + + +def installExtension(): + currentDir = os.getcwd() + extDir = 'src/connector/hivemq-tdengine-extension' + os.chdir('../..') + os.system('git submodule update --init -- %s' % extDir) + os.chdir(extDir) + v_print("%s", "build extension..") + os.system('mvn clean package') + + tdExtensionZip = 'target/hivemq-tdengine-extension*.zip' + tdExtensionZipDir = glob.glob(tdExtensionZip) + + defaultHiveMQPath = "/opt/hivemq*" + hiveMQDir = glob.glob(defaultHiveMQPath) + extPath = hiveMQDir[0] + '/extensions' + + tdExtDir = glob.glob(extPath + '/hivemq-tdengine-extension') + if len(tdExtDir): + v_print("%s", "delete exist extension..") + os.system('rm -rf %s' % tdExtDir[0]) + + v_print("%s", "unzip extension..") + os.system('unzip %s -d %s' % (tdExtensionZipDir[0], extPath)) + + os.chdir(currentDir) + + +def stopProgram(prog: str): + psCmd = "ps ax|grep -w %s| grep -v grep | awk '{print $1}'" % prog + + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8") + + while(processID): + killCmd = "kill -TERM %s > /dev/null 2>&1" % processID + os.system(killCmd) + time.sleep(1) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8") + pass + + +def stopHiveMQ(): + stopProgram("hivemq.jar") + v_print("%s", "ERROR: HiveMQ is NOT running") + + +def checkProgramRunning(prog: str): + psCmd = "ps ax|grep -w %s| grep -v grep | awk '{print $1}'" % prog + + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8") + + if not processID: + v_print("ERROR: %s is NOT running", prog) + return False + else: + return True + + +def runHiveMQ(): + defaultHiveMQPath = "/opt/hivemq*" + hiveMQDir = glob.glob(defaultHiveMQPath) + runPath = hiveMQDir[0] + '/bin/run.sh > 
/dev/null &' + os.system(runPath) + time.sleep(10) + + if not checkProgramRunning("hivemq.jar"): + return False + else: + v_print("%s", "hivemq is running") + return True + + +def getBuildPath(): + selfPath = os.path.dirname(os.path.realpath(__file__)) + + binPath = '' + + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + binPath = root[:len(root) - len("/build/bin")] + break + return binPath + + +def runTDengine(): + stopProgram("taosd") + + buildPath = getBuildPath() + + if (buildPath == ""): + v_print("%s", "ERROR: taosd NOT found!") + sys.exit(1) + else: + v_print("%s", "taosd found in %s" % buildPath) + + binPath = buildPath + "/build/bin/taosd" + + os.system('%s > /dev/null &' % binPath) + time.sleep(10) + if not checkProgramRunning("taosd"): + return False + else: + v_print("%s", "TDengine is running") + return True + + + +def reCreateDatabase(): + buildPath = getBuildPath() + binPath = buildPath + "/build/bin/taos" + + os.system('%s -s "DROP DATABASE IF EXISTS hivemq"' % binPath) + os.system('%s -s "CREATE DATABASE IF NOT EXISTS hivemq"' % binPath) + + +def sendMqttMsg(topic: str, payload: str): + testStr = 'mosquitto_pub -t %s -m "%s"' % (topic, payload) + os.system(testStr) + time.sleep(3) + + +def checkTDengineData(topic: str, payload: str): + buildPath = getBuildPath() + binPath = buildPath + "/build/bin/taos" + + output = subprocess.check_output( + '%s -s "select * from hivemq.mqtt_payload"' % + binPath, shell=True).decode('utf-8') + if (topic in output) and (payload in output): + v_print("%s", output) + return True + else: + v_print("%s", "ERROR: mqtt topic or payload NOT found") + return False + + +if __name__ == "__main__": + verbose = True + testTopic = 'test' + testPayload = 'hello world' + + 
if not isHiveMQInstalled(): + sys.exit(1) + + if not isMosquittoInstalled(): + sys.exit(1) + + stopHiveMQ() + + installExtension() + + if not runTDengine(): + sys.exit(1) + + reCreateDatabase() + + if not runHiveMQ(): + sys.exit(1) + + sendMqttMsg(testTopic, testPayload) + + if not checkTDengineData(testTopic, testPayload): + sys.exit(1) + + sys.exit(0) diff --git a/tests/pytest/insert/insertDemo.py b/tests/pytest/insert/insertDemo.py new file mode 100644 index 0000000000..d18206e7a4 --- /dev/null +++ b/tests/pytest/insert/insertDemo.py @@ -0,0 +1,47 @@ +import taos +import datetime +import random +import multiprocessing + +def taos_excute(table, connect_host): + conn = taos.connect(host=connect_host, user="root", password="taosdata", config="/etc/taos", database='test') + cursor = conn.cursor() + for i in range(1000000): + pk = random.randint(100001, 300000) + time_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + col1 = random.randint(1, 10000) + col2 = random.randint(1, 10000) + col3 = random.randint(1, 10000) + col4 = random.randint(1, 10000) + col5 = random.randint(1, 10000) + col6 = random.randint(1, 10000) + sql = f"INSERT INTO {table}_{pk} USING {table} TAGS ({pk}) VALUES ('{time_now}', {col1}, {col2}, {col3}, {col4}, {col5}, {col6})" + cursor.execute(sql) + cursor.close() + conn.close() + +def taos_init(table, connect_host, pk): + conn = taos.connect(host=connect_host, user="root", password="taosdata", config="/etc/taos", database='test') + cursor = conn.cursor() + sql = f"CREATE TABLE {table}_{pk} USING {table} TAGS ({pk})" + cursor.execute(sql) + cursor.close() + conn.close() + +print("init time:", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + +connect_list = ["node1", "node2", "node3", "node4", "node5"] +pool = multiprocessing.Pool(processes=108) + +for pk in range(100001, 300000): + pool.apply_async(func=taos_init, args=("test", connect_list[pk % 5], pk, )) + +print("start time:", 
datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + +for i in range(10000): + pool.apply_async(func=taos_excute, args=("test", connect_list[i % 5],)) + +pool.close() +pool.join() + +print("end time:", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) \ No newline at end of file diff --git a/tests/pytest/insert/insertFromCSVOurofOrder.py b/tests/pytest/insert/insertFromCSVOurofOrder.py new file mode 100644 index 0000000000..d4de85b7e9 --- /dev/null +++ b/tests/pytest/insert/insertFromCSVOurofOrder.py @@ -0,0 +1,71 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import taos +from util.log import tdLog +from util.cases import tdCases +from util.sql import tdSql +import time +import datetime +import csv +import random +import pandas as pd + + +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + + self.ts = 1500074556514 + + def writeCSV(self): + with open('test3.csv','w', encoding='utf-8', newline='') as csvFile: + writer = csv.writer(csvFile, dialect='excel') + for i in range(1000000): + newTimestamp = self.ts + random.randint(10000000, 10000000000) + random.randint(1000, 10000000) + random.randint(1, 1000) + d = datetime.datetime.fromtimestamp(newTimestamp / 1000) + dt = str(d.strftime("%Y-%m-%d %H:%M:%S.%f")) + writer.writerow(["'%s'" % dt, random.randint(1, 100), random.uniform(1, 100), random.randint(1, 100), random.randint(1, 100)]) + + def removCSVHeader(self): + data = pd.read_csv("ordered.csv") + data = 
data.drop([0]) + data.to_csv("ordered.csv", header = False, index = False) + + def run(self): + tdSql.prepare() + + tdSql.execute("create table t1(ts timestamp, c1 int, c2 float, c3 int, c4 int)") + startTime = time.time() + tdSql.execute("insert into t1 file 'outoforder.csv'") + duration = time.time() - startTime + print("Out of Order - Insert time: %d" % duration) + tdSql.query("select count(*) from t1") + rows = tdSql.getData(0, 0) + + tdSql.execute("create table t2(ts timestamp, c1 int, c2 float, c3 int, c4 int)") + startTime = time.time() + tdSql.execute("insert into t2 file 'ordered.csv'") + duration = time.time() - startTime + print("Ordered - Insert time: %d" % duration) + tdSql.query("select count(*) from t2") + tdSql.checkData(0,0, rows) + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/pytest/insert/restfulInsert.py b/tests/pytest/insert/restfulInsert.py index e3a963f1d4..da797f788f 100644 --- a/tests/pytest/insert/restfulInsert.py +++ b/tests/pytest/insert/restfulInsert.py @@ -18,7 +18,7 @@ import time import argparse class RestfulInsert: - def __init__(self, host, startTimestamp, dbname, threads, tables, records, batchSize, tbNamePerfix, outOfOrder): + def __init__(self, host, startTimestamp, dbname, threads, tables, records, batchSize, tbNamePerfix, outOfOrder,tablePerbatch): self.header = {'Authorization': 'Basic cm9vdDp0YW9zZGF0YQ=='} self.url = "http://%s:6041/rest/sql" % host self.ts = startTimestamp @@ -29,32 +29,71 @@ class RestfulInsert: self.batchSize = batchSize self.tableNamePerfix = tbNamePerfix self.outOfOrder = outOfOrder + self.tablePerbatch = tablePerbatch def createTable(self, threadID): - tablesPerThread = int (self.numOfTables / self.numOfThreads) - print("create table %d to %d" % (tablesPerThread * threadID, tablesPerThread * (threadID + 1) - 1)) - for i in 
range(tablesPerThread): + tablesPerThread = int (self.numOfTables / self.numOfThreads) + loop = tablesPerThread if threadID != self.numOfThreads - 1 else self.numOfTables - tablesPerThread * threadID + print("create table %d to %d" % (tablesPerThread * threadID, tablesPerThread * threadID + loop - 1)) + for i in range(loop): tableID = threadID * tablesPerThread + if tableID + i >= self.numOfTables : break name = 'beijing' if tableID % 2 == 0 else 'shanghai' data = "create table if not exists %s.%s%d using %s.meters tags(%d, '%s')" % (self.dbname, self.tableNamePerfix, tableID + i, self.dbname, tableID + i, name) response = requests.post(self.url, data, headers = self.header) if response.status_code != 200: print(response.content) + + def insertData(self, threadID): print("thread %d started" % threadID) - tablesPerThread = int (self.numOfTables / self.numOfThreads) - for i in range(tablesPerThread): - tableID = i + threadID * tablesPerThread - start = self.ts - for j in range(int(self.recordsPerTable / self.batchSize)): - data = "insert into %s.%s%d values" % (self.dbname, self.tableNamePerfix, tableID) - values = [] - for k in range(self.batchSize): - data += "(%d, %d, %d, %d)" % (start + j * self.batchSize + k, random.randint(1, 100), random.randint(1, 100), random.randint(1, 100)) - response = requests.post(self.url, data, headers = self.header) - if response.status_code != 200: - print(response.content) + tablesPerThread = int (self.numOfTables / self.numOfThreads) + loop = int(self.recordsPerTable / self.batchSize) + if self.tablePerbatch == 1 : + for i in range(tablesPerThread+1): + tableID = i + threadID * tablesPerThread + if tableID >= self.numOfTables: return + start = self.ts + start1=time.time() + for k in range(loop): + data = "insert into %s.%s%d values" % (self.dbname, self.tableNamePerfix, tableID) + values = [] + bloop = self.batchSize if k != loop - 1 else self.recordsPerTable - self.batchSize * k + for l in range(bloop): + values.append("(%d, %d, 
%d, %d)" % (start + k * self.batchSize + l, random.randint(1, 100), random.randint(1, 100), random.randint(1, 100))) + if len(data) > 1048576 : + print ('batch size is larger than 1M') + exit(-1) + if self.outOfOrder : + random.shuffle(values) + data+=''.join(values) + response = requests.post(self.url, data, headers = self.header) + if response.status_code != 200: + print(response.content) + else: + for i in range(0,tablesPerThread+self.tablePerbatch,self.tablePerbatch): + for k in range(loop): + data = "insert into " + for j in range(self.tablePerbatch): + tableID = i + threadID * tablesPerThread+j + if tableID >= self.numOfTables: return + start = self.ts + data += "%s.%s%d values" % (self.dbname, self.tableNamePerfix, tableID) + values = [] + bloop = self.batchSize if k != loop - 1 else self.recordsPerTable - self.batchSize * k + for l in range(bloop): + values.append("(%d, %d, %d, %d)" % (start + k * self.batchSize + l, random.randint(1, 100), random.randint(1, 100), random.randint(1, 100))) + if self.outOfOrder : + random.shuffle(values) + data+=''.join(values) + if len(data) > 1024*1024 : + print ('batch size is larger than 1M') + exit(-1) + response = requests.post(self.url, data, headers = self.header) + if response.status_code != 200: + print(response.content) + def insertUnlimitedData(self, threadID): print("thread %d started" % threadID) @@ -85,7 +124,7 @@ class RestfulInsert: if response.status_code != 200: print(response.content) - def run(self): + def run(self): data = "create database if not exists %s" % self.dbname requests.post(self.url, data, headers = self.header) data = "create table if not exists %s.meters(ts timestamp, f1 int, f2 int, f3 int) tags(id int, loc nchar(20))" % self.dbname @@ -114,7 +153,7 @@ class RestfulInsert: for i in range(self.numOfThreads): threads[i].join() - print("inserting %d records takes %d seconds" % (self.numOfTables * self.recordsPerTable, (time.time() - startTime))) + print("inserting %s records takes %d seconds" 
% (self.numOfTables * self.recordsPerTable, (time.time() - startTime))) parser = argparse.ArgumentParser() parser.add_argument( @@ -149,14 +188,14 @@ parser.add_argument( '-T', '--number-of-tables', action='store', - default=1000, + default=10000, type=int, help='Number of tables to be created (default: 1000)') parser.add_argument( '-r', '--number-of-records', action='store', - default=1000, + default=10000, type=int, help='Number of record to be created for each table (default: 1000, -1 for unlimited records)') parser.add_argument( @@ -178,7 +217,18 @@ parser.add_argument( '--out-of-order', action='store_true', help='The order of test data (default: False)') +parser.add_argument( + '-b', + '--table-per-batch', + action='store', + default=1, + type=int, + help='the table per batch (default: 1)') + + args = parser.parse_args() -ri = RestfulInsert(args.host_name, args.start_timestamp, args.db_name, args.number_of_threads, args.number_of_tables, args.number_of_records, args.batch_size, args.table_name_prefix, args.out_of_order) +ri = RestfulInsert( + args.host_name, args.start_timestamp, args.db_name, args.number_of_threads, args.number_of_tables, + args.number_of_records, args.batch_size, args.table_name_prefix, args.out_of_order, args.table_per_batch) ri.run() \ No newline at end of file diff --git a/tests/pytest/pytest_1.sh b/tests/pytest/pytest_1.sh new file mode 100755 index 0000000000..52f5a30f4e --- /dev/null +++ b/tests/pytest/pytest_1.sh @@ -0,0 +1,218 @@ +#!/bin/bash +ulimit -c unlimited + +python3 ./test.py -f insert/basic.py +python3 ./test.py -f insert/int.py +python3 ./test.py -f insert/float.py +python3 ./test.py -f insert/bigint.py +python3 ./test.py -f insert/bool.py +python3 ./test.py -f insert/double.py +python3 ./test.py -f insert/smallint.py +python3 ./test.py -f insert/tinyint.py +python3 ./test.py -f insert/date.py +python3 ./test.py -f insert/binary.py +python3 ./test.py -f insert/nchar.py +#python3 ./test.py -f insert/nchar-boundary.py 
+python3 ./test.py -f insert/nchar-unicode.py +python3 ./test.py -f insert/multi.py +python3 ./test.py -f insert/randomNullCommit.py +python3 insert/retentionpolicy.py +python3 ./test.py -f insert/alterTableAndInsert.py +python3 ./test.py -f insert/insertIntoTwoTables.py + +python3 ./test.py -f table/alter_wal0.py +python3 ./test.py -f table/column_name.py +python3 ./test.py -f table/column_num.py +python3 ./test.py -f table/db_table.py +python3 ./test.py -f table/create_sensitive.py +#python3 ./test.py -f table/tablename-boundary.py + +# tag +python3 ./test.py -f tag_lite/filter.py +python3 ./test.py -f tag_lite/create-tags-boundary.py +python3 ./test.py -f tag_lite/3.py +python3 ./test.py -f tag_lite/4.py +python3 ./test.py -f tag_lite/5.py +python3 ./test.py -f tag_lite/6.py +python3 ./test.py -f tag_lite/add.py +python3 ./test.py -f tag_lite/bigint.py +python3 ./test.py -f tag_lite/binary_binary.py +python3 ./test.py -f tag_lite/binary.py +python3 ./test.py -f tag_lite/bool_binary.py +python3 ./test.py -f tag_lite/bool_int.py +python3 ./test.py -f tag_lite/bool.py +python3 ./test.py -f tag_lite/change.py +python3 ./test.py -f tag_lite/column.py +python3 ./test.py -f tag_lite/commit.py +python3 ./test.py -f tag_lite/create.py +python3 ./test.py -f tag_lite/datatype.py +python3 ./test.py -f tag_lite/datatype-without-alter.py +python3 ./test.py -f tag_lite/delete.py +python3 ./test.py -f tag_lite/double.py +python3 ./test.py -f tag_lite/float.py +python3 ./test.py -f tag_lite/int_binary.py +python3 ./test.py -f tag_lite/int_float.py +python3 ./test.py -f tag_lite/int.py +python3 ./test.py -f tag_lite/set.py +python3 ./test.py -f tag_lite/smallint.py +python3 ./test.py -f tag_lite/tinyint.py + +#python3 ./test.py -f dbmgmt/database-name-boundary.py + +python3 ./test.py -f import_merge/importBlock1HO.py +python3 ./test.py -f import_merge/importBlock1HPO.py +python3 ./test.py -f import_merge/importBlock1H.py +python3 ./test.py -f import_merge/importBlock1S.py 
+python3 ./test.py -f import_merge/importBlock1Sub.py +python3 ./test.py -f import_merge/importBlock1TO.py +python3 ./test.py -f import_merge/importBlock1TPO.py +python3 ./test.py -f import_merge/importBlock1T.py +python3 ./test.py -f import_merge/importBlock2HO.py +python3 ./test.py -f import_merge/importBlock2HPO.py +python3 ./test.py -f import_merge/importBlock2H.py +python3 ./test.py -f import_merge/importBlock2S.py +python3 ./test.py -f import_merge/importBlock2Sub.py +python3 ./test.py -f import_merge/importBlock2TO.py +python3 ./test.py -f import_merge/importBlock2TPO.py +python3 ./test.py -f import_merge/importBlock2T.py +python3 ./test.py -f import_merge/importBlockbetween.py +python3 ./test.py -f import_merge/importCacheFileHO.py +python3 ./test.py -f import_merge/importCacheFileHPO.py +python3 ./test.py -f import_merge/importCacheFileH.py +python3 ./test.py -f import_merge/importCacheFileS.py +python3 ./test.py -f import_merge/importCacheFileSub.py +python3 ./test.py -f import_merge/importCacheFileTO.py +python3 ./test.py -f import_merge/importCacheFileTPO.py +python3 ./test.py -f import_merge/importCacheFileT.py +python3 ./test.py -f import_merge/importDataH2.py +python3 ./test.py -f import_merge/importDataHO2.py +python3 ./test.py -f import_merge/importDataHO.py +python3 ./test.py -f import_merge/importDataHPO.py +python3 ./test.py -f import_merge/importDataLastHO.py +python3 ./test.py -f import_merge/importDataLastHPO.py +python3 ./test.py -f import_merge/importDataLastH.py +python3 ./test.py -f import_merge/importDataLastS.py +python3 ./test.py -f import_merge/importDataLastSub.py +python3 ./test.py -f import_merge/importDataLastTO.py +python3 ./test.py -f import_merge/importDataLastTPO.py +python3 ./test.py -f import_merge/importDataLastT.py +python3 ./test.py -f import_merge/importDataS.py +python3 ./test.py -f import_merge/importDataSub.py +python3 ./test.py -f import_merge/importDataTO.py +python3 ./test.py -f import_merge/importDataTPO.py 
+python3 ./test.py -f import_merge/importDataT.py +python3 ./test.py -f import_merge/importHeadOverlap.py +python3 ./test.py -f import_merge/importHeadPartOverlap.py +python3 ./test.py -f import_merge/importHead.py +python3 ./test.py -f import_merge/importHORestart.py +python3 ./test.py -f import_merge/importHPORestart.py +python3 ./test.py -f import_merge/importHRestart.py +python3 ./test.py -f import_merge/importLastHO.py +python3 ./test.py -f import_merge/importLastHPO.py +python3 ./test.py -f import_merge/importLastH.py +python3 ./test.py -f import_merge/importLastS.py +python3 ./test.py -f import_merge/importLastSub.py +python3 ./test.py -f import_merge/importLastTO.py +python3 ./test.py -f import_merge/importLastTPO.py +python3 ./test.py -f import_merge/importLastT.py +python3 ./test.py -f import_merge/importSpan.py +python3 ./test.py -f import_merge/importSRestart.py +python3 ./test.py -f import_merge/importSubRestart.py +python3 ./test.py -f import_merge/importTailOverlap.py +python3 ./test.py -f import_merge/importTailPartOverlap.py +python3 ./test.py -f import_merge/importTail.py +python3 ./test.py -f import_merge/importToCommit.py +python3 ./test.py -f import_merge/importTORestart.py +python3 ./test.py -f import_merge/importTPORestart.py +python3 ./test.py -f import_merge/importTRestart.py +python3 ./test.py -f import_merge/importInsertThenImport.py +python3 ./test.py -f import_merge/importCSV.py +# user +python3 ./test.py -f user/user_create.py +python3 ./test.py -f user/pass_len.py + +# stable +python3 ./test.py -f stable/query_after_reset.py + +# table +python3 ./test.py -f table/del_stable.py + +#query +python3 ./test.py -f query/filter.py +python3 ./test.py -f query/filterCombo.py +python3 ./test.py -f query/queryNormal.py +python3 ./test.py -f query/queryError.py +python3 ./test.py -f query/filterAllIntTypes.py +python3 ./test.py -f query/filterFloatAndDouble.py +python3 ./test.py -f query/filterOtherTypes.py +python3 ./test.py -f 
query/querySort.py +python3 ./test.py -f query/queryJoin.py +python3 ./test.py -f query/select_last_crash.py +python3 ./test.py -f query/queryNullValueTest.py +python3 ./test.py -f query/queryInsertValue.py +python3 ./test.py -f query/queryConnection.py +python3 ./test.py -f query/queryCountCSVData.py +python3 ./test.py -f query/natualInterval.py +python3 ./test.py -f query/bug1471.py +#python3 ./test.py -f query/dataLossTest.py +python3 ./test.py -f query/bug1874.py +python3 ./test.py -f query/bug1875.py +python3 ./test.py -f query/bug1876.py +python3 ./test.py -f query/bug2218.py + +#stream +python3 ./test.py -f stream/metric_1.py +python3 ./test.py -f stream/new.py +python3 ./test.py -f stream/stream1.py +python3 ./test.py -f stream/stream2.py +#python3 ./test.py -f stream/parser.py +python3 ./test.py -f stream/history.py + +#alter table +python3 ./test.py -f alter/alter_table_crash.py + +# client +python3 ./test.py -f client/client.py +python3 ./test.py -f client/version.py +python3 ./test.py -f client/alterDatabase.py + +# Misc +python3 testCompress.py +python3 testNoCompress.py +python3 testMinTablesPerVnode.py + +# functions +python3 ./test.py -f functions/function_avg.py -r 1 +python3 ./test.py -f functions/function_bottom.py -r 1 +python3 ./test.py -f functions/function_count.py -r 1 +python3 ./test.py -f functions/function_diff.py -r 1 +python3 ./test.py -f functions/function_first.py -r 1 +python3 ./test.py -f functions/function_last.py -r 1 +python3 ./test.py -f functions/function_last_row.py -r 1 +python3 ./test.py -f functions/function_leastsquares.py -r 1 +python3 ./test.py -f functions/function_max.py -r 1 +python3 ./test.py -f functions/function_min.py -r 1 +python3 ./test.py -f functions/function_operations.py -r 1 +python3 ./test.py -f functions/function_percentile.py -r 1 +python3 ./test.py -f functions/function_spread.py -r 1 +python3 ./test.py -f functions/function_stddev.py -r 1 +python3 ./test.py -f functions/function_sum.py -r 1 +python3 
./test.py -f functions/function_top.py -r 1 +#python3 ./test.py -f functions/function_twa.py -r 1 +python3 queryCount.py +python3 ./test.py -f query/queryGroupbyWithInterval.py +python3 client/twoClients.py +python3 test.py -f query/queryInterval.py +python3 test.py -f query/queryFillTest.py + +# tools +python3 test.py -f tools/taosdemoTest.py +python3 test.py -f tools/taosdumpTest.py +python3 test.py -f tools/lowaTest.py + +# subscribe +python3 test.py -f subscribe/singlemeter.py +#python3 test.py -f subscribe/stability.py +python3 test.py -f subscribe/supertable.py + + diff --git a/tests/pytest/pytest_2.sh b/tests/pytest/pytest_2.sh new file mode 100755 index 0000000000..fededea3bb --- /dev/null +++ b/tests/pytest/pytest_2.sh @@ -0,0 +1,17 @@ + + +# update +python3 ./test.py -f update/allow_update.py +python3 ./test.py -f update/allow_update-0.py +python3 ./test.py -f update/append_commit_data.py +python3 ./test.py -f update/append_commit_last-0.py +python3 ./test.py -f update/append_commit_last.py +python3 ./test.py -f update/merge_commit_data.py +python3 ./test.py -f update/merge_commit_data-0.py +python3 ./test.py -f update/merge_commit_data2.py +python3 ./test.py -f update/merge_commit_data2_update0.py +python3 ./test.py -f update/merge_commit_last-0.py +python3 ./test.py -f update/merge_commit_last.py + +# wal +python3 ./test.py -f wal/addOldWalTest.py \ No newline at end of file diff --git a/tests/pytest/query/bug2117.py b/tests/pytest/query/bug2117.py new file mode 100644 index 0000000000..1158b78a2a --- /dev/null +++ b/tests/pytest/query/bug2117.py @@ -0,0 +1,50 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import * +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + + def run(self): + tdSql.prepare() + print("==========step1") + print("create table && insert data") + + tdSql.execute("create table mt0 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool,c8 binary(20),c9 nchar(20))") + insertRows = 1000 + t0 = 1604298064000 + tdLog.info("insert %d rows" % (insertRows)) + for i in range(insertRows): + ret = tdSql.execute( + "insert into mt0 values (%d , %d,%d,%d,%d,%d,%d,%d,'%s','%s')" % + (t0+i,i%100,i/2,i%41,i%100,i%100,i*1.0,i%2,'taos'+str(i%100),'涛思'+str(i%100))) + print("==========step2") + print("test last with group by normal_col ") + tdSql.query('select last(c1) from mt0 group by c3') + tdSql.checkData(0,0,84) + tdSql.checkData(0,1,85) + + + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/pytest/query/bug2218.py b/tests/pytest/query/bug2218.py index bb92e5d9ce..080472383d 100644 --- a/tests/pytest/query/bug2218.py +++ b/tests/pytest/query/bug2218.py @@ -38,12 +38,12 @@ class TDTestCase: print("test col*1*1 desc ") tdSql.query('select c1,c1*1*1,c2*1*1,c3*1*1,c4*1*1,c5*1*1,c6*1*1 from mt0 order by ts desc limit 2') tdSql.checkData(0,0,99) - tdSql.checkData(0,1,0.0) - tdSql.checkData(0,2,0.0) - tdSql.checkData(0,3,0.0) - tdSql.checkData(0,4,0.0) - tdSql.checkData(0,5,0.0) - 
tdSql.checkData(0,6,0.0) + tdSql.checkData(0,1,99.0) + tdSql.checkData(0,2,499.0) + tdSql.checkData(0,3,99.0) + tdSql.checkData(0,4,99.0) + tdSql.checkData(0,5,99.0) + tdSql.checkData(0,6,999.0) def stop(self): diff --git a/tests/pytest/update/bug_td2279.py b/tests/pytest/update/bug_td2279.py new file mode 100644 index 0000000000..7e8640dfa0 --- /dev/null +++ b/tests/pytest/update/bug_td2279.py @@ -0,0 +1,67 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import taos +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import * + + +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + + self.ts = 1606700000000 + + def restartTaosd(self): + tdDnodes.stop(1) + tdDnodes.startWithoutSleep(1) + tdSql.execute("use db") + + def run(self): + tdSql.prepare() + + print("==============step1") + tdSql.execute("create table t (ts timestamp, a int)") + + for i in range(3276): + tdSql.execute("insert into t values(%d, 0)" % (self.ts + i)) + + newTs = 1606700010000 + for i in range(3275): + tdSql.execute("insert into t values(%d, 0)" % (self.ts + i)) + tdSql.execute("insert into t values(%d, 0)" % 1606700013280) + + self.restartTaosd() + + for i in range(1606700003275, 1606700006609): + tdSql.execute("insert into t values(%d, 0)" % i) + tdSql.execute("insert into t values(%d, 0)" % 1606700006612) + + self.restartTaosd() + + tdSql.execute("insert into t values(%d, 0)" % 1606700006610) + tdSql.query("select 
* from t") + tdSql.checkRows(6612) + + tdDnodes.stop(1) + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/script/basicSuite.sim b/tests/script/basicSuite.sim index 61a9d68d26..5e22e02297 100644 --- a/tests/script/basicSuite.sim +++ b/tests/script/basicSuite.sim @@ -14,7 +14,6 @@ run general/table/vgroup.sim run general/user/authority.sim run general/vector/metrics_mix.sim run general/vector/table_field.sim -run general/user/authority.sim run general/tag/set.sim run general/table/delete_writing.sim run general/stable/disk.sim diff --git a/tests/script/general/alter/dnode.sim b/tests/script/general/alter/dnode.sim index 20ce879979..73a095ec05 100644 --- a/tests/script/general/alter/dnode.sim +++ b/tests/script/general/alter/dnode.sim @@ -24,7 +24,7 @@ sql alter dnode 1 debugFlag 135 sql alter dnode 1 debugFlag 131 sql alter dnode 1 monitor 0 sql alter dnode 1 debugFlag 135 -sql alter dnode 1 monitorDebugFlag 135 +sql alter dnode 1 monDebugFlag 135 sql alter dnode 1 vDebugFlag 135 sql alter dnode 1 mDebugFlag 135 sql alter dnode 1 cDebugFlag 135 @@ -44,15 +44,15 @@ sql_error alter dnode 2 tmrDebugFlag 135 print ======== step3 sql_error alter $hostname1 debugFlag 135 -sql_error alter $hostname1 monitorDebugFlag 135 +sql_error alter $hostname1 monDebugFlag 135 sql_error alter $hostname1 vDebugFlag 135 sql_error alter $hostname1 mDebugFlag 135 sql_error alter dnode $hostname2 debugFlag 135 -sql_error alter dnode $hostname2 monitorDebugFlag 135 +sql_error alter dnode $hostname2 monDebugFlag 135 sql_error alter dnode $hostname2 vDebugFlag 135 sql_error alter dnode $hostname2 mDebugFlag 135 sql alter dnode $hostname1 debugFlag 135 -sql alter dnode $hostname1 monitorDebugFlag 135 +sql alter dnode $hostname1 monDebugFlag 135 sql alter dnode $hostname1 vDebugFlag 135 sql alter dnode $hostname1 tmrDebugFlag 131 diff --git 
a/tests/script/general/db/alter_option.sim b/tests/script/general/db/alter_option.sim index c8aa2480c5..1c3f543ffd 100644 --- a/tests/script/general/db/alter_option.sim +++ b/tests/script/general/db/alter_option.sim @@ -115,31 +115,31 @@ if $data7_db != 20,20,20 then return -1 endi -sql alter database db keep 10 -sql show databases -print keep $data7_db -if $data7_db != 20,20,10 then - return -1 -endi - sql alter database db keep 20 sql show databases print keep $data7_db -if $data7_db != 20,20,20 then +if $data7_db != 20,20,20 then return -1 endi sql alter database db keep 30 sql show databases print keep $data7_db -if $data7_db != 20,20,30 then +if $data7_db != 20,20,30 then + return -1 +endi + +sql alter database db keep 40 +sql show databases +print keep $data7_db +if $data7_db != 20,20,40 then return -1 endi sql alter database db keep 40 sql alter database db keep 30 sql alter database db keep 20 -sql alter database db keep 10 +sql_error alter database db keep 10 sql_error alter database db keep 9 sql_error alter database db keep 1 sql alter database db keep 0 @@ -277,4 +277,4 @@ sql_error alter database db prec 'us' print ============== step status sql_error alter database db status 'delete' -system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/general/parser/col_arithmetic_operation.sim b/tests/script/general/parser/col_arithmetic_operation.sim index 7adae8ef81..0cc02d088b 100644 --- a/tests/script/general/parser/col_arithmetic_operation.sim +++ b/tests/script/general/parser/col_arithmetic_operation.sim @@ -117,16 +117,17 @@ run general/parser/col_arithmetic_query.sim # ================================================================================================ print ====================> crash -# sql select spread(ts )/(1000*3600*24) from ca_stb0 interval(1y) +sql use $db +sql select spread(ts )/(1000*3600*24) from $stb interval(1y) -sql_error select first(c1, 
c2) - last(c1, c2) from stb interval(1y) -sql_error select first(ts) - last(ts) from stb interval(1y) -sql_error select top(c1, 2) - last(c1) from stb; -sql_error select stddev(c1) - last(c1) from stb; -sql_error select diff(c1) - last(c1) from stb; -sql_error select first(c7) - last(c7) from stb; -sql_error select first(c8) - last(c8) from stb; -sql_error select first(c9) - last(c9) from stb; +sql_error select first(c1, c2) - last(c1, c2) from $stb interval(1y) +sql_error select first(ts) - last(ts) from $stb interval(1y) +sql_error select top(c1, 2) - last(c1) from $stb; +sql_error select stddev(c1) - last(c1) from $stb; +sql_error select diff(c1) - last(c1) from $stb; +sql_error select first(c7) - last(c7) from $stb; +sql_error select first(c8) - last(c8) from $stb; +sql_error select first(c9) - last(c9) from $stb; sql_error select max(c2*2) from $tb sql_error select max(c1-c2) from $tb diff --git a/tests/script/general/parser/col_arithmetic_query.sim b/tests/script/general/parser/col_arithmetic_query.sim index 408b039144..53e2c98b56 100644 --- a/tests/script/general/parser/col_arithmetic_query.sim +++ b/tests/script/general/parser/col_arithmetic_query.sim @@ -62,24 +62,73 @@ if $data91 != 1.000000000 then return -1 endi -sql select (c1 * 2) % 7.9 from $tb order by ts desc; +sql select (c1 * 2) % 7.9, c1*1, c1*1*1, c1*c1, c1*c1*c1 from $tb order by ts desc; if $rows != 10000 then return -1 endi -if $data00 != 0.100000000 then - print expect 0.100000000, acutal:$data00 +if $data00 != 2.200000000 then + print expect 2.200000000, actual:$data00 return -1 endi -if $data10 != 2.100000000 then +if $data01 != 9.000000000 then return -1 endi -if $data90 != 6.000000000 then +if $data02 != 9.000000000 then + return -1 +endi + +if $data03 != 81.000000000 then + return -1 +endi + +if $data04 != 729.000000000 then + return -1 +endi + + +if $data10 != 0.200000000 then + return -1 +endi + +if $data11 != 8.000000000 then + return -1 +endi + +if $data12 != 8.000000000 then + 
return -1 +endi + +if $data13 != 64.000000000 then + return -1 +endi + +if $data14 != 512.000000000 then + return -1 +endi + +if $data90 != 0.000000000 then return -1 endi +if $data91 != 0.000000000 then + return -1 +endi + +if $data92 != 0.000000000 then + return -1 +endi + +if $data93 != 0.000000000 then + return -1 +endi + +if $data94 != 0.000000000 then + return -1 +endi + # [d.3] sql select c1 * c2 /4 from $tb where ts < 1537166000000 and ts > 1537156000000 if $rows != 17 then @@ -95,7 +144,7 @@ if $data10 != 16.000000000 then endi if $data20 != 20.250000000 then - print expect 20.250000000, acutal:$data21 + print expect 20.250000000, actual:$data21 return -1 endi @@ -320,7 +369,9 @@ sql_error select c7-c9 from $tb interval(2y) # multiple retrieve [d.20]=============================================================== sql select c2-c2, 911 from $tb -#======================================= aggregation function arithmetic query cases ================ +#======================================= aggregation function arithmetic query cases =================================== +# on $tb percentile() spread(ts) bug + # asc/desc order [d.2] sql select first(c1) * ( 2 / 3 ) from $stb order by ts asc; if $rows != 1 then @@ -349,11 +400,11 @@ if $data00 != 1.800000000 then return -1 endi -if $data01 != 100000 then +if $data01 != 100000.000000000 then return -1 endi -if $data02 != 200000 then +if $data02 != 200000.000000000 then return -1 endi @@ -374,77 +425,192 @@ if $data02 != 9.000000020 then return -1 endi -# all possible function in the arithmetic expressioin -sql select min(c1) * max(c2) /4, sum(c1) * percentile(c2, 20), apercentile(c4, 33) + 52/9, spread(c5)/min(c2) from $stb where ts < and ts > +# all possible function in the arithmetic expression, add more +sql select min(c1) * max(c2) /4, sum(c1) * apercentile(c2, 20), apercentile(c4, 33) + 52/9, spread(c5)/min(c2), count(1)/sum(c1), avg(c2)*count(c2) from $stb where ts >= '2018-09-17 09:00:00.000' and ts <= 
'2018-11-25 19:30:00.000'; +if $rows != 1 then + return -1 +endi -# no result return [d.3] +if $data00 != 0.000000000 then + return -1 +endi + +if $data01 != 225000.000000000 then + return -1 +endi + +if $data02 != 8.077777778 then + return -1 +endi + +if $data03 != inf then + return -1 +endi + +if $data04 != 0.444444444 then + return -1 +endi + +if $data05 != 450000.000000000 then + return -1 +endi + +# no result return [d.3]=============================================================== sql select first(c1) * 91 - 7, last(c3) from $stb where ts < 1537146000000 if $rows != 0 then return -1 endi # no result return [d.3] -sql select sum(c2) - avg(c2) from $tb where ts>xxx +sql select sum(c2) - avg(c2) from $stb where ts > '2018-11-25 19:30:00.000' if $rows != 0 then return -1 endi -# single row result aggregation [d.4] -sql select +# single row result aggregation [d.4]=================================================== +# all those cases are aggregation test cases. # error cases -sql_error select first(c1, c2) - last(c1, c2) from $tb +sql_error select first(c1, c2) - last(c1, c2) from $stb +sql_error select top(c1, 5) - bottom(c1, 5) from $stb +sql_error select first(*) - 99 from $stb # multi row result aggregation [d.4] -sql select top(c1, 1) - bottom(c1, 1) from $tb -sql select top(c1, 99) - bottom(c1, 99) from $tb +sql_error select top(c1, 1) - bottom(c1, 1) from $stb +sql_error select top(c1, 99) - bottom(c1, 99) from $stb -# all data types [d.6] -sql select c2-c1, c3/c2, c4*c3, c5%c4, c6+99%22 from $tb +# query on super table [d.5]============================================================= +# all cases in this part are query on super table + +# all data types [d.6]=================================================================== +sql select c2-c1, c3/c2, c4*c3, c5%c4, c6+99%22 from $stb # error case, ts/bool/binary/nchar not support arithmetic expression -sql_error select ts+ts from $tb -sql_error select ts+22 from $tb -sql_error select c7*12 from $tb 
-sql_error select c8/55 from $tb -sql_error select c9+c8 from $tb +sql_error select first(c7)*12 from $stb +sql_error select last(c8)/55 from $stb +sql_error select last_row(c9) + last_row(c8) from $stb -# arithmetic expression in join [d.7] +# arithmetic expression in join [d.7]=============================================================== -# arithmetic expression in union [d.8] +# arithmetic expression in union [d.8]=============================================================== -# arithmetic expression in group by [d.9] +# arithmetic expression in group by [d.9]=============================================================== # in group by tag -# not support for normal table -sql_error select c5*99 from $tb group by t1 +sql select avg(c4)*99 from $stb group by t1 +if $rows != 10 then + return -1 +endi + +if $data00 != 445.500000000 then + return -1 +endi + +if $data01 != 0 then + return -1 +endi + +if $data90 != 445.500000000 then + return -1 +endi + +if $data91 != 9 then + return -1 +endi # in group by column -sql_error select c6-c6+c3*12 from $tb group by c3; +sql select apercentile(c6, 50)-first(c6)+last(c5)*12, last(c5)*12 from ca_stb0 group by c2; +if $rows != 10 then + return -1 +endi -sql select first(c6) - last(c6) *12 / count(*) from $tb group by c3; +if $data00 != 0.000000000 then + return -1 +endi -# limit offset [d.10] -sql select c6-c6+12 from $tb limit 12 offset 99; -sql select c4/99.123 from $tb limit 1 offset 9999; +if $data01 != 0.000000000 then + return -1 +endi -# slimit/soffset not suport for normal table query. [d.11] -sql_error select sum(c1) from $tb slimit 1 soffset 19; +if $data10 != 12.000000000 then + return -1 +endi -# fill [d.12] -sql_error select c2-c2, c3-c4, c5%c6 from $tb fill(value, 12); +if $data11 != 12.000000000 then + return -1 +endi -# constant column. 
[d.13] +if $data20 != 24.000000000 then + return -1 +endi + +if $data21 != 24.000000000 then + return -1 +endi + +sql_error select first(c6) - last(c6) *12 / count(*) from $stb group by c3; + +sql select first(c6) - last(c6) *12 / count(*) from $stb group by c5; +if $rows != 10 then + return -1 +endi + +if $data00 != 0.000000000 then + return -1 +endi + +if $data10 != 0.997600000 then + return -1 +endi + +if $data90 != 8.978400000 then + return -1 +endi + +# limit offset [d.10]=============================================================== +sql select first(c6) - sum(c6) + 12 from $stb limit 12 offset 0; +if $rows != 1 then + return -1 +endi + +if $data00 != -449988.000000000 then + return -1 +endi + +sql select apercentile(c4, 21) / 99.123 from $stb limit 1 offset 1; +if $rows != 0 then + return -1 +endi + +sql select apercentile(c4, 21) / sum(c4) from $stb interval(1s) limit 1 offset 1; +if $rows != 1 then + return -1 +endi + +# slimit/soffset not support for normal table query. [d.11]=============================================================== +sql select sum(c1) from $stb slimit 1 soffset 19; +if $rows != 0 then + return -1 +endi + +sql select sum(c1) from $stb interval(1s) group by tbname slimit 1 soffset 1 +sql select sum(c1) from ca_stb0 interval(1s) group by tbname slimit 2 soffset 4 limit 10 offset 1 + +# fill [d.12]=============================================================== +sql_error select first(c1)-last(c1), sum(c3)*count(c3), spread(c5 ) % count(*) from $stb interval(1s) fill(prev); +sql_error select first(c1) from $stb fill(value, 20); + +# constant column. [d.13]=============================================================== -# column value filter [d.14] +# column value filter [d.14]=============================================================== -# tag filter(not support for normal table). [d.15] -sql_error select sum(c2)+99 from $tb where t1=12; +# tag filter. 
[d.15]=============================================================== +sql select sum(c2)+99 from $stb where t1=12; -# multi-field output [d.16] +# multi-field output [d.16]=============================================================== sql select count(*), sum(c1)*avg(c2), avg(c3)*count(c3), sum(c3), sum(c4), first(c7), last(c8), first(c9), first(c7), last(c8) from $tb sql select c4*1+1/2 from $tb @@ -461,18 +627,30 @@ if $data90 != 9.500000000 then return -1 endi -# interval query [d.17] -sql_error select c2*c2, c3-c3, c4+9 from $tb interval(1s) -sql_error select c7-c9 from $tb interval(2y) +# interval query [d.17]=============================================================== +sql select avg(c2)*count(c2), sum(c3)-first(c3), last(c4)+9 from $stb interval(1s) +if $rows != 10000 then + return -1 +endi -# aggregation query [d.18] -# see test cases below +if $data00 != @18-09-17 09:00:00.000@ then + return -1 +endi -# first/last query [d.19] -# see test cases below +sql_error select first(c7)- last(c1) from $tb interval(2y) -# multiple retrieve [d.20] -sql select c2-c2 from $tb; +# aggregation query [d.18]=============================================================== +# all cases in this part are aggregation query test. 
+ +# first/last query [d.19]=============================================================== + + +# multiple retrieve [d.20]=============================================================== +sql select c2-c2 from $tb sql select first(c1)-last(c1), spread(c2), max(c3) - min(c3), avg(c4)*count(c4) from $tb + + +#====================================================super table query================================================== + diff --git a/tests/script/general/parser/first_last.sim b/tests/script/general/parser/first_last.sim index a934d3bcab..773f92afcf 100644 --- a/tests/script/general/parser/first_last.sim +++ b/tests/script/general/parser/first_last.sim @@ -46,7 +46,8 @@ while $i < $tbNum endw $i = $i + 1 -endw +endw + $ts = $ts + 60000 $tb = $tbPrefix . 0 sql insert into $tb (ts) values ( $ts ) @@ -84,4 +85,43 @@ sleep 500 run general/parser/first_last_query.sim +print =================> insert data regression test +sql create database test keep 36500 +sql use test +sql create table tm0 (ts timestamp, k int) + +print =========================> td-2298 +$ts0 = 1537146000000 +$xs = 6000 + +$x = 0 +while $x < 5000 + $ts = $ts0 + $xs + $ts1 = $ts + $xs + $x1 = $x + 1 + + sql insert into tm0 values ( $ts , $x ) ( $ts1 , $x1 ) + $x = $x1 + $ts0 = $ts1 +endw + +system sh/exec.sh -n dnode1 -s stop -x SIGINT +sleep 3000 +system sh/exec.sh -n dnode1 -s start +print ================== server restart completed +sql connect +sleep 500 + +sql use test +sql select count(*), last(ts) from tm0 interval(1s) +if $rows != 10000 then + print expect 10000, actual: $rows + return -1 +endi + +sql select last(ts) from tm0 interval(1s) +if $rows != 10000 then + return -1 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/general/parser/first_last_query.sim b/tests/script/general/parser/first_last_query.sim index a982f10362..52d888b04d 100644 --- a/tests/script/general/parser/first_last_query.sim +++ 
b/tests/script/general/parser/first_last_query.sim @@ -266,4 +266,6 @@ endi if $data14 != @test2@ then print expect test2 , actual: $data14 return -1 -endi \ No newline at end of file +endi + +sql drop table stest \ No newline at end of file diff --git a/tests/script/general/parser/function.sim b/tests/script/general/parser/function.sim new file mode 100644 index 0000000000..34e9844f71 --- /dev/null +++ b/tests/script/general/parser/function.sim @@ -0,0 +1,228 @@ +system sh/stop_dnodes.sh + +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c walLevel -v 0 +system sh/cfg.sh -n dnode1 -c tableMetaKeepTimer -v 3 +system sh/exec.sh -n dnode1 -s start +sleep 500 +sql connect + +$dbPrefix = m_func_db +$tbPrefix = m_func_tb +$mtPrefix = m_func_mt + +$tbNum = 10 +$rowNum = 5 +$totalNum = $tbNum * $rowNum +$ts0 = 1537146000000 +$delta = 600000 +print ========== alter.sim +$i = 0 +$db = $dbPrefix . $i +$mt = $mtPrefix . $i + +sql drop database if exists $db +sql create database $db +sql use $db + +print =====================================> test case for twa in single block + +sql create table t1 (ts timestamp, k float); +sql insert into t1 values('2015-08-18 00:00:00', 2.064); +sql insert into t1 values('2015-08-18 00:06:00', 2.116); +sql insert into t1 values('2015-08-18 00:12:00', 2.028); +sql insert into t1 values('2015-08-18 00:18:00', 2.126); +sql insert into t1 values('2015-08-18 00:24:00', 2.041); +sql insert into t1 values('2015-08-18 00:30:00', 2.051); + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:05:00' +if $rows != 1 then + return -1 +endi + +if $data00 != 2.063999891 then + return -1 +endi + +if $data01 != 2.063999891 then + return -1 +endi + +if $data02 != 1 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:07:00' +if $rows != 1 then + return -1 +endi + +if $data00 != 2.089999914 then + return -1 +endi + +if $data01 != 
2.089999914 then + return -1 +endi + +if $data02 != 2 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:07:00' interval(1m) order by ts asc +if $rows != 2 then + return -1 +endi + +if $data00 != @15-08-18 00:00:00.000@ then + return -1 +endi + +if $data01 != 2.068333156 then + return -1 +endi + +if $data02 != 2.063999891 then + return -1 +endi + +if $data03 != 1 then + return -1 +endi + +if $data10 != @15-08-18 00:06:00.000@ then + return -1 +endi + +if $data11 != 2.115999937 then + return -1 +endi + +if $data12 != 2.115999937 then + return -1 +endi + +if $data13 != 1 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:07:00' interval(1m) order by ts desc; +if $rows != 2 then + return -1 +endi + +if $data00 != @15-08-18 00:06:00.00@ then + return -1 +endi + +if $data01 != 2.115999937 then + return -1 +endi + +if $data02 != 2.115999937 then + return -1 +endi + +if $data03 != 1 then + return -1 +endi + +if $data11 != 2.068333156 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:27:00' interval(10m) order by ts asc +if $rows != 3 then + return -1 +endi + +if $data01 != 2.088666666 then + return -1 +endi + +if $data02 != 2.089999914 then + return -1 +endi + +if $data03 != 2 then + return -1 +endi + +if $data11 != 2.077099980 then + return -1 +endi + +if $data12 != 2.077000022 then + return -1 +endi + +if $data13 != 2 then + return -1 +endi + +if $data21 != 2.069333235 then + return -1 +endi + +if $data22 != 2.040999889 then + return -1 +endi + +if $data23 != 1 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:27:00' interval(10m) order by ts desc +if $rows != 3 then + return -1 +endi + +if $data01 != 2.069333235 then + return -1 +endi + +if $data11 != 2.077099980 then + return -1 +endi + +if 
$data21 != 2.088666666 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:30:00' order by ts asc +if $data00 != 2.073699975 then + return -1 +endi + +if $data01 != 2.070999980 then + return -1 +endi + +if $data02 != 6 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:30:00' order by ts desc +if $rows != 1 then + return -1 +endi + +if $data00 != 2.073699975 then + return -1 +endi + +if $data01 != 2.070999980 then + return -1 +endi + +if $data02 != 6 then + return -1 +endi + +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:30:00' interval(10m) order by ts asc +sql select twa(k),avg(k),count(1) from t1 where ts>='2015-8-18 00:00:00' and ts<='2015-8-18 00:30:00' interval(10m) order by ts desc + + +#todo add test case while column filte exists. + +select count(*),TWA(k) from tm0 where ts>='1970-1-1 13:43:00' and ts<='1970-1-1 13:44:10' interval(9s) diff --git a/tests/script/general/parser/limit1_tb.sim b/tests/script/general/parser/limit1_tb.sim index 1e473eb858..72b63256db 100644 --- a/tests/script/general/parser/limit1_tb.sim +++ b/tests/script/general/parser/limit1_tb.sim @@ -703,13 +703,13 @@ sql select twa(c1), twa(c2), twa(c3), twa(c4), twa(c5), twa(c6) from $tb where t if $rows != 1 then return -1 endi -if $data00 != 4.499549955 then +if $data00 != 4.500000000 then return -1 endi -if $data02 != 4.499549955 then +if $data02 != 4.500000000 then return -1 endi -if $data05 != 4.499549955 then +if $data05 != 4.500000000 then return -1 endi @@ -717,10 +717,12 @@ sql select first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) from if $rows != 0 then return -1 endi + sql select first(c1), first(c2), first(c3), first(c4), first(c5), first(c6) from $tb where ts >= $ts0 and ts <= $tsu interval(30m) limit 3 offset 1 if $rows != 3 then return -1 endi + if $data01 != 3 then return 
-1 endi @@ -731,7 +733,6 @@ if $data23 != 9.00000 then return -1 endi - sql select last(c1), last(c2), last(c3), last(c4), last(c5), last(c6) from $tb where ts >= $ts0 and ts <= $tsu limit 5 offset 1 if $rows != 0 then return -1 diff --git a/tests/script/general/parser/limit_tb.sim b/tests/script/general/parser/limit_tb.sim index b917627fdf..45f5541208 100644 --- a/tests/script/general/parser/limit_tb.sim +++ b/tests/script/general/parser/limit_tb.sim @@ -327,22 +327,22 @@ sql select twa(c1), twa(c2), twa(c3), twa(c4), twa(c5), twa(c6) from $tb where t if $rows != 1 then return -1 endi -if $data00 != 4.000000000 then +if $data00 != 4.500000000 then return -1 endi -if $data01 != 4.000000000 then +if $data01 != 4.500000000 then return -1 endi -if $data02 != 4.000000000 then +if $data02 != 4.500000000 then return -1 endi -if $data03 != 4.000000000 then +if $data03 != 4.500000000 then return -1 endi -if $data04 != 4.000000000 then +if $data04 != 4.500000000 then return -1 endi -if $data05 != 4.000000000 then +if $data05 != 4.500000000 then return -1 endi @@ -690,13 +690,13 @@ sql select twa(c1), twa(c2), twa(c3), twa(c4), twa(c5), twa(c6) from $tb where t if $rows != 1 then return -1 endi -if $data00 != 4.000000000 then +if $data00 != 4.500000000 then return -1 endi -if $data02 != 4.000000000 then +if $data02 != 4.500000000 then return -1 endi -if $data05 != 4.000000000 then +if $data05 != 4.500000000 then return -1 endi diff --git a/tests/script/general/parser/where.sim b/tests/script/general/parser/where.sim index 8e17220b5b..c5b600b514 100644 --- a/tests/script/general/parser/where.sim +++ b/tests/script/general/parser/where.sim @@ -131,7 +131,6 @@ if $data00 != $rowNum then return -1 endi - ## like sql_error select * from $mt where c1 like 1 #sql_error select * from $mt where t1 like 1 @@ -178,7 +177,8 @@ sql create table wh_mt2_tb1 using wh_mt2 tags ('wh_mt2_tb1') # 2019-01-01 09:00:00.000 1546304400000 # 2019-01-01 09:10:00.000 1546305000000 sql insert into 
wh_mt2_tb1 values ('2019-01-01 00:00:00.000', '2019-01-01 09:00:00.000', 'binary10', 'nchar10') -sql insert into wh_mt2_tb1 values ('2019-01-01 00:10:00.000', '2019-01-01 09:10:00.000', 'binary10', 'nchar10') +sql insert into wh_mt2_tb1 values ('2019-01-01 00:10:00.000', '2019-01-01 09:10:00.000', 'binary10', 'nchar10') + sql select * from wh_mt2_tb1 where c1 > 1546304400000 if $rows != 1 then return -1 diff --git a/tests/script/general/wal/sync.sim b/tests/script/general/wal/sync.sim index abaf22f919..c6f7402b87 100644 --- a/tests/script/general/wal/sync.sim +++ b/tests/script/general/wal/sync.sim @@ -82,6 +82,7 @@ restful d1 table_rest 1591772800 30000 restful d1 table_rest 1591872800 30000 restful d1 table_rest 1591972800 30000 +sleep 1000 sql select * from table_rest; print rows: $rows if $rows != 300000 then diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index e26778e86b..cd2f3772eb 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -120,7 +120,7 @@ echo "cDebugFlag 143" >> $TAOS_CFG echo "jnidebugFlag 143" >> $TAOS_CFG echo "odbcdebugFlag 143" >> $TAOS_CFG echo "httpDebugFlag 143" >> $TAOS_CFG -echo "monitorDebugFlag 143" >> $TAOS_CFG +echo "monDebugFlag 143" >> $TAOS_CFG echo "mqttDebugFlag 143" >> $TAOS_CFG echo "qdebugFlag 143" >> $TAOS_CFG echo "rpcDebugFlag 143" >> $TAOS_CFG diff --git a/tests/script/tmp/mnodes.sim b/tests/script/tmp/mnodes.sim index de02ae741b..e11140028d 100644 --- a/tests/script/tmp/mnodes.sim +++ b/tests/script/tmp/mnodes.sim @@ -20,6 +20,10 @@ system sh/cfg.sh -n dnode1 -c maxTablesPerVnode -v 20000 system sh/cfg.sh -n dnode2 -c maxTablesPerVnode -v 20000 system sh/cfg.sh -n dnode3 -c maxTablesPerVnode -v 20000 +system sh/cfg.sh -n dnode1 -c minTablesPerVnode -v 1000 +system sh/cfg.sh -n dnode2 -c minTablesPerVnode -v 1000 +system sh/cfg.sh -n dnode3 -c minTablesPerVnode -v 1000 + system sh/cfg.sh -n dnode1 -c maxVgroupsPerDb -v 20 system sh/cfg.sh -n dnode2 -c maxVgroupsPerDb -v 20 system 
sh/cfg.sh -n dnode3 -c maxVgroupsPerDb -v 20 diff --git a/tests/test-all.sh b/tests/test-all.sh index 5897978bce..14b649eddf 100755 --- a/tests/test-all.sh +++ b/tests/test-all.sh @@ -137,6 +137,12 @@ if [ "$2" != "sim" ]; then elif [ "$1" == "pytest" ]; then echo "### run Python full test ###" runPyCaseOneByOne fulltest.sh + elif [ "$1" == "p1" ]; then + echo "### run Python_1 test ###" + runPyCaseOneByOne pytest_1.sh + elif [ "$1" == "p2" ]; then + echo "### run Python_2 test ###" + runPyCaseOneByOne pytest_2.sh elif [ "$1" == "b2" ] || [ "$1" == "b3" ]; then exit $(($totalFailed + $totalPyFailed)) elif [ "$1" == "smoke" ] || [ -z "$1" ]; then diff --git a/tests/test/c/CMakeLists.txt b/tests/test/c/CMakeLists.txt index 26aa20e647..11480a8ba2 100644 --- a/tests/test/c/CMakeLists.txt +++ b/tests/test/c/CMakeLists.txt @@ -31,8 +31,8 @@ IF (TD_LINUX) #add_executable(createTablePerformance createTablePerformance.c) #target_link_libraries(createTablePerformance taos_static tutil common pthread) - #add_executable(createNormalTable createNormalTable.c) - #target_link_libraries(createNormalTable taos_static tutil common pthread) + add_executable(createNormalTable createNormalTable.c) + target_link_libraries(createNormalTable taos_static tutil common pthread) #add_executable(queryPerformance queryPerformance.c) #target_link_libraries(queryPerformance taos_static tutil common pthread) @@ -45,5 +45,8 @@ IF (TD_LINUX) #add_executable(invalidTableId invalidTableId.c) #target_link_libraries(invalidTableId taos_static tutil common pthread) + + add_executable(hashIterator hashIterator.c) + target_link_libraries(hashIterator taos_static tutil common pthread) ENDIF() diff --git a/tests/test/c/hashIterator.c b/tests/test/c/hashIterator.c new file mode 100644 index 0000000000..cbd8a0895e --- /dev/null +++ b/tests/test/c/hashIterator.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "taos.h" +#include "tulog.h" +#include "tutil.h" +#include "hash.h" + +typedef struct HashTestRow { + int32_t keySize; + char key[100]; +} HashTestRow; + +int main(int argc, char *argv[]) { + _hash_fn_t hashFp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + void * hashHandle = taosHashInit(100, hashFp, true, HASH_ENTRY_LOCK); + + pPrint("insert 3 rows to hash"); + for (int32_t t = 0; t < 3; ++t) { + HashTestRow row = {0}; + row.keySize = sprintf(row.key, "0.db.st%d", t); + + taosHashPut(hashHandle, row.key, row.keySize, &row, sizeof(HashTestRow)); + } + + pPrint("start iterator"); + HashTestRow *row = taosHashIterate(hashHandle, NULL); + while (row) { + pPrint("drop key:%s", row->key); + taosHashRemove(hashHandle, row->key, row->keySize); + + pPrint("get rows from hash"); + for (int32_t t = 0; t < 3; ++t) { + HashTestRow r = {0}; + r.keySize = sprintf(r.key, "0.db.st%d", t); + + void *result = taosHashGet(hashHandle, r.key, r.keySize); + pPrint("get key:%s result:%p", r.key, result); + } + + //Before getting the next iterator, the object just deleted can be obtained + row = taosHashIterate(hashHandle, row); + } + + pPrint("stop iterator"); + taosHashCancelIterate(hashHandle, row); + + pPrint("get rows from hash"); + for (int32_t t = 0; t < 3; ++t) { + HashTestRow r = {0}; + r.keySize = sprintf(r.key, "0.db.st%d", t); + + void *result = taosHashGet(hashHandle, r.key, 
r.keySize); + pPrint("get key:%s result:%p", r.key, result); + } + + return 0; +} \ No newline at end of file