diff --git a/.gitmodules b/.gitmodules index 6eec238994..156226d544 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "tests/examples/rust"] path = tests/examples/rust url = https://github.com/songtianyi/tdengine-rust-bindings.git +[submodule "src/connector/hivemq-tdengine-extension"] + path = src/connector/hivemq-tdengine-extension + url = https://github.com/huskar-t/hivemq-tdengine-extension.git \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index ea50d6ef5a..3968451d87 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -79,7 +79,14 @@ pipeline { cmake .. > /dev/null make > /dev/null cd ${WKC}/tests/pytest - ./crash_gen.sh -a -p -t 4 -s 2000 + ''' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { + sh ''' + cd ${WKC}/tests/pytest + ./crash_gen.sh -a -p -t 4 -s 2000 + ''' + } + sh ''' date cd ${WKC}/tests ./test-all.sh b2 diff --git a/packaging/deb/makedeb.sh b/packaging/deb/makedeb.sh index 450c6a8f55..a7bb22f345 100755 --- a/packaging/deb/makedeb.sh +++ b/packaging/deb/makedeb.sh @@ -58,7 +58,7 @@ cp -r ${top_dir}/src/connector/grafanaplugin ${pkg_dir}${install_home_pat cp -r ${top_dir}/src/connector/python ${pkg_dir}${install_home_path}/connector cp -r ${top_dir}/src/connector/go ${pkg_dir}${install_home_path}/connector cp -r ${top_dir}/src/connector/nodejs ${pkg_dir}${install_home_path}/connector -cp ${compile_dir}/build/lib/taos-jdbcdriver*dist.* ${pkg_dir}${install_home_path}/connector +cp ${compile_dir}/build/lib/taos-jdbcdriver*dist.* ${pkg_dir}${install_home_path}/connector ||: cp -r ${compile_dir}/../packaging/deb/DEBIAN ${pkg_dir}/ chmod 755 ${pkg_dir}/DEBIAN/* diff --git a/packaging/release.sh b/packaging/release.sh index 7542a5b4ca..68f947ccab 100755 --- a/packaging/release.sh +++ b/packaging/release.sh @@ -156,9 +156,15 @@ build_time=$(date +"%F %R") # get commint id from git gitinfo=$(git rev-parse --verify HEAD) -enterprise_dir="${top_dir}/../enterprise" -cd ${enterprise_dir} -gitinfoOfInternal=$(git rev-parse --verify HEAD) + +if [[ "$verMode" == "cluster" ]]; then + enterprise_dir="${top_dir}/../enterprise" + cd ${enterprise_dir} + gitinfoOfInternal=$(git rev-parse --verify HEAD) +else + gitinfoOfInternal=NULL +fi + cd ${curr_dir} # 2. cmake executable file @@ -193,23 +199,35 @@ cd ${curr_dir} # 3. Call the corresponding script for packaging if [ "$osType" != "Darwin" ]; then if [[ "$verMode" != "cluster" ]] && [[ "$cpuType" == "x64" ]] && [[ "$dbName" == "taos" ]]; then - echo "====do deb package for the ubuntu system====" - output_dir="${top_dir}/debs" - if [ -d ${output_dir} ]; then - ${csudo} rm -rf ${output_dir} + ret='0' + command -v dpkg >/dev/null 2>&1 || { ret='1'; } + if [ "$ret" -eq 0 ]; then + echo "====do deb package for the ubuntu system====" + output_dir="${top_dir}/debs" + if [ -d ${output_dir} ]; then + ${csudo} rm -rf ${output_dir} + fi + ${csudo} mkdir -p ${output_dir} + cd ${script_dir}/deb + ${csudo} ./makedeb.sh ${compile_dir} ${output_dir} ${verNumber} ${cpuType} ${osType} ${verMode} ${verType} + else + echo "==========dpkg command not exist, so not release deb package!!!" fi - ${csudo} mkdir -p ${output_dir} - cd ${script_dir}/deb - ${csudo} ./makedeb.sh ${compile_dir} ${output_dir} ${verNumber} ${cpuType} ${osType} ${verMode} ${verType} - echo "====do rpm package for the centos system====" - output_dir="${top_dir}/rpms" - if [ -d ${output_dir} ]; then - ${csudo} rm -rf ${output_dir} + ret='0' + command -v rpmbuild >/dev/null 2>&1 || { ret='1'; } + if [ "$ret" -eq 0 ]; then + echo "====do rpm package for the centos system====" + output_dir="${top_dir}/rpms" + if [ -d ${output_dir} ]; then + ${csudo} rm -rf ${output_dir} + fi + ${csudo} mkdir -p ${output_dir} + cd ${script_dir}/rpm + ${csudo} ./makerpm.sh ${compile_dir} ${output_dir} ${verNumber} ${cpuType} ${osType} ${verMode} ${verType} + else + echo "==========rpmbuild command not exist, so not release rpm package!!!" fi - ${csudo} mkdir -p ${output_dir} - cd ${script_dir}/rpm - ${csudo} ./makerpm.sh ${compile_dir} ${output_dir} ${verNumber} ${cpuType} ${osType} ${verMode} ${verType} fi echo "====do tar.gz package for all systems====" diff --git a/packaging/rpm/tdengine.spec b/packaging/rpm/tdengine.spec index 4e40263dc4..e49baeffc7 100644 --- a/packaging/rpm/tdengine.spec +++ b/packaging/rpm/tdengine.spec @@ -65,7 +65,7 @@ cp -r %{_compiledir}/../src/connector/grafanaplugin %{buildroot}%{homepath}/conn cp -r %{_compiledir}/../src/connector/python %{buildroot}%{homepath}/connector cp -r %{_compiledir}/../src/connector/go %{buildroot}%{homepath}/connector cp -r %{_compiledir}/../src/connector/nodejs %{buildroot}%{homepath}/connector -cp %{_compiledir}/build/lib/taos-jdbcdriver*dist.* %{buildroot}%{homepath}/connector +cp %{_compiledir}/build/lib/taos-jdbcdriver*dist.* %{buildroot}%{homepath}/connector ||: cp -r %{_compiledir}/../tests/examples/* %{buildroot}%{homepath}/examples #Scripts executed before installation diff --git a/packaging/tools/make_install.sh b/packaging/tools/make_install.sh index eff70d8035..831012851a 100755 --- a/packaging/tools/make_install.sh +++ b/packaging/tools/make_install.sh @@ -278,11 +278,11 @@ function install_service_on_sysvinit() { # Install taosd service if ((${os_type}==1)); then - ${csudo} cp -f ${script_dir}/../deb/init.d/taosd ${install_main_dir}/init.d - ${csudo} cp ${script_dir}/../deb/init.d/taosd ${service_config_dir} && ${csudo} chmod a+x ${service_config_dir}/taosd + ${csudo} cp -f ${script_dir}/../deb/taosd ${install_main_dir}/init.d + ${csudo} cp ${script_dir}/../deb/taosd ${service_config_dir} && ${csudo} chmod a+x ${service_config_dir}/taosd elif ((${os_type}==2)); then - ${csudo} cp -f ${script_dir}/../rpm/init.d/taosd ${install_main_dir}/init.d - ${csudo} cp ${script_dir}/../rpm/init.d/taosd ${service_config_dir} && ${csudo} chmod a+x ${service_config_dir}/taosd + ${csudo} cp -f ${script_dir}/../rpm/taosd ${install_main_dir}/init.d + ${csudo} cp ${script_dir}/../rpm/taosd ${service_config_dir} && ${csudo} chmod a+x ${service_config_dir}/taosd fi #restart_config_str="taos:2345:respawn:${service_config_dir}/taosd start" diff --git a/packaging/tools/makeclient.sh b/packaging/tools/makeclient.sh index e17c678f26..69fa53c087 100755 --- a/packaging/tools/makeclient.sh +++ b/packaging/tools/makeclient.sh @@ -110,7 +110,7 @@ mkdir -p ${install_dir}/connector if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then if [ "$osType" != "Darwin" ]; then - cp ${build_dir}/lib/*.jar ${install_dir}/connector + cp ${build_dir}/lib/*.jar ${install_dir}/connector ||: fi cp -r ${connector_dir}/grafanaplugin ${install_dir}/connector/ cp -r ${connector_dir}/python ${install_dir}/connector/ diff --git a/packaging/tools/makeclient_power.sh b/packaging/tools/makeclient_power.sh index faa5a03f52..7e8bef0dff 100755 --- a/packaging/tools/makeclient_power.sh +++ b/packaging/tools/makeclient_power.sh @@ -135,7 +135,7 @@ mkdir -p ${install_dir}/connector if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then if [ "$osType" != "Darwin" ]; then - cp ${build_dir}/lib/*.jar ${install_dir}/connector + cp ${build_dir}/lib/*.jar ${install_dir}/connector ||: fi cp -r ${connector_dir}/grafanaplugin ${install_dir}/connector/ cp -r ${connector_dir}/python ${install_dir}/connector/ diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 75b45b544e..a6d868ed1d 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -124,7 +124,7 @@ cp ${lib_files} ${install_dir}/driver connector_dir="${code_dir}/connector" mkdir -p ${install_dir}/connector if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then - cp ${build_dir}/lib/*.jar ${install_dir}/connector + cp ${build_dir}/lib/*.jar ${install_dir}/connector ||: cp -r ${connector_dir}/grafanaplugin ${install_dir}/connector/ cp -r ${connector_dir}/python ${install_dir}/connector/ cp -r ${connector_dir}/go ${install_dir}/connector diff --git a/packaging/tools/makepkg_power.sh b/packaging/tools/makepkg_power.sh index 2c02b99787..0ffcb63e3f 100755 --- a/packaging/tools/makepkg_power.sh +++ b/packaging/tools/makepkg_power.sh @@ -156,7 +156,7 @@ cp ${lib_files} ${install_dir}/driver connector_dir="${code_dir}/connector" mkdir -p ${install_dir}/connector if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then - cp ${build_dir}/lib/*.jar ${install_dir}/connector + cp ${build_dir}/lib/*.jar ${install_dir}/connector ||: cp -r ${connector_dir}/grafanaplugin ${install_dir}/connector/ cp -r ${connector_dir}/python ${install_dir}/connector/ cp -r ${connector_dir}/go ${install_dir}/connector diff --git a/src/client/inc/tscUtil.h b/src/client/inc/tscUtil.h index 3df493349e..d86e1aa0fb 100644 --- a/src/client/inc/tscUtil.h +++ b/src/client/inc/tscUtil.h @@ -82,6 +82,7 @@ typedef struct SJoinSupporter { char* pIdTagList; // result of first stage tags int32_t totalLen; int32_t num; + SArray* pVgroupTables; } SJoinSupporter; typedef struct SVgroupTableInfo { @@ -215,7 +216,7 @@ SQueryInfo *tscGetQueryInfoDetailSafely(SSqlCmd *pCmd, int32_t subClauseIndex); void tscClearTableMetaInfo(STableMetaInfo* pTableMetaInfo, bool removeFromCache); STableMetaInfo* tscAddTableMetaInfo(SQueryInfo* pQueryInfo, const char* name, STableMeta* pTableMeta, - SVgroupsInfo* vgroupList, SArray* pTagCols); + SVgroupsInfo* vgroupList, SArray* pTagCols, SArray* pVgroupTables); STableMetaInfo* tscAddEmptyMetaInfo(SQueryInfo *pQueryInfo); int32_t tscAddSubqueryInfo(SSqlCmd *pCmd); @@ -224,6 +225,8 @@ void tscInitQueryInfo(SQueryInfo* pQueryInfo); void tscClearSubqueryInfo(SSqlCmd* pCmd); void tscFreeVgroupTableInfo(SArray* pVgroupTables); +SArray* tscCloneVgroupTableInfo(SArray* pVgroupTables); +void tscRemoveVgroupTableGroup(SArray* pVgroupTable, int32_t index); int tscGetSTableVgroupInfo(SSqlObj* pSql, int32_t clauseIndex); int tscGetTableMeta(SSqlObj* pSql, STableMetaInfo* pTableMetaInfo); diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index fa215db270..78b0bcce9c 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -30,6 +30,7 @@ extern "C" { #include "tsqlfunction.h" #include "tutil.h" #include "tcache.h" +#include "tref.h" #include "qExecutor.h" #include "qSqlparser.h" @@ -446,7 +447,7 @@ void tscFreeSqlObj(SSqlObj *pSql); void tscFreeRegisteredSqlObj(void *pSql); void tscFreeTableMetaHelper(void *pTableMeta); -void tscCloseTscObj(STscObj *pObj); +void tscCloseTscObj(void *pObj); // todo move to taos? or create a new file: taos_internal.h TAOS *taos_connect_a(char *ip, char *user, char *pass, char *db, uint16_t port, void (*fp)(void *, TAOS_RES *, int), @@ -516,6 +517,7 @@ extern void * tscQhandle; extern int tscKeepConn[]; extern int tsInsertHeadSize; extern int tscNumOfThreads; +extern int tscRefId; extern SRpcCorEpSet tscMgmtEpSet; diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index 1b4f92d3fc..12d3b7dfd3 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -2461,12 +2461,22 @@ static void percentile_function(SQLFunctionCtx *pCtx) { // the first stage, only acquire the min/max value if (pInfo->stage == 0) { if (pCtx->preAggVals.isSet) { - if (GET_DOUBLE_VAL(&pInfo->minval) > pCtx->preAggVals.statis.min) { - SET_DOUBLE_VAL(&pInfo->minval, (double)pCtx->preAggVals.statis.min); + double tmin = 0.0, tmax = 0.0; + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { + tmin = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.min); + tmax = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.max); + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + tmin = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.min); + tmax = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.max); + } else { + assert(true); + } + if (GET_DOUBLE_VAL(&pInfo->minval) > tmin) { + SET_DOUBLE_VAL(&pInfo->minval, tmin); } - if (GET_DOUBLE_VAL(&pInfo->maxval) < pCtx->preAggVals.statis.max) { - SET_DOUBLE_VAL(&pInfo->maxval, (double)pCtx->preAggVals.statis.max); + if (GET_DOUBLE_VAL(&pInfo->maxval) < tmax) { + SET_DOUBLE_VAL(&pInfo->maxval, tmax); } pInfo->numOfElems += (pCtx->size - pCtx->preAggVals.statis.numOfNull); @@ -4025,11 +4035,11 @@ static void ts_comp_function(SQLFunctionCtx *pCtx) { // primary ts must be existed, so no need to check its existance if (pCtx->order == TSDB_ORDER_ASC) { - tsBufAppend(pTSbuf, 0, &pCtx->tag, input, pCtx->size * TSDB_KEYSIZE); + tsBufAppend(pTSbuf, (int32_t)pCtx->param[0].i64Key, &pCtx->tag, input, pCtx->size * TSDB_KEYSIZE); } else { for (int32_t i = pCtx->size - 1; i >= 0; --i) { char *d = GET_INPUT_CHAR_INDEX(pCtx, i); - tsBufAppend(pTSbuf, 0, &pCtx->tag, d, TSDB_KEYSIZE); + tsBufAppend(pTSbuf, (int32_t)pCtx->param[0].i64Key, &pCtx->tag, d, (int32_t)TSDB_KEYSIZE); } } @@ -4048,7 +4058,7 @@ static void ts_comp_function_f(SQLFunctionCtx *pCtx, int32_t index) { STSBuf *pTSbuf = pInfo->pTSBuf; - tsBufAppend(pTSbuf, 0, &pCtx->tag, pData, TSDB_KEYSIZE); + tsBufAppend(pTSbuf, (int32_t)pCtx->param[0].i64Key, &pCtx->tag, pData, TSDB_KEYSIZE); SET_VAL(pCtx, pCtx->size, 1); pResInfo->hasResult = DATA_SET_FLAG; diff --git a/src/client/src/tscLocalMerge.c b/src/client/src/tscLocalMerge.c index 44ccb2471a..18d72e2d1e 100644 --- a/src/client/src/tscLocalMerge.c +++ b/src/client/src/tscLocalMerge.c @@ -698,7 +698,8 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr pg *= 2; } - size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups; + size_t numOfSubs = pSql->subState.numOfSub; + assert(numOfSubs <= pTableMetaInfo->vgroupList->numOfVgroups); for (int32_t i = 0; i < numOfSubs; ++i) { (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index e2573f7e19..815af79d8f 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -877,22 +877,13 @@ static bool validateTableColumnInfo(tFieldList* pFieldList, SSqlCmd* pCmd) { int32_t nLen = 0; for (int32_t i = 0; i < pFieldList->nField; ++i) { - if (pFieldList->p[i].bytes == 0) { + TAOS_FIELD* pField = &pFieldList->p[i]; + + if (pField->bytes == 0) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg5); return false; } - nLen += pFieldList->p[i].bytes; - } - // max row length must be less than TSDB_MAX_BYTES_PER_ROW - if (nLen > TSDB_MAX_BYTES_PER_ROW) { - invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); - return false; - } - - // field name must be unique - for (int32_t i = 0; i < pFieldList->nField; ++i) { - TAOS_FIELD* pField = &pFieldList->p[i]; if (pField->type < TSDB_DATA_TYPE_BOOL || pField->type > TSDB_DATA_TYPE_NCHAR) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg4); return false; @@ -909,10 +900,19 @@ static bool validateTableColumnInfo(tFieldList* pFieldList, SSqlCmd* pCmd) { return false; } + // field name must be unique if (has(pFieldList, i + 1, pFieldList->p[i].name) == true) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg3); return false; } + + nLen += pField->bytes; + } + + // max row length must be less than TSDB_MAX_BYTES_PER_ROW + if (nLen > TSDB_MAX_BYTES_PER_ROW) { + invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); + return false; } return true; diff --git a/src/client/src/tscSchemaUtil.c b/src/client/src/tscSchemaUtil.c index dfd707344c..ac740555af 100644 --- a/src/client/src/tscSchemaUtil.c +++ b/src/client/src/tscSchemaUtil.c @@ -177,7 +177,7 @@ STableMeta* tscCreateTableMetaFromMsg(STableMetaMsg* pTableMetaMsg, size_t* size pVgroupInfo->epAddr[i].port = pEpMsg->port; } - tscInitCorVgroupInfo(&pTableMeta->corVgroupInfo, &pTableMeta->vgroupInfo); + tscInitCorVgroupInfo(&pTableMeta->corVgroupInfo, pVgroupInfo); pTableMeta->sversion = pTableMetaMsg->sversion; pTableMeta->tversion = pTableMetaMsg->tversion; diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index a0841fa234..a65e7da008 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -190,18 +190,19 @@ void tscProcessHeartBeatRsp(void *param, TAOS_RES *tres, int code) { void tscProcessActivityTimer(void *handle, void *tmrId) { STscObj *pObj = (STscObj *)handle; - if (pObj == NULL || pObj->signature != pObj) { + + int ret = taosAcquireRef(tscRefId, pObj); + if (ret < 0) { + tscTrace("%p failed to acquire TSC obj, reason:%s", pObj, tstrerror(ret)); return; } SSqlObj* pHB = pObj->pHb; - if (pObj->pTimer != tmrId || pHB == NULL) { - return; - } void** p = taosCacheAcquireByKey(tscObjCache, &pHB, sizeof(TSDB_CACHE_PTR_TYPE)); if (p == NULL) { tscWarn("%p HB object has been released already", pHB); + taosReleaseRef(tscRefId, pObj); return; } @@ -213,6 +214,8 @@ void tscProcessActivityTimer(void *handle, void *tmrId) { if (code != TSDB_CODE_SUCCESS) { tscError("%p failed to sent HB to server, reason:%s", pHB, tstrerror(code)); } + + taosReleaseRef(tscRefId, pObj); } int tscSendMsgToServer(SSqlObj *pSql) { @@ -481,14 +484,25 @@ int tscBuildFetchMsg(SSqlObj *pSql, SSqlInfo *pInfo) { if (UTIL_TABLE_IS_SUPER_TABLE(pTableMetaInfo)) { int32_t vgIndex = pTableMetaInfo->vgroupIndex; - - SVgroupsInfo* pVgroupInfo = pTableMetaInfo->vgroupList; - assert(pVgroupInfo->vgroups[vgIndex].vgId > 0 && vgIndex < pTableMetaInfo->vgroupList->numOfVgroups); + if (pTableMetaInfo->pVgroupTables == NULL) { + SVgroupsInfo *pVgroupInfo = pTableMetaInfo->vgroupList; + assert(pVgroupInfo->vgroups[vgIndex].vgId > 0 && vgIndex < pTableMetaInfo->vgroupList->numOfVgroups); - pRetrieveMsg->header.vgId = htonl(pVgroupInfo->vgroups[vgIndex].vgId); + pRetrieveMsg->header.vgId = htonl(pVgroupInfo->vgroups[vgIndex].vgId); + tscDebug("%p build fetch msg from vgId:%d, vgIndex:%d", pSql, pVgroupInfo->vgroups[vgIndex].vgId, vgIndex); + } else { + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables); + assert(vgIndex >= 0 && vgIndex < numOfVgroups); + + SVgroupTableInfo* pTableIdList = taosArrayGet(pTableMetaInfo->pVgroupTables, vgIndex); + + pRetrieveMsg->header.vgId = htonl(pTableIdList->vgInfo.vgId); + tscDebug("%p build fetch msg from vgId:%d, vgIndex:%d", pSql, pTableIdList->vgInfo.vgId, vgIndex); + } } else { STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; pRetrieveMsg->header.vgId = htonl(pTableMeta->vgroupInfo.vgId); + tscDebug("%p build fetch msg from only one vgroup, vgId:%d", pSql, pTableMeta->vgroupInfo.vgId); } pSql->cmd.payloadLen = sizeof(SRetrieveTableMsg); @@ -662,12 +676,12 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pQueryMsg->limit = htobe64(pQueryInfo->limit.limit); pQueryMsg->offset = htobe64(pQueryInfo->limit.offset); pQueryMsg->numOfCols = htons((int16_t)taosArrayGetSize(pQueryInfo->colList)); - pQueryMsg->interval.interval = htobe64(pQueryInfo->interval.interval); - pQueryMsg->interval.sliding = htobe64(pQueryInfo->interval.sliding); + pQueryMsg->interval.interval = htobe64(pQueryInfo->interval.interval); + pQueryMsg->interval.sliding = htobe64(pQueryInfo->interval.sliding); pQueryMsg->interval.offset = htobe64(pQueryInfo->interval.offset); pQueryMsg->interval.intervalUnit = pQueryInfo->interval.intervalUnit; - pQueryMsg->interval.slidingUnit = pQueryInfo->interval.slidingUnit; - pQueryMsg->interval.offsetUnit = pQueryInfo->interval.offsetUnit; + pQueryMsg->interval.slidingUnit = pQueryInfo->interval.slidingUnit; + pQueryMsg->interval.offsetUnit = pQueryInfo->interval.offsetUnit; pQueryMsg->numOfGroupCols = htons(pQueryInfo->groupbyExpr.numOfGroupCols); pQueryMsg->numOfTags = htonl(numOfTags); pQueryMsg->tagNameRelType = htons(pQueryInfo->tagCond.relType); @@ -850,7 +864,8 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t numOfBlocks = 0; if (pQueryInfo->tsBuf != NULL) { - STSVnodeBlockInfo *pBlockInfo = tsBufGetVnodeBlockInfo(pQueryInfo->tsBuf, pTableMetaInfo->vgroupIndex); + int32_t vnodeId = htonl(pQueryMsg->head.vgId); + STSVnodeBlockInfo *pBlockInfo = tsBufGetVnodeBlockInfo(pQueryInfo->tsBuf, vnodeId); assert(QUERY_IS_JOIN_QUERY(pQueryInfo->type) && pBlockInfo != NULL); // this query should not be sent // todo refactor @@ -2271,7 +2286,7 @@ int tscGetSTableVgroupInfo(SSqlObj *pSql, int32_t clauseIndex) { for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { STableMetaInfo *pMInfo = tscGetMetaInfo(pQueryInfo, i); STableMeta *pTableMeta = taosCacheAcquireByData(tscMetaCache, pMInfo->pTableMeta); - tscAddTableMetaInfo(pNewQueryInfo, pMInfo->name, pTableMeta, NULL, pMInfo->tagColList); + tscAddTableMetaInfo(pNewQueryInfo, pMInfo->name, pTableMeta, NULL, pMInfo->tagColList, pMInfo->pVgroupTables); } if ((code = tscAllocPayload(&pNew->cmd, TSDB_DEFAULT_PAYLOAD_SIZE)) != TSDB_CODE_SUCCESS) { diff --git a/src/client/src/tscSql.c b/src/client/src/tscSql.c index 8cac9b3398..89dfa24e8f 100644 --- a/src/client/src/tscSql.c +++ b/src/client/src/tscSql.c @@ -161,6 +161,7 @@ static SSqlObj *taosConnectImpl(const char *ip, const char *user, const char *pa registerSqlObj(pSql); tsInsertHeadSize = sizeof(SMsgDesc) + sizeof(SSubmitMsg); + taosAddRef(tscRefId, pObj); return pSql; } @@ -296,7 +297,8 @@ void taos_close(TAOS *taos) { } tscDebug("%p all sqlObj are freed, free tscObj and close dnodeConn:%p", pObj, pObj->pDnodeConn); - tscCloseTscObj(pObj); + + taosRemoveRef(tscRefId, pObj); } void waitForQueryRsp(void *param, TAOS_RES *tres, int code) { diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c index 6b615c3a9b..794b7a068b 100644 --- a/src/client/src/tscSubquery.c +++ b/src/client/src/tscSubquery.c @@ -23,7 +23,6 @@ #include "tscSubquery.h" #include "tschemautil.h" #include "tsclient.h" -#include "tscSubquery.h" typedef struct SInsertSupporter { SSqlObj* pSql; @@ -59,6 +58,8 @@ static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSupporter* pSupporter1, SJ pSubQueryInfo1->tsBuf = output1; pSubQueryInfo2->tsBuf = output2; + TSKEY st = taosGetTimestampUs(); + // no result generated, return directly if (pSupporter1->pTSBuf == NULL || pSupporter2->pTSBuf == NULL) { tscDebug("%p at least one ts-comp is empty, 0 for secondary query after ts blocks intersecting", pSql); @@ -95,7 +96,7 @@ static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSupporter* pSupporter1, SJ tscInfo("%" PRId64 ", tags:%"PRId64" \t %" PRId64 ", tags:%"PRId64, elem1.ts, elem1.tag.i64Key, elem2.ts, elem2.tag.i64Key); #endif - int32_t res = tVariantCompare(&elem1.tag, &elem2.tag); + int32_t res = tVariantCompare(elem1.tag, elem2.tag); if (res == -1 || (res == 0 && tsCompare(order, elem1.ts, elem2.ts))) { if (!tsBufNextPos(pSupporter1->pTSBuf)) { break; @@ -122,8 +123,9 @@ static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSupporter* pSupporter1, SJ win->ekey = elem1.ts; } - tsBufAppend(output1, elem1.vnode, &elem1.tag, (const char*)&elem1.ts, sizeof(elem1.ts)); - tsBufAppend(output2, elem2.vnode, &elem2.tag, (const char*)&elem2.ts, sizeof(elem2.ts)); + tsBufAppend(output1, elem1.vnode, elem1.tag, (const char*)&elem1.ts, sizeof(elem1.ts)); + tsBufAppend(output2, elem2.vnode, elem2.tag, (const char*)&elem2.ts, sizeof(elem2.ts)); + } else { pLimit->offset -= 1; } @@ -158,9 +160,10 @@ static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSupporter* pSupporter1, SJ tsBufDestroy(pSupporter1->pTSBuf); tsBufDestroy(pSupporter2->pTSBuf); - tscDebug("%p input1:%" PRId64 ", input2:%" PRId64 ", final:%" PRId64 " for secondary query after ts blocks " - "intersecting, skey:%" PRId64 ", ekey:%" PRId64, pSql, numOfInput1, numOfInput2, output1->numOfTotal, - win->skey, win->ekey); + TSKEY et = taosGetTimestampUs(); + tscDebug("%p input1:%" PRId64 ", input2:%" PRId64 ", final:%" PRId64 " in %d vnodes for secondary query after ts blocks " + "intersecting, skey:%" PRId64 ", ekey:%" PRId64 ", numOfVnode:%d, elasped time:%"PRId64" us", pSql, numOfInput1, numOfInput2, output1->numOfTotal, + output1->numOfVnodes, win->skey, win->ekey, tsBufGetNumOfVnodes(output1), et - st); return output1->numOfTotal; } @@ -216,6 +219,11 @@ static void tscDestroyJoinSupporter(SJoinSupporter* pSupporter) { pSupporter->f = NULL; } + if (pSupporter->pVgroupTables != NULL) { + taosArrayDestroy(pSupporter->pVgroupTables); + pSupporter->pVgroupTables = NULL; + } + taosTFree(pSupporter->pIdTagList); tscTagCondRelease(&pSupporter->tagCond); free(pSupporter); @@ -305,7 +313,6 @@ static int32_t tscLaunchRealSubqueries(SSqlObj* pSql) { // set the second stage sub query for join process TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_JOIN_SEC_STAGE); - memcpy(&pQueryInfo->interval, &pSupporter->interval, sizeof(pQueryInfo->interval)); tscTagCondCopy(&pQueryInfo->tagCond, &pSupporter->tagCond); @@ -324,7 +331,9 @@ static int32_t tscLaunchRealSubqueries(SSqlObj* pSql) { tscFieldInfoUpdateOffset(pNewQueryInfo); STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pNewQueryInfo, 0); - + pTableMetaInfo->pVgroupTables = pSupporter->pVgroupTables; + pSupporter->pVgroupTables = NULL; + /* * When handling the projection query, the offset value will be modified for table-table join, which is changed * during the timestamp intersection. @@ -356,10 +365,39 @@ static int32_t tscLaunchRealSubqueries(SSqlObj* pSql) { int16_t colId = tscGetJoinTagColIdByUid(&pQueryInfo->tagCond, pTableMetaInfo->pTableMeta->id.uid); // set the tag column id for executor to extract correct tag value - pExpr->param[0].i64Key = colId; + pExpr->param[0] = (tVariant) {.i64Key = colId, .nType = TSDB_DATA_TYPE_BIGINT, .nLen = sizeof(int64_t)}; pExpr->numOfParams = 1; } + int32_t num = 0; + int32_t *list = NULL; + tsBufGetVnodeIdList(pNewQueryInfo->tsBuf, &num, &list); + + if (pTableMetaInfo->pVgroupTables != NULL) { + for(int32_t k = 0; k < taosArrayGetSize(pTableMetaInfo->pVgroupTables);) { + SVgroupTableInfo* p = taosArrayGet(pTableMetaInfo->pVgroupTables, k); + + bool found = false; + for(int32_t f = 0; f < num; ++f) { + if (p->vgInfo.vgId == list[f]) { + found = true; + break; + } + } + + if (!found) { + tscRemoveVgroupTableGroup(pTableMetaInfo->pVgroupTables, k); + } else { + k++; + } + } + + assert(taosArrayGetSize(pTableMetaInfo->pVgroupTables) > 0); + TSDB_QUERY_SET_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_MULTITABLE_QUERY); + } + + taosTFree(list); + size_t numOfCols = taosArrayGetSize(pNewQueryInfo->colList); tscDebug("%p subquery:%p tableIndex:%d, vgroupIndex:%d, type:%d, exprInfo:%" PRIzu ", colList:%" PRIzu ", fieldsInfo:%d, name:%s", pSql, pNew, 0, pTableMetaInfo->vgroupIndex, pNewQueryInfo->type, taosArrayGetSize(pNewQueryInfo->exprList), @@ -418,6 +456,8 @@ static void quitAllSubquery(SSqlObj* pSqlObj, SJoinSupporter* pSupporter) { static void updateQueryTimeRange(SQueryInfo* pQueryInfo, STimeWindow* win) { assert(pQueryInfo->window.skey <= win->skey && pQueryInfo->window.ekey >= win->ekey); pQueryInfo->window = *win; + + } int32_t tscCompareTidTags(const void* p1, const void* p2) { @@ -474,10 +514,11 @@ static void issueTSCompQuery(SSqlObj* pSql, SJoinSupporter* pSupporter, SSqlObj* SSqlCmd* pCmd = &pSql->cmd; tscClearSubqueryInfo(pCmd); tscFreeSqlResult(pSql); - + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); + assert(pQueryInfo->numOfTables == 1); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - tscInitQueryInfo(pQueryInfo); TSDB_QUERY_CLEAR_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_TAG_FILTER_QUERY); @@ -524,13 +565,7 @@ static void issueTSCompQuery(SSqlObj* pSql, SJoinSupporter* pSupporter, SSqlObj* tscProcessSql(pSql); } -static bool checkForDuplicateTagVal(SQueryInfo* pQueryInfo, SJoinSupporter* p1, SSqlObj* pPSqlObj) { - STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - - SSchema* pSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta);// todo: tags mismatch, tags not completed - SColumn *pCol = taosArrayGetP(pTableMetaInfo->tagColList, 0); - SSchema *pColSchema = &pSchema[pCol->colIndex.columnIndex]; - +static bool checkForDuplicateTagVal(SSchema* pColSchema, SJoinSupporter* p1, SSqlObj* pPSqlObj) { for(int32_t i = 1; i < p1->num; ++i) { STidTags* prev = (STidTags*) varDataVal(p1->pIdTagList + (i - 1) * p1->tagSize); STidTags* p = (STidTags*) varDataVal(p1->pIdTagList + i * p1->tagSize); @@ -564,7 +599,7 @@ static int32_t getIntersectionOfTableTuple(SQueryInfo* pQueryInfo, SSqlObj* pPar *s1 = taosArrayInit(p1->num, p1->tagSize - sizeof(int16_t)); *s2 = taosArrayInit(p2->num, p2->tagSize - sizeof(int16_t)); - if (!(checkForDuplicateTagVal(pQueryInfo, p1, pParentSql) && checkForDuplicateTagVal(pQueryInfo, p2, pParentSql))) { + if (!(checkForDuplicateTagVal(pColSchema, p1, pParentSql) && checkForDuplicateTagVal(pColSchema, p2, pParentSql))) { return TSDB_CODE_QRY_DUP_JOIN_KEY; } @@ -708,6 +743,12 @@ static void tidTagRetrieveCallback(void* param, TAOS_RES* tres, int32_t numOfRow STableMetaInfo* pTableMetaInfo2 = tscGetMetaInfo(pQueryInfo2, 0); tscBuildVgroupTableInfo(pParentSql, pTableMetaInfo2, s2); + SSqlObj* psub1 = pParentSql->pSubs[0]; + ((SJoinSupporter*)psub1->param)->pVgroupTables = tscCloneVgroupTableInfo(pTableMetaInfo1->pVgroupTables); + + SSqlObj* psub2 = pParentSql->pSubs[1]; + ((SJoinSupporter*)psub2->param)->pVgroupTables = tscCloneVgroupTableInfo(pTableMetaInfo2->pVgroupTables); + pParentSql->subState.numOfSub = 2; pParentSql->subState.numOfRemain = pParentSql->subState.numOfSub; @@ -766,9 +807,7 @@ static void tsCompRetrieveCallback(void* param, TAOS_RES* tres, int32_t numOfRow pSupporter->pTSBuf = pBuf; } else { assert(pQueryInfo->numOfTables == 1); // for subquery, only one - STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - - tsBufMerge(pSupporter->pTSBuf, pBuf, pTableMetaInfo->vgroupIndex); + tsBufMerge(pSupporter->pTSBuf, pBuf); tsBufDestroy(pBuf); } @@ -835,6 +874,8 @@ static void tsCompRetrieveCallback(void* param, TAOS_RES* tres, int32_t numOfRow // launch the query the retrieve actual results from vnode along with the filtered timestamp SQueryInfo* pPQueryInfo = tscGetQueryInfoDetail(&pParentSql->cmd, pParentSql->cmd.clauseIndex); updateQueryTimeRange(pPQueryInfo, &win); + + //update the vgroup that involved in real data query tscLaunchRealSubqueries(pParentSql); } @@ -868,20 +909,27 @@ static void joinRetrieveFinalResCallback(void* param, TAOS_RES* tres, int numOfR assert(pQueryInfo->numOfTables == 1); // for projection query, need to try next vnode if current vnode is exhausted - if ((++pTableMetaInfo->vgroupIndex) < pTableMetaInfo->vgroupList->numOfVgroups) { - pState->numOfRemain = 1; - pState->numOfSub = 1; + int32_t numOfVgroups = 0; // TODO refactor + if (pTableMetaInfo->pVgroupTables != NULL) { + numOfVgroups = (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables); + } else { + numOfVgroups = pTableMetaInfo->vgroupList->numOfVgroups; + } + if ((++pTableMetaInfo->vgroupIndex) < numOfVgroups) { + tscDebug("%p no result in current vnode anymore, try next vnode, vgIndex:%d", pSql, pTableMetaInfo->vgroupIndex); pSql->cmd.command = TSDB_SQL_SELECT; pSql->fp = tscJoinQueryCallback; - tscProcessSql(pSql); + tscProcessSql(pSql); return; + } else { + tscDebug("%p no result in current subquery anymore", pSql); } } - if (atomic_sub_fetch_32(&pParentSql->subState.numOfRemain, 1) > 0) { - tscDebug("%p sub:%p completed, remain:%d, total:%d", pParentSql, tres, pParentSql->subState.numOfRemain, pState->numOfSub); + if (atomic_sub_fetch_32(&pState->numOfRemain, 1) > 0) { + tscDebug("%p sub:%p completed, remain:%d, total:%d", pParentSql, tres, pState->numOfRemain, pState->numOfSub); return; } @@ -895,60 +943,60 @@ static void joinRetrieveFinalResCallback(void* param, TAOS_RES* tres, int numOfR // update the records for each subquery in parent sql object. for (int32_t i = 0; i < pState->numOfSub; ++i) { if (pParentSql->pSubs[i] == NULL) { + tscDebug("%p %p sub:%d not retrieve data", pParentSql, NULL, i); continue; } SSqlRes* pRes1 = &pParentSql->pSubs[i]->res; - pRes1->numOfClauseTotal += pRes1->numOfRows; + + if (pRes1->row > 0 && pRes1->numOfRows > 0) { + tscDebug("%p sub:%p index:%d numOfRows:%"PRId64" total:%"PRId64 " (not retrieve)", pParentSql, pParentSql->pSubs[i], i, + pRes1->numOfRows, pRes1->numOfTotal); + assert(pRes1->row < pRes1->numOfRows); + } else { + pRes1->numOfClauseTotal += pRes1->numOfRows; + tscDebug("%p sub:%p index:%d numOfRows:%"PRId64" total:%"PRId64, pParentSql, pParentSql->pSubs[i], i, + pRes1->numOfRows, pRes1->numOfTotal); + } } // data has retrieved to client, build the join results tscBuildResFromSubqueries(pParentSql); } -static SJoinSupporter* tscUpdateSubqueryStatus(SSqlObj* pSql, int32_t numOfFetch) { - int32_t notInvolved = 0; - SJoinSupporter* pSupporter = NULL; - SSubqueryState* pState = &pSql->subState; - - for(int32_t i = 0; i < pSql->subState.numOfSub; ++i) { - if (pSql->pSubs[i] == NULL) { - notInvolved++; - } else { - pSupporter = (SJoinSupporter*)pSql->pSubs[i]->param; - } - } - - pState->numOfRemain = numOfFetch; - return pSupporter; -} - void tscFetchDatablockFromSubquery(SSqlObj* pSql) { assert(pSql->subState.numOfSub >= 1); int32_t numOfFetch = 0; - bool hasData = true; + bool hasData = true; + bool reachLimit = false; + + // if the subquery is NULL, it does not involved in the final result generation for (int32_t i = 0; i < pSql->subState.numOfSub; ++i) { - // if the subquery is NULL, it does not involved in the final result generation SSqlObj* pSub = pSql->pSubs[i]; if (pSub == NULL) { continue; } - + SSqlRes *pRes = &pSub->res; + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSub->cmd, 0); if (!tscHasReachLimitation(pQueryInfo, pRes)) { if (pRes->row >= pRes->numOfRows) { + // no data left in current result buffer hasData = false; + // The current query is completed for the active vnode, try next vnode if exists + // If it is completed, no need to fetch anymore. if (!pRes->completed) { numOfFetch++; } } } else { // has reach the limitation, no data anymore if (pRes->row >= pRes->numOfRows) { - hasData = false; + reachLimit = true; + hasData = false; break; } } @@ -958,29 +1006,102 @@ void tscFetchDatablockFromSubquery(SSqlObj* pSql) { if (hasData) { tscBuildResFromSubqueries(pSql); return; - } else if (numOfFetch <= 0) { + } + + // If at least one subquery is completed in current vnode, try the next vnode in case of multi-vnode + // super table projection query. + if (reachLimit) { pSql->res.completed = true; freeJoinSubqueryObj(pSql); - + if (pSql->res.code == TSDB_CODE_SUCCESS) { (*pSql->fp)(pSql->param, pSql, 0); } else { tscQueueAsyncRes(pSql); } - + + return; + } + + if (numOfFetch <= 0) { + bool tryNextVnode = false; + + SSqlObj* pp = pSql->pSubs[0]; + SQueryInfo* pi = tscGetQueryInfoDetail(&pp->cmd, 0); + + // get the number of subquery that need to retrieve the next vnode. + if (tscNonOrderedProjectionQueryOnSTable(pi, 0)) { + for (int32_t i = 0; i < pSql->subState.numOfSub; ++i) { + SSqlObj* pSub = pSql->pSubs[i]; + if (pSub != NULL && pSub->res.row >= pSub->res.numOfRows && pSub->res.completed) { + pSql->subState.numOfRemain++; + } + } + } + + for (int32_t i = 0; i < pSql->subState.numOfSub; ++i) { + SSqlObj* pSub = pSql->pSubs[i]; + if (pSub == NULL) { + continue; + } + + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSub->cmd, 0); + + if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0) && pSub->res.row >= pSub->res.numOfRows && + pSub->res.completed) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + assert(pQueryInfo->numOfTables == 1); + + // for projection query, need to try next vnode if current vnode is exhausted + int32_t numOfVgroups = 0; // TODO refactor + if (pTableMetaInfo->pVgroupTables != NULL) { + numOfVgroups = (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables); + } else { + numOfVgroups = pTableMetaInfo->vgroupList->numOfVgroups; + } + + if ((++pTableMetaInfo->vgroupIndex) < numOfVgroups) { + tscDebug("%p no result in current vnode anymore, try next vnode, vgIndex:%d", pSub, + pTableMetaInfo->vgroupIndex); + pSub->cmd.command = TSDB_SQL_SELECT; + pSub->fp = tscJoinQueryCallback; + + tscProcessSql(pSub); + tryNextVnode = true; + } else { + tscDebug("%p no result in current subquery anymore", pSub); + } + } + } + + if (tryNextVnode) { + return; + } + + pSql->res.completed = true; + freeJoinSubqueryObj(pSql); + + if (pSql->res.code == TSDB_CODE_SUCCESS) { + (*pSql->fp)(pSql->param, pSql, 0); + } else { + tscQueueAsyncRes(pSql); + } + return; } // TODO multi-vnode retrieve for projection query with limitation has bugs, since the global limiation is not handled + // retrieve data from current vnode. tscDebug("%p retrieve data from %d subqueries", pSql, numOfFetch); - SJoinSupporter* pSupporter = tscUpdateSubqueryStatus(pSql, numOfFetch); - + SJoinSupporter* pSupporter = NULL; + pSql->subState.numOfRemain = numOfFetch; + for (int32_t i = 0; i < pSql->subState.numOfSub; ++i) { SSqlObj* pSql1 = pSql->pSubs[i]; if (pSql1 == NULL) { continue; } - + SSqlRes* pRes1 = &pSql1->res; SSqlCmd* pCmd1 = &pSql1->cmd; @@ -1122,7 +1243,7 @@ void tscJoinQueryCallback(void* param, TAOS_RES* tres, int code) { * data instead of returning to its invoker */ if (pTableMetaInfo->vgroupIndex > 0 && tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { - pParentSql->subState.numOfRemain = pParentSql->subState.numOfSub; // reset the record value +// pParentSql->subState.numOfRemain = pParentSql->subState.numOfSub; // reset the record value pSql->fp = joinRetrieveFinalResCallback; // continue retrieve data pSql->cmd.command = TSDB_SQL_FETCH; @@ -1386,7 +1507,13 @@ int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) { STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); SSubqueryState *pState = &pSql->subState; - pState->numOfSub = pTableMetaInfo->vgroupList->numOfVgroups; + pState->numOfSub = 0; + if (pTableMetaInfo->pVgroupTables == NULL) { + pState->numOfSub = pTableMetaInfo->vgroupList->numOfVgroups; + } else { + pState->numOfSub = (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables); + } + assert(pState->numOfSub > 0); int32_t ret = tscLocalReducerEnvCreate(pSql, &pMemoryBuf, &pDesc, &pModel, nBufferSize); @@ -2017,7 +2144,7 @@ static char* getResultBlockPosition(SSqlCmd* pCmd, SSqlRes* pRes, int32_t column assert(pInfo->pSqlExpr != NULL); *bytes = pInfo->pSqlExpr->resBytes; - char* pData = pRes->data + pInfo->pSqlExpr->offset * pRes->numOfRows; + char* pData = pRes->data + pInfo->pSqlExpr->offset * pRes->numOfRows + pRes->row * (*bytes); return pData; } @@ -2029,11 +2156,13 @@ static void doBuildResFromSubqueries(SSqlObj* pSql) { int32_t numOfRes = INT32_MAX; for (int32_t i = 0; i < pSql->subState.numOfSub; ++i) { - if (pSql->pSubs[i] == NULL) { + SSqlObj* pSub = pSql->pSubs[i]; + if (pSub == NULL) { continue; } - numOfRes = (int32_t)(MIN(numOfRes, pSql->pSubs[i]->res.numOfRows)); + int32_t remain = (int32_t)(pSub->res.numOfRows - pSub->res.row); + numOfRes = (int32_t)(MIN(numOfRes, remain)); } if (numOfRes == 0) { @@ -2059,14 +2188,23 @@ static void doBuildResFromSubqueries(SSqlObj* pSql) { size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo); for(int32_t i = 0; i < numOfExprs; ++i) { SColumnIndex* pIndex = &pRes->pColumnIndex[i]; - SSqlRes *pRes1 = &pSql->pSubs[pIndex->tableIndex]->res; - SSqlCmd *pCmd1 = &pSql->pSubs[pIndex->tableIndex]->cmd; + SSqlRes* pRes1 = &pSql->pSubs[pIndex->tableIndex]->res; + SSqlCmd* pCmd1 = &pSql->pSubs[pIndex->tableIndex]->cmd; char* pData = getResultBlockPosition(pCmd1, pRes1, pIndex->columnIndex, &bytes); memcpy(data, pData, bytes * numOfRes); data += bytes * numOfRes; - pRes1->row = numOfRes; + } + + for(int32_t i = 0; i < pSql->subState.numOfSub; ++i) { + SSqlObj* pSub = pSql->pSubs[i]; + if (pSub == NULL) { + continue; + } + + pSub->res.row += numOfRes; + assert(pSub->res.row <= pSub->res.numOfRows); } pRes->numOfRows = numOfRes; @@ -2085,6 +2223,8 @@ void tscBuildResFromSubqueries(SSqlObj *pSql) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex); size_t numOfExprs = tscSqlExprNumOfExprs(pQueryInfo); + pRes->numOfCols = (int32_t)numOfExprs; + pRes->tsrow = calloc(numOfExprs, POINTER_BYTES); pRes->buffer = calloc(numOfExprs, POINTER_BYTES); pRes->length = calloc(numOfExprs, sizeof(int32_t)); diff --git a/src/client/src/tscSystem.c b/src/client/src/tscSystem.c index 47c2d35a75..bff5062f16 100644 --- a/src/client/src/tscSystem.c +++ b/src/client/src/tscSystem.c @@ -36,6 +36,7 @@ void * tscTmr; void * tscQhandle; void * tscCheckDiskUsageTmr; int tsInsertHeadSize; +int tscRefId; int tscNumOfThreads; @@ -146,6 +147,8 @@ void taos_init_imp(void) { tscObjCache = taosCacheInit(TSDB_CACHE_PTR_KEY, refreshTime / 2, false, tscFreeRegisteredSqlObj, "sqlObj"); } + tscRefId = taosOpenRef(200, tscCloseTscObj); + tscDebug("client is initialized successfully"); } @@ -165,6 +168,7 @@ void taos_cleanup() { tscQhandle = NULL; } + taosCloseRef(tscRefId); taosCleanupKeywordsTable(); taosCloseLog(); diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index b60bf958a8..85e7122b9d 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -404,7 +404,7 @@ void tscFreeRegisteredSqlObj(void *pSql) { tscDebug("%p free sqlObj completed, tscObj:%p ref:%d", *p, pTscObj, ref); if (ref == 0) { tscDebug("%p all sqlObj freed, free tscObj:%p", *p, pTscObj); - tscCloseTscObj(pTscObj); + taosRemoveRef(tscRefId, pTscObj); } } @@ -786,8 +786,8 @@ int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SArray* pTableDataBlockList) { } // TODO: all subqueries should be freed correctly before close this connection. -void tscCloseTscObj(STscObj* pObj) { - assert(pObj != NULL); +void tscCloseTscObj(void *param) { + STscObj *pObj = param; pObj->signature = NULL; taosTmrStopA(&(pObj->pTimer)); @@ -1121,6 +1121,8 @@ int32_t tscSqlExprCopy(SArray* dst, const SArray* src, uint64_t uid, bool deepco } *p1 = *pExpr; + memset(p1->param, 0, sizeof(tVariant) * tListLen(p1->param)); + for (int32_t j = 0; j < pExpr->numOfParams; ++j) { tVariantAssign(&p1->param[j], &pExpr->param[j]); } @@ -1678,19 +1680,62 @@ void tscClearSubqueryInfo(SSqlCmd* pCmd) { } void tscFreeVgroupTableInfo(SArray* pVgroupTables) { - if (pVgroupTables != NULL) { - size_t num = taosArrayGetSize(pVgroupTables); - for (size_t i = 0; i < num; i++) { - SVgroupTableInfo* pInfo = taosArrayGet(pVgroupTables, i); - - for(int32_t j = 0; j < pInfo->vgInfo.numOfEps; ++j) { - taosTFree(pInfo->vgInfo.epAddr[j].fqdn); - } - - taosArrayDestroy(pInfo->itemList); - } - taosArrayDestroy(pVgroupTables); + if (pVgroupTables == NULL) { + return; } + + size_t num = taosArrayGetSize(pVgroupTables); + for (size_t i = 0; i < num; i++) { + SVgroupTableInfo* pInfo = taosArrayGet(pVgroupTables, i); + + for(int32_t j = 0; j < pInfo->vgInfo.numOfEps; ++j) { + taosTFree(pInfo->vgInfo.epAddr[j].fqdn); + } + + taosArrayDestroy(pInfo->itemList); + } + + taosArrayDestroy(pVgroupTables); +} + +void tscRemoveVgroupTableGroup(SArray* pVgroupTable, int32_t index) { + assert(pVgroupTable != NULL && index >= 0); + + size_t size = taosArrayGetSize(pVgroupTable); + assert(size > index); + + SVgroupTableInfo* pInfo = taosArrayGet(pVgroupTable, index); + for(int32_t j = 0; j < pInfo->vgInfo.numOfEps; ++j) { + taosTFree(pInfo->vgInfo.epAddr[j].fqdn); + } + + taosArrayDestroy(pInfo->itemList); + taosArrayRemove(pVgroupTable, index); +} + +SArray* tscCloneVgroupTableInfo(SArray* pVgroupTables) { + if (pVgroupTables == NULL) { + return NULL; + } + + size_t num = taosArrayGetSize(pVgroupTables); + SArray* pa = taosArrayInit(num, sizeof(SVgroupTableInfo)); + + SVgroupTableInfo info; + for (size_t i = 0; i < num; i++) { + SVgroupTableInfo* pInfo = taosArrayGet(pVgroupTables, i); + memset(&info, 0, sizeof(SVgroupTableInfo)); + + info.vgInfo = pInfo->vgInfo; + for(int32_t j = 0; j < pInfo->vgInfo.numOfEps; ++j) { + info.vgInfo.epAddr[j].fqdn = strdup(pInfo->vgInfo.epAddr[j].fqdn); + } + + info.itemList = taosArrayClone(pInfo->itemList); + taosArrayPush(pa, &info); + } + + return pa; } void clearAllTableMetaInfo(SQueryInfo* pQueryInfo, const char* address, bool removeFromCache) { @@ -1708,7 +1753,7 @@ void clearAllTableMetaInfo(SQueryInfo* pQueryInfo, const char* address, bool rem } STableMetaInfo* tscAddTableMetaInfo(SQueryInfo* pQueryInfo, const char* name, STableMeta* pTableMeta, - SVgroupsInfo* vgroupList, SArray* pTagCols) { + SVgroupsInfo* vgroupList, SArray* pTagCols, SArray* pVgroupTables) { void* pAlloc = realloc(pQueryInfo->pTableMetaInfo, (pQueryInfo->numOfTables + 1) * POINTER_BYTES); if (pAlloc == NULL) { terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; @@ -1742,13 +1787,15 @@ STableMetaInfo* tscAddTableMetaInfo(SQueryInfo* pQueryInfo, const char* name, ST if (pTagCols != NULL) { tscColumnListCopy(pTableMetaInfo->tagColList, pTagCols, -1); } + + pTableMetaInfo->pVgroupTables = tscCloneVgroupTableInfo(pVgroupTables); pQueryInfo->numOfTables += 1; return pTableMetaInfo; } STableMetaInfo* tscAddEmptyMetaInfo(SQueryInfo* pQueryInfo) { - return tscAddTableMetaInfo(pQueryInfo, NULL, NULL, NULL, NULL); + return tscAddTableMetaInfo(pQueryInfo, NULL, NULL, NULL, NULL, NULL); } void tscClearTableMetaInfo(STableMetaInfo* pTableMetaInfo, bool removeFromCache) { @@ -1822,7 +1869,7 @@ SSqlObj* createSimpleSubObj(SSqlObj* pSql, void (*fp)(), void* param, int32_t cm assert(pSql->cmd.clauseIndex == 0); STableMetaInfo* pMasterTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, pSql->cmd.clauseIndex, 0); - tscAddTableMetaInfo(pQueryInfo, pMasterTableMetaInfo->name, NULL, NULL, NULL); + tscAddTableMetaInfo(pQueryInfo, pMasterTableMetaInfo->name, NULL, NULL, NULL, NULL); registerSqlObj(pNew); return pNew; @@ -1987,14 +2034,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void STableMeta* pTableMeta = taosCacheAcquireByData(tscMetaCache, pTableMetaInfo->pTableMeta); // get by name may failed due to the cache cleanup assert(pTableMeta != NULL); - pFinalInfo = tscAddTableMetaInfo(pNewQueryInfo, name, pTableMeta, pTableMetaInfo->vgroupList, pTableMetaInfo->tagColList); + pFinalInfo = tscAddTableMetaInfo(pNewQueryInfo, name, pTableMeta, pTableMetaInfo->vgroupList, + pTableMetaInfo->tagColList, pTableMetaInfo->pVgroupTables); } else { // transfer the ownership of pTableMeta to the newly create sql object. STableMetaInfo* pPrevInfo = tscGetTableMetaInfoFromCmd(&pPrevSql->cmd, pPrevSql->cmd.clauseIndex, 0); STableMeta* pPrevTableMeta = taosCacheTransfer(tscMetaCache, (void**)&pPrevInfo->pTableMeta); SVgroupsInfo* pVgroupsInfo = pPrevInfo->vgroupList; - pFinalInfo = tscAddTableMetaInfo(pNewQueryInfo, name, pPrevTableMeta, pVgroupsInfo, pTableMetaInfo->tagColList); + pFinalInfo = tscAddTableMetaInfo(pNewQueryInfo, name, pPrevTableMeta, pVgroupsInfo, pTableMetaInfo->tagColList, + pTableMetaInfo->pVgroupTables); } if (pFinalInfo->pTableMeta == NULL) { diff --git a/src/common/inc/tglobal.h b/src/common/inc/tglobal.h index 515115c323..4636eaac08 100644 --- a/src/common/inc/tglobal.h +++ b/src/common/inc/tglobal.h @@ -44,14 +44,17 @@ extern int32_t tsMaxShellConns; extern int32_t tsShellActivityTimer; extern uint32_t tsMaxTmrCtrl; extern float tsNumOfThreadsPerCore; -extern float tsRatioOfQueryThreads; +extern float tsRatioOfQueryThreads; // todo remove it extern int8_t tsDaylight; extern char tsTimezone[]; extern char tsLocale[]; -extern char tsCharset[]; // default encode string +extern char tsCharset[]; // default encode string extern int32_t tsEnableCoreFile; extern int32_t tsCompressMsgSize; +//query buffer management +extern int32_t tsQueryBufferSize; // maximum allowed usage buffer for each data node during query processing + // client extern int32_t tsTableMetaKeepTimer; extern int32_t tsMaxSQLStringLen; diff --git a/src/common/src/tglobal.c b/src/common/src/tglobal.c index c24ba490ba..32569e3982 100644 --- a/src/common/src/tglobal.c +++ b/src/common/src/tglobal.c @@ -45,14 +45,14 @@ int32_t tsEnableTelemetryReporting = 1; char tsEmail[TSDB_FQDN_LEN] = {0}; // common -int32_t tsRpcTimer = 1000; -int32_t tsRpcMaxTime = 600; // seconds; -int32_t tsMaxShellConns = 5000; +int32_t tsRpcTimer = 1000; +int32_t tsRpcMaxTime = 600; // seconds; +int32_t tsMaxShellConns = 5000; int32_t tsMaxConnections = 5000; -int32_t tsShellActivityTimer = 3; // second -float tsNumOfThreadsPerCore = 1.0; -float tsRatioOfQueryThreads = 0.5; -int8_t tsDaylight = 0; +int32_t tsShellActivityTimer = 3; // second +float tsNumOfThreadsPerCore = 1.0f; +float tsRatioOfQueryThreads = 0.5f; +int8_t tsDaylight = 0; char tsTimezone[TSDB_TIMEZONE_LEN] = {0}; char tsLocale[TSDB_LOCALE_LEN] = {0}; char tsCharset[TSDB_LOCALE_LEN] = {0}; // default encode string @@ -99,6 +99,12 @@ float tsStreamComputDelayRatio = 0.1f; int32_t tsProjectExecInterval = 10000; // every 10sec, the projection will be executed once int64_t tsMaxRetentWindow = 24 * 3600L; // maximum time window tolerance +// the maximum allowed query buffer size during query processing for each data node. +// -1 no limit (default) +// 0 no query allowed, queries are disabled +// positive value (in MB) +int32_t tsQueryBufferSize = -1; + // db parameters int32_t tsCacheBlockSize = TSDB_DEFAULT_CACHE_BLOCK_SIZE; int32_t tsBlocksPerVnode = TSDB_DEFAULT_TOTAL_BLOCKS; @@ -676,7 +682,7 @@ static void doInitGlobalConfig(void) { cfg.minValue = TSDB_MIN_CACHE_BLOCK_SIZE; cfg.maxValue = TSDB_MAX_CACHE_BLOCK_SIZE; cfg.ptrLength = 0; - cfg.unitType = TAOS_CFG_UTYPE_Mb; + cfg.unitType = TAOS_CFG_UTYPE_MB; taosInitConfigOption(cfg); cfg.option = "blocks"; @@ -839,6 +845,16 @@ static void doInitGlobalConfig(void) { cfg.unitType = TAOS_CFG_UTYPE_NONE; taosInitConfigOption(cfg); + cfg.option = "queryBufferSize"; + cfg.ptr = &tsQueryBufferSize; + cfg.valType = TAOS_CFG_VTYPE_INT32; + cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW; + cfg.minValue = -1; + cfg.maxValue = 500000000000.0f; + cfg.ptrLength = 0; + cfg.unitType = TAOS_CFG_UTYPE_BYTE; + taosInitConfigOption(cfg); + // locale & charset cfg.option = "timezone"; cfg.ptr = tsTimezone; diff --git a/src/common/src/tvariant.c b/src/common/src/tvariant.c index 005def6dc5..9eb9924932 100644 --- a/src/common/src/tvariant.c +++ b/src/common/src/tvariant.c @@ -144,21 +144,24 @@ void tVariantDestroy(tVariant *pVar) { void tVariantAssign(tVariant *pDst, const tVariant *pSrc) { if (pSrc == NULL || pDst == NULL) return; - *pDst = *pSrc; - + pDst->nType = pSrc->nType; if (pSrc->nType == TSDB_DATA_TYPE_BINARY || pSrc->nType == TSDB_DATA_TYPE_NCHAR) { - int32_t len = pSrc->nLen + 1; - if (pSrc->nType == TSDB_DATA_TYPE_NCHAR) { - len = len * TSDB_NCHAR_SIZE; - } - - pDst->pz = calloc(1, len); - memcpy(pDst->pz, pSrc->pz, len); + int32_t len = pSrc->nLen + TSDB_NCHAR_SIZE; + char* p = realloc(pDst->pz, len); + assert(p); + + memset(p, 0, len); + pDst->pz = p; + + memcpy(pDst->pz, pSrc->pz, pSrc->nLen); + pDst->nLen = pSrc->nLen; return; + } - // this is only for string array - if (pSrc->nType == TSDB_DATA_TYPE_ARRAY) { + if (pSrc->nType >= TSDB_DATA_TYPE_BOOL && pSrc->nType <= TSDB_DATA_TYPE_DOUBLE) { + pDst->i64Key = pSrc->i64Key; + } else if (pSrc->nType == TSDB_DATA_TYPE_ARRAY) { // this is only for string array size_t num = taosArrayGetSize(pSrc->arr); pDst->arr = taosArrayInit(num, sizeof(char*)); for(size_t i = 0; i < num; i++) { @@ -166,8 +169,6 @@ void tVariantAssign(tVariant *pDst, const tVariant *pSrc) { char* n = strdup(p); taosArrayPush(pDst->arr, &n); } - - return; } pDst->nLen = tDataTypeDesc[pDst->nType].nSize; diff --git a/src/connector/go b/src/connector/go index 8d7bf74385..8c58c512b6 160000 --- a/src/connector/go +++ b/src/connector/go @@ -1 +1 @@ -Subproject commit 8d7bf743852897110cbdcc7c4322cd7a74d4167b +Subproject commit 8c58c512b6acda8bcdfa48fdc7140227b5221766 diff --git a/src/connector/hivemq-tdengine-extension b/src/connector/hivemq-tdengine-extension new file mode 160000 index 0000000000..b62a26ecc1 --- /dev/null +++ b/src/connector/hivemq-tdengine-extension @@ -0,0 +1 @@ +Subproject commit b62a26ecc164a310104df57691691b237e091c89 diff --git a/src/inc/query.h b/src/inc/query.h index 0c18f85dc3..5e1de77889 100644 --- a/src/inc/query.h +++ b/src/inc/query.h @@ -78,7 +78,6 @@ int32_t qKillQuery(qinfo_t qinfo); int32_t qQueryCompleted(qinfo_t qinfo); - /** * destroy query info structure * @param qHandle diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index bb111d2da0..310bd78c7e 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -28,7 +28,7 @@ extern "C" { #else #define TAOS_DEFINE_ERROR(name, mod, code, msg) static const int32_t name = (0x80000000 | ((mod)<<16) | (code)); #endif - + #define TAOS_SYSTEM_ERROR(code) (0x80ff0000 | (code)) #define TAOS_SUCCEEDED(err) ((err) >= 0) #define TAOS_FAILED(err) ((err) < 0) @@ -37,7 +37,7 @@ const char* tstrerror(int32_t err); int32_t* taosGetErrno(); #define terrno (*taosGetErrno()) - + #define TSDB_CODE_SUCCESS 0 #ifdef TAOS_ERROR_C @@ -74,6 +74,12 @@ TAOS_DEFINE_ERROR(TSDB_CODE_COM_MEMORY_CORRUPTED, 0, 0x0101, "Memory cor TAOS_DEFINE_ERROR(TSDB_CODE_COM_OUT_OF_MEMORY, 0, 0x0102, "Out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_COM_INVALID_CFG_MSG, 0, 0x0103, "Invalid config message") TAOS_DEFINE_ERROR(TSDB_CODE_COM_FILE_CORRUPTED, 0, 0x0104, "Data file corrupted") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_NO_MEMORY, 0, 0x0105, "Ref out of memory") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_FULL, 0, 0x0106, "too many Ref Objs") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_ID_REMOVED, 0, 0x0107, "Ref ID is removed") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_INVALID_ID, 0, 0x0108, "Invalid Ref ID") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_ALREADY_EXIST, 0, 0x0109, "Ref is already there") +TAOS_DEFINE_ERROR(TSDB_CODE_REF_NOT_EXIST, 0, 0x010A, "Ref is not there") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_SQL, 0, 0x0200, "Invalid SQL statement") @@ -182,7 +188,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_DND_OUT_OF_MEMORY, 0, 0x0401, "Dnode out TAOS_DEFINE_ERROR(TSDB_CODE_DND_NO_WRITE_ACCESS, 0, 0x0402, "No permission for disk files in dnode") TAOS_DEFINE_ERROR(TSDB_CODE_DND_INVALID_MSG_LEN, 0, 0x0403, "Invalid message length") -// vnode +// vnode TAOS_DEFINE_ERROR(TSDB_CODE_VND_ACTION_IN_PROGRESS, 0, 0x0500, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_VND_MSG_NOT_PROCESSED, 0, 0x0501, "Message not processed") TAOS_DEFINE_ERROR(TSDB_CODE_VND_ACTION_NEED_REPROCESSED, 0, 0x0502, "Action need to be reprocessed") @@ -230,6 +236,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "Query not TAOS_DEFINE_ERROR(TSDB_CODE_QRY_HAS_RSP, 0, 0x0708, "Query should response") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_IN_EXEC, 0, 0x0709, "Multiple retrieval of this query") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_TOO_MANY_TIMEWINDOW, 0, 0x070A, "Too many time window in query") +TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_ENOUGH_BUFFER, 0, 0x070B, "Query buffer limit has reached") // grant TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "License expired") diff --git a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index f8f99e22c6..c29d1ec0b7 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -182,7 +182,7 @@ static int32_t mnodeGetConnsMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pC // app name pShow->bytes[cols] = TSDB_APPNAME_LEN + VARSTR_HEADER_SIZE; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "app_name"); + strcpy(pSchema[cols].name, "program"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; diff --git a/src/mnode/src/mnodeSdb.c b/src/mnode/src/mnodeSdb.c index 14558485aa..8c61c61a10 100644 --- a/src/mnode/src/mnodeSdb.c +++ b/src/mnode/src/mnodeSdb.c @@ -281,7 +281,14 @@ static void sdbConfirmForward(void *ahandle, void *param, int32_t code) { ((SSdbTable *)pOper->table)->tableName, pOper->pObj, sdbGetKeyStr(pOper->table, pHead->cont), pHead->version, action, tstrerror(pOper->retCode)); if (action == SDB_ACTION_INSERT) { - sdbDeleteHash(pOper->table, pOper); + // It's better to create a table in two stages, create it first and then set it success + //sdbDeleteHash(pOper->table, pOper); + SSdbOper oper = { + .type = SDB_OPER_GLOBAL, + .table = pOper->table, + .pObj = pOper->pObj + }; + sdbDeleteRow(&oper); } } diff --git a/src/query/inc/qTsbuf.h b/src/query/inc/qTsbuf.h index 46e6f79014..6c2a955f47 100644 --- a/src/query/inc/qTsbuf.h +++ b/src/query/inc/qTsbuf.h @@ -35,16 +35,9 @@ typedef struct STSList { int32_t len; } STSList; -typedef struct STSRawBlock { - int32_t vnode; - int64_t tag; - TSKEY* ts; - int32_t len; -} STSRawBlock; - typedef struct STSElem { TSKEY ts; - tVariant tag; + tVariant* tag; int32_t vnode; } STSElem; @@ -84,6 +77,7 @@ typedef struct STSBuf { char path[PATH_MAX]; uint32_t fileSize; + // todo use array STSVnodeBlockInfoEx* pData; uint32_t numOfAlloc; uint32_t numOfVnodes; @@ -106,12 +100,12 @@ typedef struct STSBufFileHeader { STSBuf* tsBufCreate(bool autoDelete, int32_t order); STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete); -STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t tsOrder); +STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t tsOrder, int32_t vnodeId); void* tsBufDestroy(STSBuf* pTSBuf); void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, tVariant* tag, const char* pData, int32_t len); -int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeIdx); +int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf); STSBuf* tsBufClone(STSBuf* pTSBuf); @@ -121,6 +115,7 @@ void tsBufFlush(STSBuf* pTSBuf); void tsBufResetPos(STSBuf* pTSBuf); STSElem tsBufGetElem(STSBuf* pTSBuf); + bool tsBufNextPos(STSBuf* pTSBuf); STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t vnodeId, tVariant* tag); @@ -136,6 +131,10 @@ void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur); */ void tsBufDisplay(STSBuf* pTSBuf); +int32_t tsBufGetNumOfVnodes(STSBuf* pTSBuf); + +void tsBufGetVnodeIdList(STSBuf* pTSBuf, int32_t* num, int32_t** vnodeId); + #ifdef __cplusplus } #endif diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 5ad52ef29e..d46beab2cb 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -184,7 +184,7 @@ static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInf static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId); static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo, - SDataStatis *pStatis, void *param, int32_t colIndex); + SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId); static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv); static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo); @@ -194,6 +194,8 @@ static void buildTagQueryResult(SQInfo *pQInfo); static int32_t setAdditionalInfo(SQInfo *pQInfo, void *pTable, STableQueryInfo *pTableQueryInfo); static int32_t flushFromResultBuf(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo); +static int32_t checkForQueryBuf(size_t numOfTables); +static void releaseQueryBuf(size_t numOfTables); bool doFilterData(SQuery *pQuery, int32_t elemPos) { for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { @@ -1005,9 +1007,10 @@ static void blockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis * longjmp(pRuntimeEnv->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } + SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv); for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock); - setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k); + setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId); } int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); @@ -1200,7 +1203,7 @@ static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) { SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; // compare tag first - if (tVariantCompare(&pCtx[0].tag, &elem.tag) != 0) { + if (tVariantCompare(&pCtx[0].tag, elem.tag) != 0) { return TS_JOIN_TAG_NOT_EQUALS; } @@ -1286,9 +1289,10 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS groupbyColumnData = getGroupbyColumnData(pQuery, &type, &bytes, pDataBlock); } + SQInfo* pQInfo = GET_QINFO_ADDR(pRuntimeEnv); for (int32_t k = 0; k < pQuery->numOfOutput; ++k) { char *dataBlock = getDataBlock(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->rows, pDataBlock); - setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k); + setExecParams(pQuery, &pCtx[k], dataBlock, tsCols, pDataBlockInfo, pStatis, &sasArray[k], k, pQInfo->vgId); } // set the input column data @@ -1303,7 +1307,6 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS // from top to bottom in desc // from bottom to top in asc order if (pRuntimeEnv->pTSBuf != NULL) { - SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pRuntimeEnv); qDebug("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->rows, pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order); } @@ -1409,6 +1412,10 @@ static void rowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pS item->lastKey = (QUERY_IS_ASC_QUERY(pQuery)? pDataBlockInfo->window.ekey:pDataBlockInfo->window.skey) + step; } + if (pRuntimeEnv->pTSBuf != NULL) { + item->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); + } + // todo refactor: extract method for(int32_t i = 0; i < pQuery->numOfOutput; ++i) { if (pQuery->pSelectExpr[i].base.functionId != TSDB_FUNC_ARITHM) { @@ -1469,7 +1476,7 @@ static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBl } void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY *tsCol, SDataBlockInfo* pBlockInfo, - SDataStatis *pStatis, void *param, int32_t colIndex) { + SDataStatis *pStatis, void *param, int32_t colIndex, int32_t vgId) { int32_t functionId = pQuery->pSelectExpr[colIndex].base.functionId; int32_t colId = pQuery->pSelectExpr[colIndex].base.colInfo.colId; @@ -1542,6 +1549,9 @@ void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void* inputData, TSKEY } } } + } else if (functionId == TSDB_FUNC_TS_COMP) { + pCtx->param[0].i64Key = vgId; + pCtx->param[0].nType = TSDB_DATA_TYPE_BIGINT; } #if defined(_DEBUG_VIEW) @@ -2621,12 +2631,19 @@ void setTagVal(SQueryRuntimeEnv *pRuntimeEnv, void *pTable, void *tsdb) { pFuncMsg->colInfo.colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) { assert(pFuncMsg->numOfParams == 1); - int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64; - SColumnInfo* pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId); + int16_t tagColId = (int16_t)pExprInfo->base.arg->argValue.i64; + SColumnInfo *pColInfo = doGetTagColumnInfoById(pQuery->tagColList, pQuery->numOfTags, tagColId); doSetTagValueInParam(tsdb, pTable, tagColId, &pRuntimeEnv->pCtx[0].tag, pColInfo->type, pColInfo->bytes); - qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%"PRId64, pQInfo, pExprInfo->base.arg->argValue.i64, - pRuntimeEnv->pCtx[0].tag.i64Key) + + int16_t tagType = pRuntimeEnv->pCtx[0].tag.nType; + if (tagType == TSDB_DATA_TYPE_BINARY || tagType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%s", pQInfo, + pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.pz); + } else { + qDebug("QInfo:%p set tag value for join comparison, colId:%" PRId64 ", val:%" PRId64, pQInfo, + pExprInfo->base.arg->argValue.i64, pRuntimeEnv->pCtx[0].tag.i64Key); + } } } } @@ -3860,14 +3877,40 @@ int32_t setAdditionalInfo(SQInfo *pQInfo, void* pTable, STableQueryInfo *pTableQ // both the master and supplement scan needs to set the correct ts comp start position if (pRuntimeEnv->pTSBuf != NULL) { + tVariant* pTag = &pRuntimeEnv->pCtx[0].tag; + if (pTableQueryInfo->cur.vgroupIndex == -1) { - tVariantAssign(&pTableQueryInfo->tag, &pRuntimeEnv->pCtx[0].tag); - tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pTableQueryInfo->tag); + tVariantAssign(&pTableQueryInfo->tag, pTag); + + STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, &pTableQueryInfo->tag); + + // failed to find data with the specified tag value and vnodeId + if (elem.vnode < 0) { + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz); + } else { + qError("QInfo:%p failed to find tag:%" PRId64 " in ts_comp", pQInfo, pTag->i64Key); + } + + return false; + } // keep the cursor info of current meter - pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur; + pTableQueryInfo->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex); + } else { + qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex); + } + } else { tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur); + + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex); + } else { + qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, pTableQueryInfo->cur.blockIndex, pTableQueryInfo->cur.tsIndex); + } } } @@ -4763,15 +4806,62 @@ static bool multiTableMultioutputHelper(SQInfo *pQInfo, int32_t index) { } if (pRuntimeEnv->pTSBuf != NULL) { - if (pRuntimeEnv->cur.vgroupIndex == -1) { - STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, &pRuntimeEnv->pCtx[0].tag); + tVariant* pTag = &pRuntimeEnv->pCtx[0].tag; - // failed to find data with the specified tag value + if (pRuntimeEnv->cur.vgroupIndex == -1) { + STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag); + // failed to find data with the specified tag value and vnodeId if (elem.vnode < 0) { + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz); + } else { + qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key); + } + return false; + } else { + STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); + + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, + cur.blockIndex, cur.tsIndex); + } else { + qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, + cur.blockIndex, cur.tsIndex); + } } } else { - tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur); + STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf); + if (tVariantCompare(elem.tag, &pRuntimeEnv->pCtx[0].tag) != 0) { + + STSElem elem1 = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, pQInfo->vgId, pTag); + // failed to find data with the specified tag value and vnodeId + if (elem1.vnode < 0) { + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qError("QInfo:%p failed to find tag:%s in ts_comp", pQInfo, pTag->pz); + } else { + qError("QInfo:%p failed to find tag:%"PRId64" in ts_comp", pQInfo, pTag->i64Key); + } + + return false; + } else { + STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p find tag:%s start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex); + } else { + qDebug("QInfo:%p find tag:%"PRId64" start pos in ts_comp, blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex); + } + } + + } else { + tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur); + STSCursor cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); + if (pTag->nType == TSDB_DATA_TYPE_BINARY || pTag->nType == TSDB_DATA_TYPE_NCHAR) { + qDebug("QInfo:%p continue scan ts_comp file, tag:%s blockIndex:%d, tsIndex:%d", pQInfo, pTag->pz, cur.blockIndex, cur.tsIndex); + } else { + qDebug("QInfo:%p continue scan ts_comp file, tag:%"PRId64" blockIndex:%d, tsIndex:%d", pQInfo, pTag->i64Key, cur.blockIndex, cur.tsIndex); + } + } } } @@ -5027,6 +5117,10 @@ static void sequentialTableProcess(SQInfo *pQInfo) { break; } + if (pRuntimeEnv->pTSBuf != NULL) { + pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur; + } + } else { // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter if (pQuery->rec.rows == 0) { @@ -6320,7 +6414,7 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ STSBuf *pTSBuf = NULL; if (pQueryMsg->tsLen > 0) { // open new file to save the result char *tsBlock = (char *) pQueryMsg + pQueryMsg->tsOffset; - pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder); + pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder, vgId); tsBufResetPos(pTSBuf); bool ret = tsBufNextPos(pTSBuf); @@ -6402,6 +6496,8 @@ static void freeQInfo(SQInfo *pQInfo) { qDebug("QInfo:%p start to free QInfo", pQInfo); + releaseQueryBuf(pQInfo->tableqinfoGroupInfo.numOfTables); + teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); SQuery *pQuery = pQInfo->runtimeEnv.pQuery; @@ -6636,6 +6732,11 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qi assert(0); } + code = checkForQueryBuf(tableGroupInfo.numOfTables); + if (code != TSDB_CODE_SUCCESS) { // not enough query buffer, abort + goto _over; + } + (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pExprs, &tableGroupInfo, pTagColumnInfo, isSTableQuery); pExprs = NULL; pGroupbyExpr = NULL; @@ -7037,6 +7138,48 @@ static void buildTagQueryResult(SQInfo* pQInfo) { setQueryStatus(pQuery, QUERY_COMPLETED); } +static int64_t getQuerySupportBufSize(size_t numOfTables) { + size_t s1 = sizeof(STableQueryInfo); + size_t s2 = sizeof(SHashNode); + +// size_t s3 = sizeof(STableCheckInfo); buffer consumption in tsdb + return (int64_t)((s1 + s2) * 1.5 * numOfTables); +} + +int32_t checkForQueryBuf(size_t numOfTables) { + int64_t t = getQuerySupportBufSize(numOfTables); + if (tsQueryBufferSize < 0) { + return TSDB_CODE_SUCCESS; + } else if (tsQueryBufferSize > 0) { + + while(1) { + int64_t s = tsQueryBufferSize; + int64_t remain = s - t; + if (remain >= 0) { + if (atomic_val_compare_exchange_64(&tsQueryBufferSize, s, remain) == s) { + return TSDB_CODE_SUCCESS; + } + } else { + return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; + } + } + } + + // disable query processing if the value of tsQueryBufferSize is zero. + return TSDB_CODE_QRY_NOT_ENOUGH_BUFFER; +} + +void releaseQueryBuf(size_t numOfTables) { + if (tsQueryBufferSize <= 0) { + return; + } + + int64_t t = getQuerySupportBufSize(numOfTables); + + // restore value is not enough buffer available + atomic_add_fetch_64(&tsQueryBufferSize, t); +} + void* qGetResultRetrieveMsg(qinfo_t qinfo) { SQInfo* pQInfo = (SQInfo*) qinfo; assert(pQInfo != NULL); diff --git a/src/query/src/qExtbuffer.c b/src/query/src/qExtbuffer.c index fc9c60b39b..17be294531 100644 --- a/src/query/src/qExtbuffer.c +++ b/src/query/src/qExtbuffer.c @@ -344,8 +344,6 @@ static FORCE_INLINE int32_t primaryKeyComparator(int64_t f1, int64_t f2, int32_t return 0; } - assert(colIdx == 0); - if (tsOrder == TSDB_ORDER_DESC) { // primary column desc order return (f1 < f2) ? 1 : -1; } else { // asc diff --git a/src/query/src/qTsbuf.c b/src/query/src/qTsbuf.c index b264f6cdc9..ad29cef5c2 100644 --- a/src/query/src/qTsbuf.c +++ b/src/query/src/qTsbuf.c @@ -403,7 +403,7 @@ void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, tVariant* tag, const char* pDa } else { expandBuffer(ptsData, len); } - + tVariantAssign(&pTSBuf->block.tag, tag); memcpy(ptsData->rawBuf + ptsData->len, pData, (size_t)len); @@ -561,6 +561,19 @@ static void tsBufGetBlock(STSBuf* pTSBuf, int32_t vnodeIndex, int32_t blockIndex pCur->tsIndex = (pCur->order == TSDB_ORDER_ASC) ? 0 : pBlock->numOfElem - 1; } +static int32_t doUpdateVnodeInfo(STSBuf* pTSBuf, int64_t offset, STSVnodeBlockInfo* pVInfo) { + if (offset < 0 || offset >= getDataStartOffset()) { + return -1; + } + + if (fseek(pTSBuf->f, (int32_t)offset, SEEK_SET) != 0) { + return -1; + } + + fwrite(pVInfo, sizeof(STSVnodeBlockInfo), 1, pTSBuf->f); + return 0; +} + STSVnodeBlockInfo* tsBufGetVnodeBlockInfo(STSBuf* pTSBuf, int32_t vnodeId) { int32_t j = tsBufFindVnodeIndexFromId(pTSBuf->pData, pTSBuf->numOfVnodes, vnodeId); if (j == -1) { @@ -649,7 +662,7 @@ bool tsBufNextPos(STSBuf* pTSBuf) { return false; } - int32_t blockIndex = pCur->order == TSDB_ORDER_ASC ? 0 : pBlockInfo->numOfBlocks - 1; + int32_t blockIndex = (pCur->order == TSDB_ORDER_ASC) ? 0 : (pBlockInfo->numOfBlocks - 1); tsBufGetBlock(pTSBuf, pCur->vgroupIndex + step, blockIndex); break; @@ -675,8 +688,7 @@ void tsBufResetPos(STSBuf* pTSBuf) { } STSElem tsBufGetElem(STSBuf* pTSBuf) { - STSElem elem1 = {.vnode = -1}; - + STSElem elem1 = {.vnode = -1}; if (pTSBuf == NULL) { return elem1; } @@ -690,7 +702,7 @@ STSElem tsBufGetElem(STSBuf* pTSBuf) { elem1.vnode = pTSBuf->pData[pCur->vgroupIndex].info.vnode; elem1.ts = *(TSKEY*)(pTSBuf->tsData.rawBuf + pCur->tsIndex * TSDB_KEYSIZE); - tVariantAssign(&elem1.tag, &pBlock->tag); + elem1.tag = &pBlock->tag; return elem1; } @@ -702,7 +714,7 @@ STSElem tsBufGetElem(STSBuf* pTSBuf) { * @param vnodeId * @return */ -int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeId) { +int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf) { if (pDestBuf == NULL || pSrcBuf == NULL || pSrcBuf->numOfVnodes <= 0) { return 0; } @@ -712,14 +724,13 @@ int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeId) { } // src can only have one vnode index - if (pSrcBuf->numOfVnodes > 1) { - return -1; - } - + assert(pSrcBuf->numOfVnodes == 1); + // there are data in buffer, flush to disk first tsBufFlush(pDestBuf); // compared with the last vnode id + int32_t vnodeId = tsBufGetLastVnodeInfo((STSBuf*) pSrcBuf)->info.vnode; if (vnodeId != tsBufGetLastVnodeInfo(pDestBuf)->info.vnode) { int32_t oldSize = pDestBuf->numOfVnodes; int32_t newSize = oldSize + pSrcBuf->numOfVnodes; @@ -791,14 +802,14 @@ int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeId) { return 0; } -STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t order) { +STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t order, int32_t vnodeId) { STSBuf* pTSBuf = tsBufCreate(true, order); STSVnodeBlockInfo* pBlockInfo = &(addOneVnodeInfo(pTSBuf, 0)->info); pBlockInfo->numOfBlocks = numOfBlocks; pBlockInfo->compLen = len; pBlockInfo->offset = getDataStartOffset(); - pBlockInfo->vnode = 0; + pBlockInfo->vnode = vnodeId; // update prev vnode length info in file TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, pBlockInfo); @@ -902,8 +913,8 @@ void tsBufDisplay(STSBuf* pTSBuf) { while (tsBufNextPos(pTSBuf)) { STSElem elem = tsBufGetElem(pTSBuf); - if (elem.tag.nType == TSDB_DATA_TYPE_BIGINT) { - printf("%d-%" PRId64 "-%" PRId64 "\n", elem.vnode, elem.tag.i64Key, elem.ts); + if (elem.tag->nType == TSDB_DATA_TYPE_BIGINT) { + printf("%d-%" PRId64 "-%" PRId64 "\n", elem.vnode, elem.tag->i64Key, elem.ts); } } @@ -915,19 +926,6 @@ static int32_t getDataStartOffset() { return sizeof(STSBufFileHeader) + TS_COMP_FILE_VNODE_MAX * sizeof(STSVnodeBlockInfo); } -static int32_t doUpdateVnodeInfo(STSBuf* pTSBuf, int64_t offset, STSVnodeBlockInfo* pVInfo) { - if (offset < 0 || offset >= getDataStartOffset()) { - return -1; - } - - if (fseek(pTSBuf->f, (int32_t)offset, SEEK_SET) != 0) { - return -1; - } - - fwrite(pVInfo, sizeof(STSVnodeBlockInfo), 1, pTSBuf->f); - return 0; -} - // update prev vnode length info in file static void TSBufUpdateVnodeInfo(STSBuf* pTSBuf, int32_t index, STSVnodeBlockInfo* pBlockInfo) { int32_t offset = sizeof(STSBufFileHeader) + index * sizeof(STSVnodeBlockInfo); @@ -969,3 +967,29 @@ static STSBuf* allocResForTSBuf(STSBuf* pTSBuf) { pTSBuf->fileSize += getDataStartOffset(); return pTSBuf; } + +int32_t tsBufGetNumOfVnodes(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return 0; + } + + return pTSBuf->numOfVnodes; +} + +void tsBufGetVnodeIdList(STSBuf* pTSBuf, int32_t* num, int32_t** vnodeId) { + int32_t size = tsBufGetNumOfVnodes(pTSBuf); + if (num != NULL) { + *num = size; + } + + *vnodeId = NULL; + if (size == 0) { + return; + } + + (*vnodeId) = malloc(tsBufGetNumOfVnodes(pTSBuf) * sizeof(int32_t)); + + for(int32_t i = 0; i < size; ++i) { + (*vnodeId)[i] = pTSBuf->pData[i].info.vnode; + } +} \ No newline at end of file diff --git a/src/query/tests/tsBufTest.cpp b/src/query/tests/tsBufTest.cpp index b78c5314f2..8cd3a9cbef 100644 --- a/src/query/tests/tsBufTest.cpp +++ b/src/query/tests/tsBufTest.cpp @@ -304,7 +304,7 @@ void TSTraverse() { int32_t totalOutput = 10; while (1) { STSElem elem = tsBufGetElem(pTSBuf); - printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag.i64Key, elem.ts); + printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag->i64Key, elem.ts); if (!tsBufNextPos(pTSBuf)) { break; @@ -352,7 +352,7 @@ void TSTraverse() { totalOutput = 10; while (1) { STSElem elem = tsBufGetElem(pTSBuf); - printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag.i64Key, elem.ts); + printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag->i64Key, elem.ts); if (!tsBufNextPos(pTSBuf)) { break; @@ -416,8 +416,8 @@ void mergeDiffVnodeBufferTest() { int64_t* list = createTsList(num, start, step); t.i64Key = i; - tsBufAppend(pTSBuf1, 0, &t, (const char*)list, num * sizeof(int64_t)); - tsBufAppend(pTSBuf2, 0, &t, (const char*)list, num * sizeof(int64_t)); + tsBufAppend(pTSBuf1, 1, &t, (const char*)list, num * sizeof(int64_t)); + tsBufAppend(pTSBuf2, 9, &t, (const char*)list, num * sizeof(int64_t)); free(list); @@ -426,7 +426,7 @@ void mergeDiffVnodeBufferTest() { tsBufFlush(pTSBuf2); - tsBufMerge(pTSBuf1, pTSBuf2, 9); + tsBufMerge(pTSBuf1, pTSBuf2); EXPECT_EQ(pTSBuf1->numOfVnodes, 2); EXPECT_EQ(pTSBuf1->numOfTotal, numOfTags * 2 * num); @@ -459,8 +459,6 @@ void mergeIdenticalVnodeBufferTest() { start += step * num; } - - for (int32_t i = numOfTags; i < numOfTags * 2; ++i) { int64_t* list = createTsList(num, start, step); @@ -473,7 +471,7 @@ void mergeIdenticalVnodeBufferTest() { tsBufFlush(pTSBuf2); - tsBufMerge(pTSBuf1, pTSBuf2, 12); + tsBufMerge(pTSBuf1, pTSBuf2); EXPECT_EQ(pTSBuf1->numOfVnodes, 1); EXPECT_EQ(pTSBuf1->numOfTotal, numOfTags * 2 * num); @@ -482,7 +480,7 @@ void mergeIdenticalVnodeBufferTest() { STSElem elem = tsBufGetElem(pTSBuf1); EXPECT_EQ(elem.vnode, 12); - printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag.i64Key, elem.ts); + printf("%d-%" PRIu64 "-%" PRIu64 "\n", elem.vnode, elem.tag->i64Key, elem.ts); } tsBufDestroy(pTSBuf1); diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 6e9088d9fb..294e204791 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -20,6 +20,7 @@ #include "ttimer.h" #include "tutil.h" #include "lz4.h" +#include "tref.h" #include "taoserror.h" #include "tsocket.h" #include "tglobal.h" @@ -72,7 +73,6 @@ typedef struct { SRpcInfo *pRpc; // associated SRpcInfo SRpcEpSet epSet; // ip list provided by app void *ahandle; // handle provided by app - void *signature; // for validation struct SRpcConn *pConn; // pConn allocated char msgType; // message type uint8_t *pCont; // content provided by app @@ -132,6 +132,10 @@ int tsRpcMaxRetry; int tsRpcHeadSize; int tsRpcOverhead; +static int tsRpcRefId = -1; +static int32_t tsRpcNum = 0; +static pthread_once_t tsRpcInit = PTHREAD_ONCE_INIT; + // server:0 client:1 tcp:2 udp:0 #define RPC_CONN_UDPS 0 #define RPC_CONN_UDPC 1 @@ -211,14 +215,21 @@ static void rpcUnlockConn(SRpcConn *pConn); static void rpcAddRef(SRpcInfo *pRpc); static void rpcDecRef(SRpcInfo *pRpc); -void *rpcOpen(const SRpcInit *pInit) { - SRpcInfo *pRpc; +static void rpcInit(void) { tsProgressTimer = tsRpcTimer/2; tsRpcMaxRetry = tsRpcMaxTime * 1000/tsProgressTimer; tsRpcHeadSize = RPC_MSG_OVERHEAD; tsRpcOverhead = sizeof(SRpcReqContext); + tsRpcRefId = taosOpenRef(200, free); +} + +void *rpcOpen(const SRpcInit *pInit) { + SRpcInfo *pRpc; + + pthread_once(&tsRpcInit, rpcInit); + pRpc = (SRpcInfo *)calloc(1, sizeof(SRpcInfo)); if (pRpc == NULL) return NULL; @@ -237,6 +248,8 @@ void *rpcOpen(const SRpcInit *pInit) { pRpc->afp = pInit->afp; pRpc->refCount = 1; + atomic_add_fetch_32(&tsRpcNum, 1); + size_t size = sizeof(SRpcConn) * pRpc->sessions; pRpc->connList = (SRpcConn *)calloc(1, size); if (pRpc->connList == NULL) { @@ -363,7 +376,6 @@ void rpcSendRequest(void *shandle, const SRpcEpSet *pEpSet, SRpcMsg *pMsg) { int contLen = rpcCompressRpcMsg(pMsg->pCont, pMsg->contLen); pContext = (SRpcReqContext *) ((char*)pMsg->pCont-sizeof(SRpcHead)-sizeof(SRpcReqContext)); pContext->ahandle = pMsg->ahandle; - pContext->signature = pContext; pContext->pRpc = (SRpcInfo *)shandle; pContext->epSet = *pEpSet; pContext->contLen = contLen; @@ -386,6 +398,7 @@ void rpcSendRequest(void *shandle, const SRpcEpSet *pEpSet, SRpcMsg *pMsg) { // set the handle to pContext, so app can cancel the request if (pMsg->handle) *((void **)pMsg->handle) = pContext; + taosAddRef(tsRpcRefId, pContext); rpcSendReqToServer(pRpc, pContext); return; @@ -536,14 +549,15 @@ int rpcReportProgress(void *handle, char *pCont, int contLen) { void rpcCancelRequest(void *handle) { SRpcReqContext *pContext = handle; - // signature is used to check if pContext is freed. - // pContext may have been released just before app calls the rpcCancelRequest - if (pContext == NULL || pContext->signature != pContext) return; + int code = taosAcquireRef(tsRpcRefId, pContext); + if (code < 0) return; if (pContext->pConn) { tDebug("%s, app tries to cancel request", pContext->pConn->info); rpcCloseConn(pContext->pConn); } + + taosReleaseRef(tsRpcRefId, pContext); } static void rpcFreeMsg(void *msg) { @@ -612,7 +626,7 @@ static void rpcReleaseConn(SRpcConn *pConn) { // if there is an outgoing message, free it if (pConn->outType && pConn->pReqMsg) { if (pConn->pContext) pConn->pContext->pConn = NULL; - rpcFreeMsg(pConn->pReqMsg); + taosRemoveRef(tsRpcRefId, pConn->pContext); } } @@ -1068,7 +1082,6 @@ static void *rpcProcessMsgFromPeer(SRecvInfo *pRecv) { static void rpcNotifyClient(SRpcReqContext *pContext, SRpcMsg *pMsg) { SRpcInfo *pRpc = pContext->pRpc; - pContext->signature = NULL; pContext->pConn = NULL; if (pContext->pRsp) { // for synchronous API @@ -1085,7 +1098,7 @@ static void rpcNotifyClient(SRpcReqContext *pContext, SRpcMsg *pMsg) { } // free the request message - rpcFreeCont(pContext->pCont); + taosRemoveRef(tsRpcRefId, pContext); } static void rpcProcessIncomingMsg(SRpcConn *pConn, SRpcHead *pHead, SRpcReqContext *pContext) { @@ -1593,6 +1606,12 @@ static void rpcDecRef(SRpcInfo *pRpc) pthread_mutex_destroy(&pRpc->mutex); tDebug("%s rpc resources are released", pRpc->label); taosTFree(pRpc); + + int count = atomic_sub_fetch_32(&tsRpcNum, 1); + if (count == 0) { + taosCloseRef(tsRpcRefId); + // tsRpcInit = PTHREAD_ONCE_INIT; // windows compliling error + } } } diff --git a/src/util/inc/tconfig.h b/src/util/inc/tconfig.h index 0520cf29a8..33819f6a20 100644 --- a/src/util/inc/tconfig.h +++ b/src/util/inc/tconfig.h @@ -53,7 +53,7 @@ enum { TAOS_CFG_UTYPE_NONE, TAOS_CFG_UTYPE_PERCENT, TAOS_CFG_UTYPE_GB, - TAOS_CFG_UTYPE_Mb, + TAOS_CFG_UTYPE_MB, TAOS_CFG_UTYPE_BYTE, TAOS_CFG_UTYPE_SECOND, TAOS_CFG_UTYPE_MS diff --git a/src/util/inc/tref.h b/src/util/inc/tref.h new file mode 100644 index 0000000000..6619ff407e --- /dev/null +++ b/src/util/inc/tref.h @@ -0,0 +1,38 @@ + +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TREF_H +#define TDENGINE_TREF_H + +#ifdef __cplusplus +extern "C" { +#endif + +int taosOpenRef(int max, void (*fp)(void *)); // return refId which will be used by other APIs +void taosCloseRef(int refId); +int taosListRef(); // return the number of references in system +int taosAddRef(int refId, void *p); +int taosAcquireRef(int refId, void *p); +void taosReleaseRef(int refId, void *p); + +#define taosRemoveRef taosReleaseRef + + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TREF_H diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 6e20c1708d..5be7253f6d 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -335,7 +335,7 @@ void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { } void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { - if (pCacheObj == NULL || taosHashGetSize(pCacheObj->pHashTable) + pCacheObj->numOfElemsInTrash == 0) { + if (pCacheObj == NULL) { return; } @@ -343,7 +343,12 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { uError("cache:%s, NULL data to release", pCacheObj->name); return; } - + + + // The operation of removal from hash table and addition to trashcan is not an atomic operation, + // therefore the check for the empty of both the hash table and the trashcan has a race condition. + // It happens when there is only one object in the cache, and two threads which has referenced this object + // start to free the it simultaneously [TD-1569]. size_t offset = offsetof(SCacheDataNode, data); SCacheDataNode *pNode = (SCacheDataNode *)((char *)(*data) - offset); diff --git a/src/util/src/tref.c b/src/util/src/tref.c new file mode 100644 index 0000000000..5e0a85fdb6 --- /dev/null +++ b/src/util/src/tref.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "os.h" +#include "taoserror.h" +#include "tulog.h" +#include "tutil.h" + +#define TSDB_REF_OBJECTS 50 +#define TSDB_REF_STATE_EMPTY 0 +#define TSDB_REF_STATE_ACTIVE 1 +#define TSDB_REF_STATE_DELETED 2 + +typedef struct SRefNode { + struct SRefNode *prev; + struct SRefNode *next; + void *p; + int32_t count; +} SRefNode; + +typedef struct { + SRefNode **nodeList; + int state; // 0: empty, 1: active; 2: deleted + int refId; + int max; + int32_t count; // total number of SRefNodes in this set + int64_t *lockedBy; + void (*fp)(void *); +} SRefSet; + +static SRefSet tsRefSetList[TSDB_REF_OBJECTS]; +static pthread_once_t tsRefModuleInit = PTHREAD_ONCE_INIT; +static pthread_mutex_t tsRefMutex; +static int tsRefSetNum = 0; +static int tsNextId = 0; + +static void taosInitRefModule(void); +static int taosHashRef(SRefSet *pSet, void *p); +static void taosLockList(int64_t *lockedBy); +static void taosUnlockList(int64_t *lockedBy); +static void taosIncRefCount(SRefSet *pSet); +static void taosDecRefCount(SRefSet *pSet); + +int taosOpenRef(int max, void (*fp)(void *)) +{ + SRefNode **nodeList; + SRefSet *pSet; + int64_t *lockedBy; + int i, refId; + + pthread_once(&tsRefModuleInit, taosInitRefModule); + + nodeList = calloc(sizeof(SRefNode *), (size_t)max); + if (nodeList == NULL) { + return TSDB_CODE_REF_NO_MEMORY; + } + + lockedBy = calloc(sizeof(int64_t), (size_t)max); + if (lockedBy == NULL) { + free(nodeList); + return TSDB_CODE_REF_NO_MEMORY; + } + + pthread_mutex_lock(&tsRefMutex); + + for (i = 0; i < TSDB_REF_OBJECTS; ++i) { + tsNextId = (tsNextId + 1) % TSDB_REF_OBJECTS; + if (tsRefSetList[tsNextId].state == TSDB_REF_STATE_EMPTY) break; + } + + if (i < TSDB_REF_OBJECTS) { + refId = tsNextId; + pSet = tsRefSetList + refId; + taosIncRefCount(pSet); + pSet->max = max; + pSet->nodeList = nodeList; + pSet->lockedBy = lockedBy; + pSet->fp = fp; + pSet->state = TSDB_REF_STATE_ACTIVE; + pSet->refId = refId; + + tsRefSetNum++; + uTrace("refId:%d is opened, max:%d, fp:%p refSetNum:%d", refId, max, fp, tsRefSetNum); + } else { + refId = TSDB_CODE_REF_FULL; + free (nodeList); + free (lockedBy); + uTrace("run out of Ref ID, maximum:%d refSetNum:%d", TSDB_REF_OBJECTS, tsRefSetNum); + } + + pthread_mutex_unlock(&tsRefMutex); + + return refId; +} + +void taosCloseRef(int refId) +{ + SRefSet *pSet; + int deleted = 0; + + if (refId < 0 || refId >= TSDB_REF_OBJECTS) { + uTrace("refId:%d is invalid, out of range", refId); + return; + } + + pSet = tsRefSetList + refId; + + pthread_mutex_lock(&tsRefMutex); + + if (pSet->state == TSDB_REF_STATE_ACTIVE) { + pSet->state = TSDB_REF_STATE_DELETED; + deleted = 1; + uTrace("refId:%d is closed, count:%d", refId, pSet->count); + } else { + uTrace("refId:%d is already closed, count:%d", refId, pSet->count); + } + + pthread_mutex_unlock(&tsRefMutex); + + if (deleted) taosDecRefCount(pSet); +} + +int taosAddRef(int refId, void *p) +{ + int hash; + SRefNode *pNode; + SRefSet *pSet; + + if (refId < 0 || refId >= TSDB_REF_OBJECTS) { + uTrace("refId:%d p:%p failed to add, refId not valid", refId, p); + return TSDB_CODE_REF_INVALID_ID; + } + + uTrace("refId:%d p:%p try to add", refId, p); + + pSet = tsRefSetList + refId; + taosIncRefCount(pSet); + if (pSet->state != TSDB_REF_STATE_ACTIVE) { + taosDecRefCount(pSet); + uTrace("refId:%d p:%p failed to add, not active", refId, p); + return TSDB_CODE_REF_ID_REMOVED; + } + + int code = 0; + hash = taosHashRef(pSet, p); + + taosLockList(pSet->lockedBy+hash); + + pNode = pSet->nodeList[hash]; + while ( pNode ) { + if ( pNode->p == p ) + break; + + pNode = pNode->next; + } + + if (pNode) { + code = TSDB_CODE_REF_ALREADY_EXIST; + uTrace("refId:%d p:%p is already there, faild to add", refId, p); + } else { + pNode = calloc(sizeof(SRefNode), 1); + if (pNode) { + pNode->p = p; + pNode->count = 1; + pNode->prev = 0; + pNode->next = pSet->nodeList[hash]; + pSet->nodeList[hash] = pNode; + uTrace("refId:%d p:%p is added, count::%d", refId, p, pSet->count); + } else { + code = TSDB_CODE_REF_NO_MEMORY; + uTrace("refId:%d p:%p is not added, since no memory", refId, p); + } + } + + if (code < 0) taosDecRefCount(pSet); + + taosUnlockList(pSet->lockedBy+hash); + + return code; +} + +int taosAcquireRef(int refId, void *p) +{ + int hash, code = 0; + SRefNode *pNode; + SRefSet *pSet; + + if ( refId < 0 || refId >= TSDB_REF_OBJECTS ) { + uTrace("refId:%d p:%p failed to acquire, refId not valid", refId, p); + return TSDB_CODE_REF_INVALID_ID; + } + + uTrace("refId:%d p:%p try to acquire", refId, p); + + pSet = tsRefSetList + refId; + taosIncRefCount(pSet); + if (pSet->state != TSDB_REF_STATE_ACTIVE) { + uTrace("refId:%d p:%p failed to acquire, not active", refId, p); + taosDecRefCount(pSet); + return TSDB_CODE_REF_ID_REMOVED; + } + + hash = taosHashRef(pSet, p); + + taosLockList(pSet->lockedBy+hash); + + pNode = pSet->nodeList[hash]; + + while (pNode) { + if (pNode->p == p) { + break; + } + + pNode = pNode->next; + } + + if (pNode) { + pNode->count++; + uTrace("refId:%d p:%p is acquired", refId, p); + } else { + code = TSDB_CODE_REF_NOT_EXIST; + uTrace("refId:%d p:%p is not there, failed to acquire", refId, p); + } + + taosUnlockList(pSet->lockedBy+hash); + + taosDecRefCount(pSet); + + return code; +} + +void taosReleaseRef(int refId, void *p) +{ + int hash; + SRefNode *pNode; + SRefSet *pSet; + int released = 0; + + if (refId < 0 || refId >= TSDB_REF_OBJECTS) { + uTrace("refId:%d p:%p failed to release, refId not valid", refId, p); + return; + } + + uTrace("refId:%d p:%p try to release", refId, p); + + pSet = tsRefSetList + refId; + if (pSet->state == TSDB_REF_STATE_EMPTY) { + uTrace("refId:%d p:%p failed to release, cleaned", refId, p); + return; + } + + hash = taosHashRef(pSet, p); + + taosLockList(pSet->lockedBy+hash); + + pNode = pSet->nodeList[hash]; + while (pNode) { + if ( pNode->p == p ) + break; + + pNode = pNode->next; + } + + if (pNode) { + pNode->count--; + + if (pNode->count == 0) { + if ( pNode->prev ) { + pNode->prev->next = pNode->next; + } else { + pSet->nodeList[hash] = pNode->next; + } + + if ( pNode->next ) { + pNode->next->prev = pNode->prev; + } + + (*pSet->fp)(pNode->p); + + free(pNode); + released = 1; + uTrace("refId:%d p:%p is removed, count::%d", refId, p, pSet->count); + } else { + uTrace("refId:%d p:%p is released", refId, p); + } + } else { + uTrace("refId:%d p:%p is not there, failed to release", refId, p); + } + + taosUnlockList(pSet->lockedBy+hash); + + if (released) taosDecRefCount(pSet); +} + +int taosListRef() { + SRefSet *pSet; + SRefNode *pNode; + int num = 0; + + pthread_mutex_lock(&tsRefMutex); + + for (int i = 0; i < TSDB_REF_OBJECTS; ++i) { + pSet = tsRefSetList + i; + + if (pSet->state == TSDB_REF_STATE_EMPTY) + continue; + + uInfo("refId:%d state:%d count::%d", i, pSet->state, pSet->count); + + for (int j=0; j < pSet->max; ++j) { + pNode = pSet->nodeList[j]; + + while (pNode) { + uInfo("refId:%d p:%p count:%d", i, pNode->p, pNode->count); + pNode = pNode->next; + num++; + } + } + } + + pthread_mutex_unlock(&tsRefMutex); + + return num; +} + +static int taosHashRef(SRefSet *pSet, void *p) +{ + int hash = 0; + int64_t v = (int64_t)p; + + for (int i = 0; i < sizeof(v); ++i) { + hash += (int)(v & 0xFFFF); + v = v >> 16; + i = i + 2; + } + + hash = hash % pSet->max; + + return hash; +} + +static void taosLockList(int64_t *lockedBy) { + int64_t tid = taosGetPthreadId(); + int i = 0; + while (atomic_val_compare_exchange_64(lockedBy, 0, tid) != 0) { + if (++i % 100 == 0) { + sched_yield(); + } + } +} + +static void taosUnlockList(int64_t *lockedBy) { + int64_t tid = taosGetPthreadId(); + if (atomic_val_compare_exchange_64(lockedBy, tid, 0) != tid) { + assert(false); + } +} + +static void taosInitRefModule(void) { + pthread_mutex_init(&tsRefMutex, NULL); +} + +static void taosIncRefCount(SRefSet *pSet) { + atomic_add_fetch_32(&pSet->count, 1); + uTrace("refId:%d inc count:%d", pSet->refId, pSet->count); +} + +static void taosDecRefCount(SRefSet *pSet) { + int32_t count = atomic_sub_fetch_32(&pSet->count, 1); + uTrace("refId:%d dec count:%d", pSet->refId, pSet->count); + + if (count > 0) return; + + pthread_mutex_lock(&tsRefMutex); + + if (pSet->state != TSDB_REF_STATE_EMPTY) { + pSet->state = TSDB_REF_STATE_EMPTY; + pSet->max = 0; + pSet->fp = NULL; + + taosTFree(pSet->nodeList); + taosTFree(pSet->lockedBy); + + tsRefSetNum--; + uTrace("refId:%d is cleaned, refSetNum:%d count:%d", pSet->refId, tsRefSetNum, pSet->count); + } + + pthread_mutex_unlock(&tsRefMutex); +} + diff --git a/src/util/tests/CMakeLists.txt b/src/util/tests/CMakeLists.txt index 8687a8005d..0c96ed2a2f 100644 --- a/src/util/tests/CMakeLists.txt +++ b/src/util/tests/CMakeLists.txt @@ -9,7 +9,22 @@ IF (HEADER_GTEST_INCLUDE_DIR AND LIB_GTEST_STATIC_DIR) INCLUDE_DIRECTORIES(${HEADER_GTEST_INCLUDE_DIR}) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) - + + LIST(REMOVE_ITEM SOURCE_LIST ${CMAKE_CURRENT_SOURCE_DIR}/trefTest.c) ADD_EXECUTABLE(utilTest ${SOURCE_LIST}) TARGET_LINK_LIBRARIES(utilTest tutil common osdetail gtest pthread gcov) + + LIST(APPEND BIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/trefTest.c) + ADD_EXECUTABLE(trefTest ${BIN_SRC}) + TARGET_LINK_LIBRARIES(trefTest common tutil) + ENDIF() + +#IF (TD_LINUX) +# ADD_EXECUTABLE(trefTest ./trefTest.c) +# TARGET_LINK_LIBRARIES(trefTest tutil common) +#ENDIF () + +INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/util/inc) + + diff --git a/src/util/tests/trefTest.c b/src/util/tests/trefTest.c new file mode 100644 index 0000000000..486f9f6d6d --- /dev/null +++ b/src/util/tests/trefTest.c @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include "os.h" +#include "tref.h" +#include "tlog.h" +#include "tglobal.h" +#include "taoserror.h" +#include "tulog.h" + +typedef struct { + int refNum; + int steps; + int refId; + void **p; +} SRefSpace; + +void *takeRefActions(void *param) { + SRefSpace *pSpace = (SRefSpace *)param; + int code, id; + + for (int i=0; i < pSpace->steps; ++i) { + printf("s"); + id = random() % pSpace->refNum; + code = taosAddRef(pSpace->refId, pSpace->p[id]); + usleep(1); + + id = random() % pSpace->refNum; + code = taosAcquireRef(pSpace->refId, pSpace->p[id]); + if (code >= 0) { + usleep(id % 5 + 1); + taosReleaseRef(pSpace->refId, pSpace->p[id]); + } + + id = random() % pSpace->refNum; + taosRemoveRef(pSpace->refId, pSpace->p[id]); + usleep(id %5 + 1); + + id = random() % pSpace->refNum; + code = taosAcquireRef(pSpace->refId, pSpace->p[id]); + if (code >= 0) { + usleep(id % 5 + 1); + taosReleaseRef(pSpace->refId, pSpace->p[id]); + } + } + + for (int i=0; i < pSpace->refNum; ++i) { + taosRemoveRef(pSpace->refId, pSpace->p[i]); + } + + //uInfo("refId:%d thread exits", pSpace->refId); + + return NULL; +} + +void myfree(void *p) { + return; +} + +void *openRefSpace(void *param) { + SRefSpace *pSpace = (SRefSpace *)param; + + printf("c"); + pSpace->refId = taosOpenRef(10000, myfree); + + if (pSpace->refId < 0) { + printf("failed to open ref, reson:%s\n", tstrerror(pSpace->refId)); + return NULL; + } + + pSpace->p = (void **) calloc(sizeof(void *), pSpace->refNum); + for (int i=0; irefNum; ++i) { + pSpace->p[i] = (void *) malloc(128); + } + + pthread_attr_t thattr; + pthread_attr_init(&thattr); + pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE); + + pthread_t thread1, thread2, thread3; + pthread_create(&(thread1), &thattr, takeRefActions, (void *)(pSpace)); + pthread_create(&(thread2), &thattr, takeRefActions, (void *)(pSpace)); + pthread_create(&(thread3), &thattr, takeRefActions, (void *)(pSpace)); + + pthread_join(thread1, NULL); + pthread_join(thread2, NULL); + pthread_join(thread3, NULL); + + taosCloseRef(pSpace->refId); + + for (int i=0; irefNum; ++i) { + free(pSpace->p[i]); + } + + uInfo("refId:%d main thread exit", pSpace->refId); + free(pSpace->p); + pSpace->p = NULL; + + return NULL; +} + +int main(int argc, char *argv[]) { + int refNum = 100; + int threads = 10; + int steps = 10000; + int loops = 1; + + uDebugFlag = 143; + + for (int i=1; ifd, pHead, contLen) != contLen) { wError("wal:%s, failed to write(%s)", pWal->name, strerror(errno)); terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; } else { pWal->version = pHead->version; } + ASSERT(contLen == pHead->len + sizeof(SWalHead)); - return terrno; + return 0; } void walFsync(void *handle) { @@ -424,7 +426,7 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) { if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) { wWarn("wal:%s, cksum is messed up, skip the rest of file", name); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; - // ASSERT(false); + ASSERT(false); break; } diff --git a/tests/examples/go/taosdemo.go b/tests/examples/go/taosdemo.go index b42e1e6d70..14a67b93d3 100644 --- a/tests/examples/go/taosdemo.go +++ b/tests/examples/go/taosdemo.go @@ -107,7 +107,7 @@ func main() { fmt.Scanln() url = "root:taosdata@/tcp(" + configPara.hostName + ":" + strconv.Itoa(configPara.serverPort) + ")/" - //url = fmt.Sprintf("%s:%s@/tcp(%s:%d)/%s?interpolateParams=true", configPara.user, configPara.password, configPara.hostName, configPara.serverPort, configPara.dbName) + //url = fmt.Sprintf("%s:%s@/tcp(%s:%d)/%s?interpolateParams=true", configPara.user, configPara.password, configPara.hostName, configPara.serverPort, configPara.dbName) // open connect to taos server //db, err := sql.Open(taosDriverName, url) //if err != nil { @@ -115,6 +115,7 @@ func main() { // os.Exit(1) //} //defer db.Close() + rand.Seed(time.Now().Unix()) createDatabase(configPara.dbName, configPara.supTblName) fmt.Printf("======== create database success! ========\n\n") diff --git a/tests/examples/nodejs/README-win.md b/tests/examples/nodejs/README-win.md new file mode 100644 index 0000000000..75fec69413 --- /dev/null +++ b/tests/examples/nodejs/README-win.md @@ -0,0 +1,200 @@ +# 如何在windows上使用nodejs进行TDengine应用开发 + +## 环境准备 + +(1)安装nodejs-10.22.0 + +下载链接:https://nodejs.org/dist/v10.22.0/node-v10.22.0-win-x64.zip +解压安装,把node配置到环境变量里 + +cmd启动命令行,查看node的版本 + +```shell +> node.exe --version +v10.22.0 + +> npm --version +6.14.6 +``` + + + +(2)安装python2.7 + +下载链接:https://www.python.org/ftp/python/2.7.18/python-2.7.18.amd64.msi + +查看python版本 + +```shell +>python --version +Python 2.7.18 +``` + + +(3)安装TDengine-client + +下载地址:https://www.taosdata.com/cn/all-downloads/,选择一个合适的windows-client下载(client应该尽量与server端的版本保持一致) + +使用client的taos shell连接server + +```shell +>taos -h node5 + +Welcome to the TDengine shell from Linux, Client Version:2.0.6.0 +Copyright (c) 2017 by TAOS Data, Inc. All rights reserved. + +taos> show dnodes; + id | end_point | vnodes | cores | status | role | create_time | offline reason | +============================================================================================================================================ + 1 | node5:6030 | 7 | 1 | ready | any | 2020-10-26 09:45:26.308 | | +Query OK, 1 row(s) in set (0.036000s) +``` + +注意: +* 检查能否在client的机器上ping通server的fqdn +* 如果你的dns server并没有提供到server的域名解析,可以将server的hostname配置到client的hosts文件中 + + +## 应用开发 + +(1)建立nodejs项目 + +``` +npm init +``` + +(2)安装windows-build-tools +``` +npm install --global --production windows-build-tools +``` + +(3)安装td2.0-connector驱动 + +``` tdshell +npm install td2.0-connector +``` + +(4)nodejs访问tdengine的示例程序 + +```javascript +const taos = require('td2.0-connector'); + +var host = null; +var port = 6030; +for (var i = 2; i < global.process.argv.length; i++) { + var key = global.process.argv[i].split("=")[0]; + var value = global.process.argv[i].split("=")[1]; + + if ("host" == key) { + host = value; + } + if ("port" == key) { + port = value; + } +} + +if (host == null) { + console.log("Usage: node nodejsChecker.js host= port="); + process.exit(0); +} + +// establish connection +var conn = taos.connect({host: host, user: "root", password: "taosdata", port: port}); +var cursor = conn.cursor(); +// create database +executeSql("create database if not exists testnodejs", 0); +// use db +executeSql("use testnodejs", 0); +// drop table +executeSql("drop table if exists testnodejs.weather", 0); +// create table +executeSql("create table if not exists testnodejs.weather(ts timestamp, temperature float, humidity int)", 0); +// insert +executeSql("insert into testnodejs.weather (ts, temperature, humidity) values(now, 20.5, 34)", 1); +// select +executeQuery("select * from testnodejs.weather"); +// close connection +conn.close(); + +function executeQuery(sql) { + var start = new Date().getTime(); + var promise = cursor.query(sql, true); + var end = new Date().getTime(); + promise.then(function (result) { + printSql(sql, result != null, (end - start)); + result.pretty(); + }); +} + +function executeSql(sql, affectRows) { + var start = new Date().getTime(); + var promise = cursor.execute(sql); + var end = new Date().getTime(); + printSql(sql, promise == affectRows, (end - start)); +} + +function printSql(sql, succeed, cost) { + console.log("[ " + (succeed ? "OK" : "ERROR!") + " ] time cost: " + cost + " ms, execute statement ====> " + sql); +} +``` + +(5)测试nodejs程序 + +```shell +>node nodejsChecker.js +Usage: node nodejsChecker.js host= port= +# 提示指定host + +>node nodejsChecker.js host=node5 +Successfully connected to TDengine +Query OK, 0 row(s) affected (0.00997610s) +[ OK ] time cost: 14 ms, execute statement ====> create database if not exists testnodejs +Query OK, 0 row(s) affected (0.00235920s) +[ OK ] time cost: 4 ms, execute statement ====> use testnodejs +Query OK, 0 row(s) affected (0.06604280s) +[ OK ] time cost: 67 ms, execute statement ====> drop table if exists testnodejs.weather +Query OK, 0 row(s) affected (0.59403290s) +[ OK ] time cost: 595 ms, execute statement ====> create table if not exists testnodejs.weather(ts timestamp, temperature float, humidity int) +Query OK, 1 row(s) affected (0.01058950s) +[ OK ] time cost: 12 ms, execute statement ====> insert into testnodejs.weather (ts, temperature, humidity) values(now, 20.5, 34) +Query OK, 1 row(s) in set (0.00401490s) +[ OK ] time cost: 10 ms, execute statement ====> select * from testnodejs.weather +Connection is closed + + ts | temperature | humidity | +===================================================================== +2020-10-27 18:49:15.547 | 20.5 | 34 | +``` + +## 指南 + +### 如何设置主机名和hosts + +在server上查看hostname和fqdn +```shell +查看hostname +# hostname +taos-server + +查看fqdn +# hostname -f +taos-server +``` + +windows下hosts文件位于: +C:\\Windows\System32\drivers\etc\hosts +修改hosts文件,添加server的ip和hostname + +``` +192.168.56.101 node5 +``` + +> 什么是FQDN? +> +> FQDN(Full qualified domain name)全限定域名,fqdn由2部分组成:hostname+domainname。 +> +> 例如,一个邮件服务器的fqdn可能是:mymail.somecollege.edu,其中mymail是hostname(主机名),somcollege.edu是domainname(域名)。本例中,.edu是顶级域名,.somecollege是二级域名。 +> +> 当连接服务器时,必须指定fqdn,然后,dns服务器通过查看dns表,将hostname解析为相应的ip地址。如果只指定hostname(不指定domainname),应用程序可能服务解析主机名。因为如果你试图访问不在本地的远程服务器时,本地的dns服务器和可能没有远程服务器的hostname列表。 +> +> 参考:https://kb.iu.edu/d/aiuv diff --git a/tests/examples/nodejs/nodejsChecker.js b/tests/examples/nodejs/nodejsChecker.js index c77944f752..f838d5cc84 100644 --- a/tests/examples/nodejs/nodejsChecker.js +++ b/tests/examples/nodejs/nodejsChecker.js @@ -42,8 +42,8 @@ function executeQuery(sql){ var start = new Date().getTime(); var promise = cursor.query(sql, true); var end = new Date().getTime(); - printSql(sql, promise != null,(end - start)); promise.then(function(result){ + printSql(sql, result != null,(end - start)); result.pretty(); }); } diff --git a/tests/gotest/batchtest.bat b/tests/gotest/batchtest.bat old mode 100644 new mode 100755 index abe9a58f31..efd8961bb0 --- a/tests/gotest/batchtest.bat +++ b/tests/gotest/batchtest.bat @@ -7,6 +7,9 @@ set serverPort=%2 if "%severIp%"=="" (set severIp=127.0.0.1) if "%serverPort%"=="" (set serverPort=6030) +go env -w GO111MODULE=on +go env -w GOPROXY=https://goproxy.io,direct + cd case001 case001.bat %severIp% %serverPort% diff --git a/tests/gotest/batchtest.sh b/tests/gotest/batchtest.sh old mode 100644 new mode 100755 index e8ed9ecbed..0fbbf40714 --- a/tests/gotest/batchtest.sh +++ b/tests/gotest/batchtest.sh @@ -13,6 +13,9 @@ if [ ! -n "$serverPort" ]; then serverPort=6030 fi +go env -w GO111MODULE=on +go env -w GOPROXY=https://goproxy.io,direct + bash ./case001/case001.sh $severIp $serverPort #bash ./case002/case002.sh $severIp $serverPort #bash ./case003/case003.sh $severIp $serverPort diff --git a/tests/pytest/cluster/bananceTest.py b/tests/pytest/cluster/bananceTest.py new file mode 100644 index 0000000000..ef25afa7d2 --- /dev/null +++ b/tests/pytest/cluster/bananceTest.py @@ -0,0 +1,57 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random +import time + +class ClusterTestcase: + + ## test case 32 ## + def run(self): + + nodes = Nodes() + nodes.addConfigs("maxVgroupsPerDb", "10") + nodes.addConfigs("maxTablesPerVnode", "1000") + nodes.restartAllTaosd() + + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(1) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + tdSql.execute("use %s" % ctest.dbName) + tdSql.query("show vgroups") + dnodes = [] + for i in range(10): + dnodes.append(int(tdSql.getData(i, 4))) + + s = set(dnodes) + if len(s) < 3: + tdLog.exit("cluster is not balanced") + + tdLog.info("cluster is balanced") + + nodes.removeConfigs("maxVgroupsPerDb", "10") + nodes.removeConfigs("maxTablesPerVnode", "1000") + nodes.restartAllTaosd() + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/basicTest.py b/tests/pytest/cluster/basicTest.py new file mode 100644 index 0000000000..b990d7fd98 --- /dev/null +++ b/tests/pytest/cluster/basicTest.py @@ -0,0 +1,47 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 1, 33 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + + ctest.connectDB() + tdSql.init(ctest.conn.cursor(), False) + + ## Test case 1 ## + tdLog.info("Test case 1 repeat %d times" % ctest.repeat) + for i in range(ctest.repeat): + tdLog.info("Start Round %d" % (i + 1)) + replica = random.randint(1,3) + ctest.createSTable(replica) + ctest.run() + tdLog.sleep(10) + tdSql.query("select count(*) from %s.%s" %(ctest.dbName, ctest.stbName)) + tdSql.checkData(0, 0, ctest.numberOfRecords * ctest.numberOfTables) + tdLog.info("Round %d completed" % (i + 1)) + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() \ No newline at end of file diff --git a/tests/pytest/cluster/changeReplicaTest.py b/tests/pytest/cluster/changeReplicaTest.py new file mode 100644 index 0000000000..7fa68edbfe --- /dev/null +++ b/tests/pytest/cluster/changeReplicaTest.py @@ -0,0 +1,51 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 7, ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + tdSql.init(ctest.conn.cursor(), False) + + tdSql.execute("use %s" % ctest.dbName) + tdSql.query("show vgroups") + for i in range(10): + tdSql.checkData(i, 5, "master") + + tdSql.execute("alter database %s replica 2" % ctest.dbName) + tdLog.sleep(30) + tdSql.query("show vgroups") + for i in range(10): + tdSql.checkData(i, 5, "master") + tdSql.checkData(i, 7, "slave") + + tdSql.execute("alter database %s replica 3" % ctest.dbName) + tdLog.sleep(30) + tdSql.query("show vgroups") + for i in range(10): + tdSql.checkData(i, 5, "master") + tdSql.checkData(i, 7, "slave") + tdSql.checkData(i, 9, "slave") + +ct = ClusterTestcase() +ct.run() \ No newline at end of file diff --git a/tests/pytest/cluster/clusterSetup.py b/tests/pytest/cluster/clusterSetup.py new file mode 100644 index 0000000000..36af8ac42e --- /dev/null +++ b/tests/pytest/cluster/clusterSetup.py @@ -0,0 +1,202 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import os +import sys +sys.path.insert(0, os.getcwd()) +from fabric import Connection +from util.sql import * +from util.log import * +import taos +import random +import threading +import logging + +class Node: + def __init__(self, index, username, hostIP, hostName, password, homeDir): + self.index = index + self.username = username + self.hostIP = hostIP + self.hostName = hostName + self.homeDir = homeDir + self.conn = Connection("{}@{}".format(username, hostName), connect_kwargs={"password": "{}".format(password)}) + + def startTaosd(self): + try: + self.conn.run("sudo systemctl start taosd") + except Exception as e: + print("Start Taosd error for node %d " % self.index) + logging.exception(e) + + def stopTaosd(self): + try: + self.conn.run("sudo systemctl stop taosd") + except Exception as e: + print("Stop Taosd error for node %d " % self.index) + logging.exception(e) + + def restartTaosd(self): + try: + self.conn.run("sudo systemctl restart taosd") + except Exception as e: + print("Stop Taosd error for node %d " % self.index) + logging.exception(e) + + def removeTaosd(self): + try: + self.conn.run("rmtaos") + except Exception as e: + print("remove taosd error for node %d " % self.index) + logging.exception(e) + + def installTaosd(self, packagePath): + self.conn.put(packagePath, self.homeDir) + self.conn.cd(self.homeDir) + self.conn.run("tar -zxf $(basename '%s')" % packagePath) + with self.conn.cd("TDengine-enterprise-server"): + self.conn.run("yes|./install.sh") + + def configTaosd(self, taosConfigKey, taosConfigValue): + self.conn.run("sudo echo '%s %s' >> %s" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg")) + + def removeTaosConfig(self, taosConfigKey, taosConfigValue): + self.conn.run("sudo sed -in-place -e '/%s %s/d' %s" % (taosConfigKey, taosConfigValue, "/etc/taos/taos.cfg")) + + def configHosts(self, ip, name): + self.conn.run("echo '%s %s' >> %s" % (ip, name, '/etc/hosts')) + + def removeData(self): + try: + self.conn.run("sudo rm -rf /var/lib/taos/*") + except Exception as e: + print("remove taosd data error for node %d " % self.index) + logging.exception(e) + + def removeLog(self): + try: + self.conn.run("sudo rm -rf /var/log/taos/*") + except Exception as e: + print("remove taosd error for node %d " % self.index) + logging.exception(e) + + def removeDataForMnode(self): + try: + self.conn.run("sudo rm -rf /var/lib/taos/*") + except Exception as e: + print("remove taosd error for node %d " % self.index) + logging.exception(e) + + def removeDataForVnode(self, id): + try: + self.conn.run("sudo rm -rf /var/lib/taos/vnode%d/*.data" % id) + except Exception as e: + print("remove taosd error for node %d " % self.index) + logging.exception(e) + +class Nodes: + def __init__(self): + self.node1 = Node(1, 'ubuntu', '192.168.1.52', 'node1', 'tbase125!', '/home/ubuntu') + self.node2 = Node(2, 'ubuntu', '192.168.1.53', 'node2', 'tbase125!', '/home/ubuntu') + self.node3 = Node(3, 'ubuntu', '192.168.1.54', 'node3', 'tbase125!', '/home/ubuntu') + + def stopAllTaosd(self): + self.node1.stopTaosd() + self.node2.stopTaosd() + self.node3.stopTaosd() + + def startAllTaosd(self): + self.node1.startTaosd() + self.node2.startTaosd() + self.node3.startTaosd() + + def restartAllTaosd(self): + self.node1.restartTaosd() + self.node2.restartTaosd() + self.node3.restartTaosd() + + def addConfigs(self, configKey, configValue): + self.node1.configTaosd(configKey, configValue) + self.node2.configTaosd(configKey, configValue) + self.node3.configTaosd(configKey, configValue) + + def removeConfigs(self, configKey, configValue): + self.node1.removeTaosConfig(configKey, configValue) + self.node2.removeTaosConfig(configKey, configValue) + self.node3.removeTaosConfig(configKey, configValue) + + def removeAllDataFiles(self): + self.node1.removeData() + self.node2.removeData() + self.node3.removeData() + +class ClusterTest: + def __init__(self, hostName): + self.host = hostName + self.user = "root" + self.password = "taosdata" + self.config = "/etc/taos" + self.dbName = "mytest" + self.stbName = "meters" + self.numberOfThreads = 20 + self.numberOfTables = 10000 + self.numberOfRecords = 1000 + self.tbPrefix = "t" + self.ts = 1538548685000 + self.repeat = 1 + + def connectDB(self): + self.conn = taos.connect( + host=self.host, + user=self.user, + password=self.password, + config=self.config) + + def createSTable(self, replica): + cursor = self.conn.cursor() + tdLog.info("drop database if exists %s" % self.dbName) + cursor.execute("drop database if exists %s" % self.dbName) + tdLog.info("create database %s replica %d" % (self.dbName, replica)) + cursor.execute("create database %s replica %d" % (self.dbName, replica)) + tdLog.info("use %s" % self.dbName) + cursor.execute("use %s" % self.dbName) + tdLog.info("drop table if exists %s" % self.stbName) + cursor.execute("drop table if exists %s" % self.stbName) + tdLog.info("create table %s(ts timestamp, current float, voltage int, phase int) tags(id int)" % self.stbName) + cursor.execute("create table %s(ts timestamp, current float, voltage int, phase int) tags(id int)" % self.stbName) + cursor.close() + + def insertData(self, threadID): + print("Thread %d: starting" % threadID) + cursor = self.conn.cursor() + tablesPerThread = int(self.numberOfTables / self.numberOfThreads) + baseTableID = tablesPerThread * threadID + for i in range (tablesPerThread): + cursor.execute("create table %s%d using %s tags(%d)" % (self.tbPrefix, baseTableID + i, self.stbName, baseTableID + i)) + query = "insert into %s%d values" % (self.tbPrefix, baseTableID + i) + base = self.numberOfRecords * i + for j in range(self.numberOfRecords): + query += "(%d, %f, %d, %d)" % (self.ts + base + j, random.random(), random.randint(210, 230), random.randint(0, 10)) + cursor.execute(query) + cursor.close() + print("Thread %d: finishing" % threadID) + + def run(self): + threads = [] + tdLog.info("Inserting data") + for i in range(self.numberOfThreads): + thread = threading.Thread(target=self.insertData, args=(i,)) + threads.append(thread) + thread.start() + + for i in range(self.numberOfThreads): + threads[i].join() \ No newline at end of file diff --git a/tests/pytest/cluster/dataFileRecoveryTest.py b/tests/pytest/cluster/dataFileRecoveryTest.py new file mode 100644 index 0000000000..089d3fffc1 --- /dev/null +++ b/tests/pytest/cluster/dataFileRecoveryTest.py @@ -0,0 +1,53 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 20, 21, 22 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(3) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + nodes.node2.stopTaosd() + tdSql.execute("use %s" % ctest.dbName) + tdSql.query("show vgroups") + vnodeID = tdSql.getData(0, 0) + nodes.node2.removeDataForVnode(vnodeID) + nodes.node2.startTaosd() + + # Wait for vnode file to recover + for i in range(10): + tdSql.query("select count(*) from t0") + + tdLog.sleep(10) + + for i in range(10): + tdSql.query("select count(*) from t0") + tdSql.checkData(0, 0, 1000) + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/fullDnodesTest.py b/tests/pytest/cluster/fullDnodesTest.py new file mode 100644 index 0000000000..3c4b10d97a --- /dev/null +++ b/tests/pytest/cluster/fullDnodesTest.py @@ -0,0 +1,47 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ##Cover test case 5 ## + def run(self): + # cluster environment set up + nodes = Nodes() + nodes.addConfigs("maxVgroupsPerDb", "10") + nodes.addConfigs("maxTablesPerVnode", "1000") + nodes.restartAllTaosd() + + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(1) + ctest.run() + + tdSql.init(ctest.conn.cursor(), False) + tdSql.execute("use %s" % ctest.dbName) + tdSql.error("create table tt1 using %s tags(1)" % ctest.stbName) + + nodes.removeConfigs("maxVgroupsPerDb", "10") + nodes.removeConfigs("maxTablesPerVnode", "1000") + nodes.restartAllTaosd() + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() \ No newline at end of file diff --git a/tests/pytest/cluster/killAndRestartDnodesTest.py b/tests/pytest/cluster/killAndRestartDnodesTest.py new file mode 100644 index 0000000000..be927e862f --- /dev/null +++ b/tests/pytest/cluster/killAndRestartDnodesTest.py @@ -0,0 +1,75 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 7, 10 ## + def run(self): + # cluster environment set up + tdLog.info("Test case 7, 10") + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + tdSql.init(ctest.conn.cursor(), False) + + nodes.node1.stopTaosd() + tdSql.query("show dnodes") + tdSql.checkRows(3) + tdSql.checkData(0, 4, "offline") + tdSql.checkData(1, 4, "ready") + tdSql.checkData(2, 4, "ready") + + nodes.node1.startTaosd() + tdSql.checkRows(3) + tdSql.checkData(0, 4, "ready") + tdSql.checkData(1, 4, "ready") + tdSql.checkData(2, 4, "ready") + + nodes.node2.stopTaosd() + tdSql.query("show dnodes") + tdSql.checkRows(3) + tdSql.checkData(0, 4, "ready") + tdSql.checkData(1, 4, "offline") + tdSql.checkData(2, 4, "ready") + + nodes.node2.startTaosd() + tdSql.checkRows(3) + tdSql.checkData(0, 4, "ready") + tdSql.checkData(1, 4, "ready") + tdSql.checkData(2, 4, "ready") + + nodes.node3.stopTaosd() + tdSql.query("show dnodes") + tdSql.checkRows(3) + tdSql.checkData(0, 4, "ready") + tdSql.checkData(1, 4, "ready") + tdSql.checkData(2, 4, "offline") + + nodes.node3.startTaosd() + tdSql.checkRows(3) + tdSql.checkData(0, 4, "ready") + tdSql.checkData(1, 4, "ready") + tdSql.checkData(2, 4, "ready") + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() \ No newline at end of file diff --git a/tests/pytest/cluster/offlineThresholdTest.py b/tests/pytest/cluster/offlineThresholdTest.py new file mode 100644 index 0000000000..8373424f93 --- /dev/null +++ b/tests/pytest/cluster/offlineThresholdTest.py @@ -0,0 +1,54 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## cover test case 6, 8, 9, 11 ## + def run(self): + # cluster environment set up + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + tdSql.init(ctest.conn.cursor(), False) + + nodes.addConfigs("offlineThreshold", "10") + nodes.removeAllDataFiles() + nodes.restartAllTaosd() + nodes.node3.stopTaosd() + + tdLog.sleep(10) + tdSql.query("show dnodes") + tdSql.checkRows(3) + tdSql.checkData(2, 4, "offline") + + tdLog.sleep(60) + tdSql.checkRows(3) + tdSql.checkData(2, 4, "dropping") + + tdLog.sleep(300) + tdSql.checkRows(2) + + nodes.removeConfigs("offlineThreshold", "10") + nodes.restartAllTaosd() + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() \ No newline at end of file diff --git a/tests/pytest/cluster/oneReplicaOfflineTest.py b/tests/pytest/cluster/oneReplicaOfflineTest.py new file mode 100644 index 0000000000..0223dfe01a --- /dev/null +++ b/tests/pytest/cluster/oneReplicaOfflineTest.py @@ -0,0 +1,65 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 28, 29, 30, 31 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(3) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + tdSql.execute("use %s" % ctest.dbName) + + nodes.node2.stopTaosd() + for i in range(100): + tdSql.execute("drop table t%d" % i) + + nodes.node2.startTaosd() + tdSql.query("show tables") + tdSql.checkRows(9900) + + nodes.node2.stopTaosd() + for i in range(10): + tdSql.execute("create table a%d using meters tags(2)" % i) + + nodes.node2.startTaosd() + tdSql.query("show tables") + tdSql.checkRows(9910) + + nodes.node2.stopTaosd() + tdSql.execute("alter table meters add col col6 int") + nodes.node2.startTaosd() + + nodes.node2.stopTaosd() + tdSql.execute("drop database %s" % ctest.dbName) + + nodes.node2.startTaosd() + tdSql.query("show databases") + tdSql.checkRows(0) + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/queryTimeTest.py b/tests/pytest/cluster/queryTimeTest.py new file mode 100644 index 0000000000..74a9081ccf --- /dev/null +++ b/tests/pytest/cluster/queryTimeTest.py @@ -0,0 +1,54 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random +import time + +class ClusterTestcase: + + ## test case 32 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(1) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + tdSql.execute("use %s" % ctest.dbName) + totalTime = 0 + for i in range(10): + startTime = time.time() + tdSql.query("select * from %s" % ctest.stbName) + totalTime += time.time() - startTime + print("replica 1: avarage query time for %d records: %f seconds" % (ctest.numberOfTables * ctest.numberOfRecords,totalTime / 10)) + + tdSql.execute("alter database %s replica 3" % ctest.dbName) + tdLog.sleep(60) + totalTime = 0 + for i in range(10): + startTime = time.time() + tdSql.query("select * from %s" % ctest.stbName) + totalTime += time.time() - startTime + print("replica 3: avarage query time for %d records: %f seconds" % (ctest.numberOfTables * ctest.numberOfRecords,totalTime / 10)) + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/stopAllDnodesTest.py b/tests/pytest/cluster/stopAllDnodesTest.py new file mode 100644 index 0000000000..a71ae52e3d --- /dev/null +++ b/tests/pytest/cluster/stopAllDnodesTest.py @@ -0,0 +1,45 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 19 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + tdSql.init(ctest.conn.cursor(), False) + + tdSql.query("show databases") + count = tdSql.queryRows; + + nodes.stopAllTaosd() + nodes.node1.startTaosd() + tdSql.error("show databases") + + nodes.node2.startTaosd() + tdSql.error("show databases") + + nodes.node3.startTaosd() + tdLog.sleep(10) + tdSql.query("show databases") + tdSql.checkRows(count) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/stopTwoDnodesTest.py b/tests/pytest/cluster/stopTwoDnodesTest.py new file mode 100644 index 0000000000..9e9958e2d3 --- /dev/null +++ b/tests/pytest/cluster/stopTwoDnodesTest.py @@ -0,0 +1,48 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 17, 18 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(1) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + tdSql.query("show databases") + count = tdSql.queryRows; + tdSql.execute("use %s" % ctest.dbName) + tdSql.execute("alter database %s replica 3" % ctest.dbName) + nodes.node2.stopTaosd() + nodes.node3.stopTaosd() + tdSql.error("show databases") + + nodes.node2.startTaosd() + tdSql.error("show databases") + + nodes.node3.startTaosd() + tdSql.query("show databases") + tdSql.checkRows(count) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/syncingTest.py b/tests/pytest/cluster/syncingTest.py new file mode 100644 index 0000000000..96be048d23 --- /dev/null +++ b/tests/pytest/cluster/syncingTest.py @@ -0,0 +1,50 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from clusterSetup import * +from util.sql import tdSql +from util.log import tdLog +import random + +class ClusterTestcase: + + ## test case 24, 25, 26, 27 ## + def run(self): + + nodes = Nodes() + ctest = ClusterTest(nodes.node1.hostName) + ctest.connectDB() + ctest.createSTable(1) + ctest.run() + tdSql.init(ctest.conn.cursor(), False) + + + tdSql.execute("use %s" % ctest.dbName) + tdSql.execute("alter database %s replica 3" % ctest.dbName) + + for i in range(100): + tdSql.execute("drop table t%d" % i) + + for i in range(100): + tdSql.execute("create table a%d using meters tags(1)" % i) + + tdSql.execute("alter table meters add col col5 int") + tdSql.execute("alter table meters drop col col5 int") + tdSql.execute("drop database %s" % ctest.dbName) + + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +ct = ClusterTestcase() +ct.run() diff --git a/tests/pytest/cluster/testcluster.sh b/tests/pytest/cluster/testcluster.sh new file mode 100644 index 0000000000..6e15a498c0 --- /dev/null +++ b/tests/pytest/cluster/testcluster.sh @@ -0,0 +1,12 @@ +python3 basicTest.py +python3 bananceTest.py +python3 changeReplicaTest.py +python3 dataFileRecoveryTest.py +python3 fullDnodesTest.py +python3 killAndRestartDnodesTest.py +python3 offlineThresholdTest.py +python3 oneReplicaOfflineTest.py +python3 queryTimeTest.py +python3 stopAllDnodesTest.py +python3 stopTwoDnodesTest.py +python3 syncingTest.py \ No newline at end of file diff --git a/tests/pytest/crash_gen.sh b/tests/pytest/crash_gen.sh index 4ffe35fc3c..0af09634df 100755 --- a/tests/pytest/crash_gen.sh +++ b/tests/pytest/crash_gen.sh @@ -54,6 +54,7 @@ export PYTHONPATH=$(pwd)/../../src/connector/python/linux/python3:$(pwd) export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIB_DIR # Now we are all let, and let's see if we can find a crash. Note we pass all params +CRASH_GEN_EXEC=crash_gen_bootstrap.py if [[ $1 == '--valgrind' ]]; then shift export PYTHONMALLOC=malloc @@ -66,14 +67,16 @@ if [[ $1 == '--valgrind' ]]; then --leak-check=yes \ --suppressions=crash_gen/valgrind_taos.supp \ $PYTHON_EXEC \ - ./crash_gen/crash_gen.py $@ > $VALGRIND_OUT 2> $VALGRIND_ERR + $CRASH_GEN_EXEC $@ > $VALGRIND_OUT 2> $VALGRIND_ERR elif [[ $1 == '--helgrind' ]]; then shift + HELGRIND_OUT=helgrind.out + HELGRIND_ERR=helgrind.err valgrind \ --tool=helgrind \ $PYTHON_EXEC \ - ./crash_gen/crash_gen.py $@ + $CRASH_GEN_EXEC $@ > $HELGRIND_OUT 2> $HELGRIND_ERR else - $PYTHON_EXEC ./crash_gen/crash_gen.py $@ + $PYTHON_EXEC $CRASH_GEN_EXEC $@ fi diff --git a/tests/pytest/crash_gen/README.md b/tests/pytest/crash_gen/README.md new file mode 100644 index 0000000000..6788ab1a63 --- /dev/null +++ b/tests/pytest/crash_gen/README.md @@ -0,0 +1,130 @@ +

User's Guide to the Crash_Gen Tool

+ +# Introduction + +To effectively test and debug our TDengine product, we have developed a simple tool to +exercise various functions of the system in a randomized fashion, hoping to expose +maximum number of problems, hopefully without a pre-determined scenario. + +# Preparation + +To run this tool, please ensure the followed preparation work is done first. + +1. Fetch a copy of the TDengine source code, and build it successfully in the `build/` + directory +1. Ensure that the system has Python3.8 or above properly installed. We use + Ubuntu 20.04LTS as our own development environment, and suggest you also use such + an environment if possible. + +# Simple Execution + +To run the tool with the simplest method, follow the steps below: + +1. Open a terminal window, start the `taosd` service in the `build/` directory + (or however you prefer to start the `taosd` service) +1. Open another terminal window, go into the `tests/pytest/` directory, and + run `./crash_gen.sh -p -t 3 -s 10` (change the two parameters here as you wish) +1. Watch the output to the end and see if you get a `SUCCESS` or `FAILURE` + +That's it! + +# Running Clusters + +This tool also makes it easy to test/verify the clustering capabilities of TDengine. You +can start a cluster quite easily with the following command: + +``` +$ cd tests/pytest/ +$ ./crash_gen.sh -e -o 3 +``` + +The `-e` option above tells the tool to start the service, and do not run any tests, while +the `-o 3` option tells the tool to start 3 DNodes and join them together in a cluster. +Obviously you can adjust the the number here. + +## Behind the Scenes + +When the tool runs a cluster, it users a number of directories, each holding the information +for a single DNode, see: + +``` +$ ls build/cluster* +build/cluster_dnode_0: +cfg data log + +build/cluster_dnode_1: +cfg data log + +build/cluster_dnode_2: +cfg data log +``` + +Therefore, when something goes wrong and you want to reset everything with the cluster, simple +erase all the files: + +``` +$ rm -rf build/cluster_dnode_* +``` + +## Addresses and Ports + +The DNodes in the cluster all binds the the `127.0.0.1` IP address (for now anyway), and +uses port 6030 for the first DNode, and 6130 for the 2nd one, and so on. + +## Testing Against a Cluster + +In a separate terminal window, you can invoke the tool in client mode and test against +a cluster, such as: + +``` +$ ./crash_gen.sh -p -t 10 -s 100 -i 3 +``` + +Here the `-i` option tells the tool to always create tables with 3 replicas, and run +all tests against such tables. + +# Additional Features + +The exhaustive features of the tool is available through the `-h` option: + +``` +$ ./crash_gen.sh -h +usage: crash_gen_bootstrap.py [-h] [-a] [-b MAX_DBS] [-c CONNECTOR_TYPE] [-d] [-e] [-g IGNORE_ERRORS] [-i MAX_REPLICAS] [-l] [-n] [-o NUM_DNODES] [-p] [-r] + [-s MAX_STEPS] [-t NUM_THREADS] [-v] [-x] + +TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below) +--------------------------------------------------------------------- +1. You build TDengine in the top level ./build directory, as described in offical docs +2. You run the server there before this script: ./build/bin/taosd -c test/cfg + +optional arguments: + -h, --help show this help message and exit + -a, --auto-start-service + Automatically start/stop the TDengine service (default: false) + -b MAX_DBS, --max-dbs MAX_DBS + Maximum number of DBs to keep, set to disable dropping DB. (default: 0) + -c CONNECTOR_TYPE, --connector-type CONNECTOR_TYPE + Connector type to use: native, rest, or mixed (default: 10) + -d, --debug Turn on DEBUG mode for more logging (default: false) + -e, --run-tdengine Run TDengine service in foreground (default: false) + -g IGNORE_ERRORS, --ignore-errors IGNORE_ERRORS + Ignore error codes, comma separated, 0x supported (default: None) + -i MAX_REPLICAS, --max-replicas MAX_REPLICAS + Maximum number of replicas to use, when testing against clusters. (default: 1) + -l, --larger-data Write larger amount of data during write operations (default: false) + -n, --dynamic-db-table-names + Use non-fixed names for dbs/tables, useful for multi-instance executions (default: false) + -o NUM_DNODES, --num-dnodes NUM_DNODES + Number of Dnodes to initialize, used with -e option. (default: 1) + -p, --per-thread-db-connection + Use a single shared db connection (default: false) + -r, --record-ops Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false) + -s MAX_STEPS, --max-steps MAX_STEPS + Maximum number of steps to run (default: 100) + -t NUM_THREADS, --num-threads NUM_THREADS + Number of threads to run (default: 10) + -v, --verify-data Verify data written in a number of places by reading back (default: false) + -x, --continue-on-exception + Continue execution after encountering unexpected/disallowed errors/exceptions (default: false) +``` + diff --git a/tests/pytest/crash_gen/crash_gen.py b/tests/pytest/crash_gen/crash_gen.py index 48196ab383..8d2b0080bc 100755 --- a/tests/pytest/crash_gen/crash_gen.py +++ b/tests/pytest/crash_gen/crash_gen.py @@ -14,42 +14,36 @@ # For type hinting before definition, ref: # https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel from __future__ import annotations -import taos -from util.sql import * -from util.cases import * -from util.dnodes import * -from util.log import * -from queue import Queue, Empty -from typing import IO + from typing import Set from typing import Dict from typing import List -from requests.auth import HTTPBasicAuth +from typing import Optional # Type hinting, ref: https://stackoverflow.com/questions/19202633/python-3-type-hinting-for-none + import textwrap -import datetime -import logging import time +import datetime import random +import logging import threading -import requests import copy import argparse import getopt import sys import os -import io import signal import traceback import resource from guppy import hpy import gc -try: - import psutil -except: - print("Psutil module needed, please install: sudo pip3 install psutil") - sys.exit(-1) +from .service_manager import ServiceManager, TdeInstance +from .misc import Logging, Status, CrashGenError, Dice, Helper, Progress +from .db import DbConn, MyTDSql, DbConnNative, DbManager + +import taos +import requests # Require Python 3 if sys.version_info[0] < 3: @@ -59,41 +53,37 @@ if sys.version_info[0] < 3: # Command-line/Environment Configurations, will set a bit later # ConfigNameSpace = argparse.Namespace -gConfig = argparse.Namespace() # Dummy value, will be replaced later -gSvcMgr = None # TODO: refactor this hack, use dep injection -logger = None # type: Logger +gConfig: argparse.Namespace +gSvcMgr: ServiceManager # TODO: refactor this hack, use dep injection +# logger: logging.Logger +gContainer: Container -def runThread(wt: WorkerThread): - wt.run() - -class CrashGenError(Exception): - def __init__(self, msg=None, errno=None): - self.msg = msg - self.errno = errno - - def __str__(self): - return self.msg +# def runThread(wt: WorkerThread): +# wt.run() class WorkerThread: - def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator, - # te: TaskExecutor, - ): # note: main thread context! + def __init__(self, pool: ThreadPool, tid, tc: ThreadCoordinator): + """ + Note: this runs in the main thread context + """ # self._curStep = -1 self._pool = pool self._tid = tid self._tc = tc # type: ThreadCoordinator # self.threadIdent = threading.get_ident() - self._thread = threading.Thread(target=runThread, args=(self,)) + # self._thread = threading.Thread(target=runThread, args=(self,)) + self._thread = threading.Thread(target=self.run) self._stepGate = threading.Event() # Let us have a DB connection of our own if (gConfig.per_thread_db_connection): # type: ignore # print("connector_type = {}".format(gConfig.connector_type)) - if gConfig.connector_type == 'native': - self._dbConn = DbConn.createNative() + tInst = gContainer.defTdeInstance + if gConfig.connector_type == 'native': + self._dbConn = DbConn.createNative(tInst.getDbTarget()) elif gConfig.connector_type == 'rest': - self._dbConn = DbConn.createRest() + self._dbConn = DbConn.createRest(tInst.getDbTarget()) elif gConfig.connector_type == 'mixed': if Dice.throw(2) == 0: # 1/2 chance self._dbConn = DbConn.createNative() @@ -105,10 +95,10 @@ class WorkerThread: # self._dbInUse = False # if "use db" was executed already def logDebug(self, msg): - logger.debug(" TRD[{}] {}".format(self._tid, msg)) + Logging.debug(" TRD[{}] {}".format(self._tid, msg)) def logInfo(self, msg): - logger.info(" TRD[{}] {}".format(self._tid, msg)) + Logging.info(" TRD[{}] {}".format(self._tid, msg)) # def dbInUse(self): # return self._dbInUse @@ -127,10 +117,10 @@ class WorkerThread: def run(self): # initialization after thread starts, in the thread context # self.isSleeping = False - logger.info("Starting to run thread: {}".format(self._tid)) + Logging.info("Starting to run thread: {}".format(self._tid)) if (gConfig.per_thread_db_connection): # type: ignore - logger.debug("Worker thread openning database connection") + Logging.debug("Worker thread openning database connection") self._dbConn.open() self._doTaskLoop() @@ -140,7 +130,7 @@ class WorkerThread: if self._dbConn.isOpen: #sometimes it is not open self._dbConn.close() else: - logger.warning("Cleaning up worker thread, dbConn already closed") + Logging.warning("Cleaning up worker thread, dbConn already closed") def _doTaskLoop(self): # while self._curStep < self._pool.maxSteps: @@ -151,15 +141,15 @@ class WorkerThread: tc.crossStepBarrier() # shared barrier first, INCLUDING the last one except threading.BrokenBarrierError as err: # main thread timed out print("_bto", end="") - logger.debug("[TRD] Worker thread exiting due to main thread barrier time-out") + Logging.debug("[TRD] Worker thread exiting due to main thread barrier time-out") break - logger.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid)) + Logging.debug("[TRD] Worker thread [{}] exited barrier...".format(self._tid)) self.crossStepGate() # then per-thread gate, after being tapped - logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid)) + Logging.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid)) if not self._tc.isRunning(): print("_wts", end="") - logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...") + Logging.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...") break # Before we fetch the task and run it, let's ensure we properly "use" the database (not needed any more) @@ -178,15 +168,15 @@ class WorkerThread: raise # Fetch a task from the Thread Coordinator - logger.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid)) + Logging.debug( "[TRD] Worker thread [{}] about to fetch task".format(self._tid)) task = tc.fetchTask() # Execute such a task - logger.debug("[TRD] Worker thread [{}] about to execute task: {}".format( + Logging.debug("[TRD] Worker thread [{}] about to execute task: {}".format( self._tid, task.__class__.__name__)) task.execute(self) tc.saveExecutedTask(task) - logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid)) + Logging.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid)) # self._dbInUse = False # there may be changes between steps # print("_wtd", end=None) # worker thread died @@ -209,7 +199,7 @@ class WorkerThread: self.verifyThreadSelf() # only allowed by ourselves # Wait again at the "gate", waiting to be "tapped" - logger.debug( + Logging.debug( "[TRD] Worker thread {} about to cross the step gate".format( self._tid)) self._stepGate.wait() @@ -222,7 +212,7 @@ class WorkerThread: self.verifyThreadMain() # only allowed for main thread if self._thread.is_alive(): - logger.debug("[TRD] Tapping worker thread {}".format(self._tid)) + Logging.debug("[TRD] Tapping worker thread {}".format(self._tid)) self._stepGate.set() # wake up! time.sleep(0) # let the released thread run a bit else: @@ -253,7 +243,7 @@ class WorkerThread: class ThreadCoordinator: - WORKER_THREAD_TIMEOUT = 60 # one minute + WORKER_THREAD_TIMEOUT = 180 # one minute def __init__(self, pool: ThreadPool, dbManager: DbManager): self._curStep = -1 # first step is 0 @@ -267,7 +257,7 @@ class ThreadCoordinator: self._stepBarrier = threading.Barrier( self._pool.numThreads + 1) # one barrier for all threads self._execStats = ExecutionStats() - self._runStatus = MainExec.STATUS_RUNNING + self._runStatus = Status.STATUS_RUNNING self._initDbs() def getTaskExecutor(self): @@ -280,14 +270,14 @@ class ThreadCoordinator: self._stepBarrier.wait(timeout) def requestToStop(self): - self._runStatus = MainExec.STATUS_STOPPING + self._runStatus = Status.STATUS_STOPPING self._execStats.registerFailure("User Interruption") def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout): maxSteps = gConfig.max_steps # type: ignore if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9 return True - if self._runStatus != MainExec.STATUS_RUNNING: + if self._runStatus != Status.STATUS_RUNNING: return True if transitionFailed: return True @@ -308,7 +298,7 @@ class ThreadCoordinator: def _releaseAllWorkerThreads(self, transitionFailed): self._curStep += 1 # we are about to get into next step. TODO: race condition here! # Now not all threads had time to go to sleep - logger.debug( + Logging.debug( "--\r\n\n--> Step {} starts with main thread waking up".format(self._curStep)) # A new TE for the new step @@ -316,7 +306,7 @@ class ThreadCoordinator: if not transitionFailed: # only if not failed self._te = TaskExecutor(self._curStep) - logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format( + Logging.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format( self._curStep)) # Now not all threads had time to go to sleep # Worker threads will wake up at this point, and each execute it's own task self.tapAllThreads() # release all worker thread from their "gates" @@ -325,10 +315,10 @@ class ThreadCoordinator: # Now main thread (that's us) is ready to enter a step # let other threads go past the pool barrier, but wait at the # thread gate - logger.debug("[TRD] Main thread about to cross the barrier") + Logging.debug("[TRD] Main thread about to cross the barrier") self.crossStepBarrier(timeout=self.WORKER_THREAD_TIMEOUT) self._stepBarrier.reset() # Other worker threads should now be at the "gate" - logger.debug("[TRD] Main thread finished crossing the barrier") + Logging.debug("[TRD] Main thread finished crossing the barrier") def _doTransition(self): transitionFailed = False @@ -336,11 +326,11 @@ class ThreadCoordinator: for x in self._dbs: db = x # type: Database sm = db.getStateMachine() - logger.debug("[STT] starting transitions for DB: {}".format(db.getName())) + Logging.debug("[STT] starting transitions for DB: {}".format(db.getName())) # at end of step, transiton the DB state tasksForDb = db.filterTasks(self._executedTasks) sm.transition(tasksForDb, self.getDbManager().getDbConn()) - logger.debug("[STT] transition ended for DB: {}".format(db.getName())) + Logging.debug("[STT] transition ended for DB: {}".format(db.getName())) # Due to limitation (or maybe not) of the TD Python library, # we cannot share connections across threads @@ -348,14 +338,14 @@ class ThreadCoordinator: # Moving below to task loop # if sm.hasDatabase(): # for t in self._pool.threadList: - # logger.debug("[DB] use db for all worker threads") + # Logging.debug("[DB] use db for all worker threads") # t.useDb() # t.execSql("use db") # main thread executing "use # db" on behalf of every worker thread except taos.error.ProgrammingError as err: if (err.msg == 'network unavailable'): # broken DB connection - logger.info("DB connection broken, execution failed") + Logging.info("DB connection broken, execution failed") traceback.print_stack() transitionFailed = True self._te = None # Not running any more @@ -368,7 +358,7 @@ class ThreadCoordinator: self.resetExecutedTasks() # clear the tasks after we are done # Get ready for next step - logger.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed)) + Logging.debug("<-- Step {} finished, trasition failed = {}".format(self._curStep, transitionFailed)) return transitionFailed def run(self): @@ -382,8 +372,9 @@ class ThreadCoordinator: hasAbortedTask = False workerTimeout = False while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout): - if not gConfig.debug: # print this only if we are not in debug mode - print(".", end="", flush=True) + if not gConfig.debug: # print this only if we are not in debug mode + Progress.emit(Progress.STEP_BOUNDARY) + # print(".", end="", flush=True) # if (self._curStep % 2) == 0: # print memory usage once every 10 steps # memUsage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss # print("[m:{}]".format(memUsage), end="", flush=True) # print memory usage @@ -395,8 +386,9 @@ class ThreadCoordinator: try: self._syncAtBarrier() # For now just cross the barrier + Progress.emit(Progress.END_THREAD_STEP) except threading.BrokenBarrierError as err: - logger.info("Main loop aborted, caused by worker thread time-out") + Logging.info("Main loop aborted, caused by worker thread time-out") self._execStats.registerFailure("Aborted due to worker thread timeout") print("\n\nWorker Thread time-out detected, important thread info:") ts = ThreadStacks() @@ -409,7 +401,7 @@ class ThreadCoordinator: # threads are QUIET. hasAbortedTask = self._hasAbortedTask() # from previous step if hasAbortedTask: - logger.info("Aborted task encountered, exiting test program") + Logging.info("Aborted task encountered, exiting test program") self._execStats.registerFailure("Aborted Task Encountered") break # do transition only if tasks are error free @@ -420,29 +412,30 @@ class ThreadCoordinator: transitionFailed = True errno2 = Helper.convertErrno(err.errno) # correct error scheme errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err) - logger.info(errMsg) + Logging.info(errMsg) traceback.print_exc() self._execStats.registerFailure(errMsg) # Then we move on to the next step + Progress.emit(Progress.BEGIN_THREAD_STEP) self._releaseAllWorkerThreads(transitionFailed) if hasAbortedTask or transitionFailed : # abnormal ending, workers waiting at "gate" - logger.debug("Abnormal ending of main thraed") + Logging.debug("Abnormal ending of main thraed") elif workerTimeout: - logger.debug("Abnormal ending of main thread, due to worker timeout") + Logging.debug("Abnormal ending of main thread, due to worker timeout") else: # regular ending, workers waiting at "barrier" - logger.debug("Regular ending, main thread waiting for all worker threads to stop...") + Logging.debug("Regular ending, main thread waiting for all worker threads to stop...") self._syncAtBarrier() self._te = None # No more executor, time to end - logger.debug("Main thread tapping all threads one last time...") + Logging.debug("Main thread tapping all threads one last time...") self.tapAllThreads() # Let the threads run one last time - logger.debug("\r\n\n--> Main thread ready to finish up...") - logger.debug("Main thread joining all threads") + Logging.debug("\r\n\n--> Main thread ready to finish up...") + Logging.debug("Main thread joining all threads") self._pool.joinAll() # Get all threads to finish - logger.info("\nAll worker threads finished") + Logging.info("\nAll worker threads finished") self._execStats.endExec() def cleanup(self): # free resources @@ -474,7 +467,7 @@ class ThreadCoordinator: wakeSeq.append(i) else: wakeSeq.insert(0, i) - logger.debug( + Logging.debug( "[TRD] Main thread waking up worker threads: {}".format( str(wakeSeq))) # TODO: set dice seed to a deterministic value @@ -492,9 +485,11 @@ class ThreadCoordinator: dbc = self.getDbManager().getDbConn() if gConfig.max_dbs == 0: self._dbs.append(Database(0, dbc)) - else: + else: + baseDbNumber = int(datetime.datetime.now().timestamp( # Don't use Dice/random, as they are deterministic + )*333) % 888 if gConfig.dynamic_db_table_names else 0 for i in range(gConfig.max_dbs): - self._dbs.append(Database(i, dbc)) + self._dbs.append(Database(baseDbNumber + i, dbc)) def pickDatabase(self): idxDb = 0 @@ -512,7 +507,7 @@ class ThreadCoordinator: # pick a task type for current state db = self.pickDatabase() - taskType = db.getStateMachine().pickTaskType() # type: Task + taskType = db.getStateMachine().pickTaskType() # dynamic name of class return taskType(self._execStats, db) # create a task from it def resetExecutedTasks(self): @@ -522,13 +517,6 @@ class ThreadCoordinator: with self._lock: self._executedTasks.append(task) -# We define a class to run a number of threads in locking steps. - -class Helper: - @classmethod - def convertErrno(cls, errno): - return errno if (errno > 0) else 0x80000000 + errno - class ThreadPool: def __init__(self, numThreads, maxSteps): self.numThreads = numThreads @@ -546,7 +534,7 @@ class ThreadPool: def joinAll(self): for workerThread in self.threadList: - logger.debug("Joining thread...") + Logging.debug("Joining thread...") workerThread._thread.join() def cleanup(self): @@ -603,7 +591,7 @@ class LinearQueue(): def allocate(self, i): with self._lock: - # logger.debug("LQ allocating item {}".format(i)) + # Logging.debug("LQ allocating item {}".format(i)) if (i in self.inUse): raise RuntimeError( "Cannot re-use same index in queue: {}".format(i)) @@ -611,7 +599,7 @@ class LinearQueue(): def release(self, i): with self._lock: - # logger.debug("LQ releasing item {}".format(i)) + # Logging.debug("LQ releasing item {}".format(i)) self.inUse.remove(i) # KeyError possible, TODO: why? def size(self): @@ -633,357 +621,6 @@ class LinearQueue(): return ret -class DbConn: - TYPE_NATIVE = "native-c" - TYPE_REST = "rest-api" - TYPE_INVALID = "invalid" - - @classmethod - def create(cls, connType): - if connType == cls.TYPE_NATIVE: - return DbConnNative() - elif connType == cls.TYPE_REST: - return DbConnRest() - else: - raise RuntimeError( - "Unexpected connection type: {}".format(connType)) - - @classmethod - def createNative(cls): - return cls.create(cls.TYPE_NATIVE) - - @classmethod - def createRest(cls): - return cls.create(cls.TYPE_REST) - - def __init__(self): - self.isOpen = False - self._type = self.TYPE_INVALID - self._lastSql = None - - def getLastSql(self): - return self._lastSql - - def open(self): - if (self.isOpen): - raise RuntimeError("Cannot re-open an existing DB connection") - - # below implemented by child classes - self.openByType() - - logger.debug("[DB] data connection opened, type = {}".format(self._type)) - self.isOpen = True - - def queryScalar(self, sql) -> int: - return self._queryAny(sql) - - def queryString(self, sql) -> str: - return self._queryAny(sql) - - def _queryAny(self, sql): # actual query result as an int - if (not self.isOpen): - raise RuntimeError("Cannot query database until connection is open") - nRows = self.query(sql) - if nRows != 1: - raise taos.error.ProgrammingError( - "Unexpected result for query: {}, rows = {}".format(sql, nRows), - (0x991 if nRows==0 else 0x992) - ) - if self.getResultRows() != 1 or self.getResultCols() != 1: - raise RuntimeError("Unexpected result set for query: {}".format(sql)) - return self.getQueryResult()[0][0] - - def use(self, dbName): - self.execute("use {}".format(dbName)) - - def existsDatabase(self, dbName: str): - ''' Check if a certain database exists ''' - self.query("show databases") - dbs = [v[0] for v in self.getQueryResult()] # ref: https://stackoverflow.com/questions/643823/python-list-transformation - # ret2 = dbName in dbs - # print("dbs = {}, str = {}, ret2={}, type2={}".format(dbs, dbName,ret2, type(dbName))) - return dbName in dbs # TODO: super weird type mangling seen, once here - - def hasTables(self): - return self.query("show tables") > 0 - - def execute(self, sql): - ''' Return the number of rows affected''' - raise RuntimeError("Unexpected execution, should be overriden") - - def safeExecute(self, sql): - '''Safely execute any SQL query, returning True/False upon success/failure''' - try: - self.execute(sql) - return True # ignore num of results, return success - except taos.error.ProgrammingError as err: - return False # failed, for whatever TAOS reason - # Not possile to reach here, non-TAOS exception would have been thrown - - def query(self, sql) -> int: # return num rows returned - ''' Return the number of rows affected''' - raise RuntimeError("Unexpected execution, should be overriden") - - def openByType(self): - raise RuntimeError("Unexpected execution, should be overriden") - - def getQueryResult(self): - raise RuntimeError("Unexpected execution, should be overriden") - - def getResultRows(self): - raise RuntimeError("Unexpected execution, should be overriden") - - def getResultCols(self): - raise RuntimeError("Unexpected execution, should be overriden") - -# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql - - -class DbConnRest(DbConn): - def __init__(self): - super().__init__() - self._type = self.TYPE_REST - self._url = "http://localhost:6041/rest/sql" # fixed for now - self._result = None - - def openByType(self): # Open connection - pass # do nothing, always open - - def close(self): - if (not self.isOpen): - raise RuntimeError("Cannot clean up database until connection is open") - # Do nothing for REST - logger.debug("[DB] REST Database connection closed") - self.isOpen = False - - def _doSql(self, sql): - self._lastSql = sql # remember this, last SQL attempted - try: - r = requests.post(self._url, - data = sql, - auth = HTTPBasicAuth('root', 'taosdata')) - except: - print("REST API Failure (TODO: more info here)") - raise - rj = r.json() - # Sanity check for the "Json Result" - if ('status' not in rj): - raise RuntimeError("No status in REST response") - - if rj['status'] == 'error': # clearly reported error - if ('code' not in rj): # error without code - raise RuntimeError("REST error return without code") - errno = rj['code'] # May need to massage this in the future - # print("Raising programming error with REST return: {}".format(rj)) - raise taos.error.ProgrammingError( - rj['desc'], errno) # todo: check existance of 'desc' - - if rj['status'] != 'succ': # better be this - raise RuntimeError( - "Unexpected REST return status: {}".format( - rj['status'])) - - nRows = rj['rows'] if ('rows' in rj) else 0 - self._result = rj - return nRows - - def execute(self, sql): - if (not self.isOpen): - raise RuntimeError( - "Cannot execute database commands until connection is open") - logger.debug("[SQL-REST] Executing SQL: {}".format(sql)) - nRows = self._doSql(sql) - logger.debug( - "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql)) - return nRows - - def query(self, sql): # return rows affected - return self.execute(sql) - - def getQueryResult(self): - return self._result['data'] - - def getResultRows(self): - print(self._result) - raise RuntimeError("TBD") - # return self._tdSql.queryRows - - def getResultCols(self): - print(self._result) - raise RuntimeError("TBD") - - # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative - - -class MyTDSql: - # Class variables - _clsLock = threading.Lock() # class wide locking - longestQuery = None # type: str - longestQueryTime = 0.0 # seconds - lqStartTime = 0.0 - # lqEndTime = 0.0 # Not needed, as we have the two above already - - def __init__(self, hostAddr, cfgPath): - # Make the DB connection - self._conn = taos.connect(host=hostAddr, config=cfgPath) - self._cursor = self._conn.cursor() - - self.queryRows = 0 - self.queryCols = 0 - self.affectedRows = 0 - - # def init(self, cursor, log=True): - # self.cursor = cursor - # if (log): - # caller = inspect.getframeinfo(inspect.stack()[1][0]) - # self.cursor.log(caller.filename + ".sql") - - def close(self): - self._cursor.close() # can we double close? - self._conn.close() # TODO: very important, cursor close does NOT close DB connection! - self._cursor.close() - - def _execInternal(self, sql): - startTime = time.time() - ret = self._cursor.execute(sql) - # print("\nSQL success: {}".format(sql)) - queryTime = time.time() - startTime - # Record the query time - cls = self.__class__ - if queryTime > (cls.longestQueryTime + 0.01) : - with cls._clsLock: - cls.longestQuery = sql - cls.longestQueryTime = queryTime - cls.lqStartTime = startTime - return ret - - def query(self, sql): - self.sql = sql - try: - self._execInternal(sql) - self.queryResult = self._cursor.fetchall() - self.queryRows = len(self.queryResult) - self.queryCols = len(self._cursor.description) - except Exception as e: - # caller = inspect.getframeinfo(inspect.stack()[1][0]) - # args = (caller.filename, caller.lineno, sql, repr(e)) - # tdLog.exit("%s(%d) failed: sql:%s, %s" % args) - raise - return self.queryRows - - def execute(self, sql): - self.sql = sql - try: - self.affectedRows = self._execInternal(sql) - except Exception as e: - # caller = inspect.getframeinfo(inspect.stack()[1][0]) - # args = (caller.filename, caller.lineno, sql, repr(e)) - # tdLog.exit("%s(%d) failed: sql:%s, %s" % args) - raise - return self.affectedRows - - -class DbConnNative(DbConn): - # Class variables - _lock = threading.Lock() - _connInfoDisplayed = False - totalConnections = 0 # Not private - - def __init__(self): - super().__init__() - self._type = self.TYPE_NATIVE - self._conn = None - # self._cursor = None - - def getBuildPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("communit")] - else: - projPath = selfPath[:selfPath.find("tests")] - - buildPath = None - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root) - len("/build/bin")] - break - if buildPath == None: - raise RuntimeError("Failed to determine buildPath, selfPath={}, projPath={}" - .format(selfPath, projPath)) - return buildPath - - - def openByType(self): # Open connection - cfgPath = self.getBuildPath() + "/test/cfg" - hostAddr = "127.0.0.1" - - cls = self.__class__ # Get the class, to access class variables - with cls._lock: # force single threading for opening DB connections. # TODO: whaaat??!!! - if not cls._connInfoDisplayed: - cls._connInfoDisplayed = True # updating CLASS variable - logger.info("Initiating TAOS native connection to {}, using config at {}".format(hostAddr, cfgPath)) - # Make the connection - # self._conn = taos.connect(host=hostAddr, config=cfgPath) # TODO: make configurable - # self._cursor = self._conn.cursor() - # Record the count in the class - self._tdSql = MyTDSql(hostAddr, cfgPath) # making DB connection - cls.totalConnections += 1 - - self._tdSql.execute('reset query cache') - # self._cursor.execute('use db') # do this at the beginning of every - - # Open connection - # self._tdSql = MyTDSql() - # self._tdSql.init(self._cursor) - - def close(self): - if (not self.isOpen): - raise RuntimeError("Cannot clean up database until connection is open") - self._tdSql.close() - # Decrement the class wide counter - cls = self.__class__ # Get the class, to access class variables - with cls._lock: - cls.totalConnections -= 1 - - logger.debug("[DB] Database connection closed") - self.isOpen = False - - def execute(self, sql): - if (not self.isOpen): - raise RuntimeError("Cannot execute database commands until connection is open") - logger.debug("[SQL] Executing SQL: {}".format(sql)) - self._lastSql = sql - nRows = self._tdSql.execute(sql) - logger.debug( - "[SQL] Execution Result, nRows = {}, SQL = {}".format( - nRows, sql)) - return nRows - - def query(self, sql): # return rows affected - if (not self.isOpen): - raise RuntimeError( - "Cannot query database until connection is open") - logger.debug("[SQL] Executing SQL: {}".format(sql)) - self._lastSql = sql - nRows = self._tdSql.query(sql) - logger.debug( - "[SQL] Query Result, nRows = {}, SQL = {}".format( - nRows, sql)) - return nRows - # results are in: return self._tdSql.queryResult - - def getQueryResult(self): - return self._tdSql.queryResult - - def getResultRows(self): - return self._tdSql.queryRows - - def getResultCols(self): - return self._tdSql.queryCols - - class AnyState: STATE_INVALID = -1 STATE_EMPTY = 0 # nothing there, no even a DB @@ -1232,7 +869,7 @@ class StateMechine: def init(self, dbc: DbConn): # late initailization, don't save the dbConn self._curState = self._findCurrentState(dbc) # starting state - logger.debug("Found Starting State: {}".format(self._curState)) + Logging.debug("Found Starting State: {}".format(self._curState)) # TODO: seems no lnoger used, remove? def getCurrentState(self): @@ -1270,7 +907,7 @@ class StateMechine: raise RuntimeError( "No suitable task types found for state: {}".format( self._curState)) - logger.debug( + Logging.debug( "[OPS] Tasks found for state {}: {}".format( self._curState, typesToStrings(taskTypes))) @@ -1280,27 +917,27 @@ class StateMechine: ts = time.time() # we use this to debug how fast/slow it is to do the various queries to find the current DB state dbName =self._db.getName() if not dbc.existsDatabase(dbName): # dbc.hasDatabases(): # no database?! - logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time())) + Logging.debug( "[STT] empty database found, between {} and {}".format(ts, time.time())) return StateEmpty() # did not do this when openning connection, and this is NOT the worker # thread, which does this on their own dbc.use(dbName) if not dbc.hasTables(): # no tables - logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time())) + Logging.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time())) return StateDbOnly() sTable = self._db.getFixedSuperTable() if sTable.hasRegTables(dbc, dbName): # no regular tables - logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time())) + Logging.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time())) return StateSuperTableOnly() else: # has actual tables - logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time())) + Logging.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time())) return StateHasData() # We transition the system to a new state by examining the current state itself def transition(self, tasks, dbc: DbConn): if (len(tasks) == 0): # before 1st step, or otherwise empty - logger.debug("[STT] Starting State: {}".format(self._curState)) + Logging.debug("[STT] Starting State: {}".format(self._curState)) return # do nothing # this should show up in the server log, separating steps @@ -1336,7 +973,7 @@ class StateMechine: # Nothing for sure newState = self._findCurrentState(dbc) - logger.debug("[STT] New DB state determined: {}".format(newState)) + Logging.debug("[STT] New DB state determined: {}".format(newState)) # can old state move to new state through the tasks? self._curState.verifyTasksToState(tasks, newState) self._curState = newState @@ -1354,7 +991,7 @@ class StateMechine: # read data task, default to 10: TODO: change to a constant weights.append(10) i = self._weighted_choice_sub(weights) - # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes))) + # Logging.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes))) return taskTypes[i] # ref: @@ -1372,6 +1009,8 @@ class Database: possibly in a cluster environment. For now we use it to manage state transitions in that database + + TODO: consider moving, but keep in mind it contains "StateMachine" ''' _clsLock = threading.Lock() # class wide lock _lastInt = 101 # next one is initial integer @@ -1433,7 +1072,7 @@ class Database: t3 = datetime.datetime(2012, 1, 1) # default "keep" is 10 years t4 = datetime.datetime.fromtimestamp( t3.timestamp() + elSec2) # see explanation above - logger.info("Setting up TICKS to start from: {}".format(t4)) + Logging.info("Setting up TICKS to start from: {}".format(t4)) return t4 @classmethod @@ -1468,64 +1107,6 @@ class Database: return ret -class DbManager(): - ''' This is a wrapper around DbConn(), to make it easier to use. - - TODO: rename this to DbConnManager - ''' - def __init__(self): - self.tableNumQueue = LinearQueue() # TODO: delete? - # self.openDbServerConnection() - self._dbConn = DbConn.createNative() if ( - gConfig.connector_type == 'native') else DbConn.createRest() - try: - self._dbConn.open() # may throw taos.error.ProgrammingError: disconnected - except taos.error.ProgrammingError as err: - # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err)) - if (err.msg == 'client disconnected'): # cannot open DB connection - print( - "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.") - sys.exit(2) - else: - print("Failed to connect to DB, errno = {}, msg: {}" - .format(Helper.convertErrno(err.errno), err.msg)) - raise - except BaseException: - print("[=] Unexpected exception") - raise - - # Do this after dbConn is in proper shape - # Moved to Database() - # self._stateMachine = StateMechine(self._dbConn) - - def getDbConn(self): - return self._dbConn - - # TODO: not used any more, to delete - def pickAndAllocateTable(self): # pick any table, and "use" it - return self.tableNumQueue.pickAndAllocate() - - # TODO: Not used any more, to delete - def addTable(self): - with self._lock: - tIndex = self.tableNumQueue.push() - return tIndex - - # Not used any more, to delete - def releaseTable(self, i): # return the table back, so others can use it - self.tableNumQueue.release(i) - - # TODO: not used any more, delete - def getTableNameToDelete(self): - tblNum = self.tableNumQueue.pop() # TODO: race condition! - if (not tblNum): # maybe false - return False - - return "table_{}".format(tblNum) - - def cleanUp(self): - self._dbConn.close() - class TaskExecutor(): class BoundedList: def __init__(self, size=10): @@ -1584,10 +1165,10 @@ class TaskExecutor(): self._boundedList.add(n) # def logInfo(self, msg): - # logger.info(" T[{}.x]: ".format(self._curStep) + msg) + # Logging.info(" T[{}.x]: ".format(self._curStep) + msg) # def logDebug(self, msg): - # logger.debug(" T[{}.x]: ".format(self._curStep) + msg) + # Logging.debug(" T[{}.x]: ".format(self._curStep) + msg) class Task(): @@ -1600,19 +1181,19 @@ class Task(): @classmethod def allocTaskNum(cls): Task.taskSn += 1 # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy - # logger.debug("Allocating taskSN: {}".format(Task.taskSn)) + # Logging.debug("Allocating taskSN: {}".format(Task.taskSn)) return Task.taskSn def __init__(self, execStats: ExecutionStats, db: Database): self._workerThread = None - self._err = None # type: Exception + self._err: Optional[Exception] = None self._aborted = False self._curStep = None self._numRows = None # Number of rows affected # Assign an incremental task serial number self._taskNum = self.allocTaskNum() - # logger.debug("Creating new task {}...".format(self._taskNum)) + # Logging.debug("Creating new task {}...".format(self._taskNum)) self._execStats = execStats self._db = db # A task is always associated/for a specific DB @@ -1645,15 +1226,22 @@ class Task(): "To be implemeted by child classes, class name: {}".format( self.__class__.__name__)) + def _isServiceStable(self): + if not gSvcMgr: + return True # we don't run service, so let's assume it's stable + return gSvcMgr.isStable() # otherwise let's examine the service + def _isErrAcceptable(self, errno, msg): if errno in [ 0x05, # TSDB_CODE_RPC_NOT_READY 0x0B, # Unable to establish connection, more details in TD-1648 - # 0x200, # invalid SQL, TODO: re-examine with TD-934 + 0x200, # invalid SQL, TODO: re-examine with TD-934 + 0x20F, # query terminated, possibly due to vnoding being dropped, see TD-1776 0x217, # "db not selected", client side defined error code - 0x218, # "Table does not exist" client side defined error code - 0x360, 0x362, - 0x369, # tag already exists + # 0x218, # "Table does not exist" client side defined error code + 0x360, # Table already exists + 0x362, + # 0x369, # tag already exists 0x36A, 0x36B, 0x36D, 0x381, 0x380, # "db not selected" @@ -1662,12 +1250,17 @@ class Task(): 0x503, 0x510, # vnode not in ready state 0x14, # db not ready, errno changed - 0x600, + 0x600, # Invalid table ID, why? 1000 # REST catch-all error ]: return True # These are the ALWAYS-ACCEPTABLE ones - elif (errno in [ 0x0B ]) and gConfig.auto_start_service: - return True # We may get "network unavilable" when restarting service + # This case handled below already. + # elif (errno in [ 0x0B ]) and gConfig.auto_start_service: + # return True # We may get "network unavilable" when restarting service + elif gConfig.ignore_errors: # something is specified on command line + moreErrnos = [int(v, 0) for v in gConfig.ignore_errors.split(',')] + if errno in moreErrnos: + return True elif errno == 0x200 : # invalid SQL, we need to div in a bit more if msg.find("invalid column name") != -1: return True @@ -1675,8 +1268,8 @@ class Task(): return True elif msg.find("duplicated column names") != -1: # also alter table tag issues return True - elif (gSvcMgr!=None) and gSvcMgr.isRestarting(): - logger.info("Ignoring error when service is restarting: errno = {}, msg = {}".format(errno, msg)) + elif not self._isServiceStable(): # We are managing service, and ... + Logging.info("Ignoring error when service starting/stopping: errno = {}, msg = {}".format(errno, msg)) return True return False # Not an acceptable error @@ -1735,10 +1328,11 @@ class Task(): self._aborted = True traceback.print_exc() except BaseException: # TODO: what is this again??!! - self.logDebug( - "[=] Unexpected exception, SQL: {}".format( - wt.getDbConn().getLastSql())) - raise + raise RuntimeError("Punt") + # self.logDebug( + # "[=] Unexpected exception, SQL: {}".format( + # wt.getDbConn().getLastSql())) + # raise self._execStats.endTaskType(self.__class__.__name__, self.isSuccess()) self.logDebug("[X] task execution completed, {}, status: {}".format( @@ -1817,14 +1411,14 @@ class ExecutionStats: self._failureReason = reason def printStats(self): - logger.info( + Logging.info( "----------------------------------------------------------------------") - logger.info( + Logging.info( "| Crash_Gen test {}, with the following stats:". format( "FAILED (reason: {})".format( self._failureReason) if self._failed else "SUCCEEDED")) - logger.info("| Task Execution Times (success/total):") - execTimesAny = 0 + Logging.info("| Task Execution Times (success/total):") + execTimesAny = 0.0 for k, n in self._execTimes.items(): execTimesAny += n[0] errStr = None @@ -1834,28 +1428,28 @@ class ExecutionStats: errStrs = ["0x{:X}:{}".format(eno, n) for (eno, n) in errors.items()] # print("error strings = {}".format(errStrs)) errStr = ", ".join(errStrs) - logger.info("| {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr)) + Logging.info("| {0:<24}: {1}/{2} (Errors: {3})".format(k, n[1], n[0], errStr)) - logger.info( + Logging.info( "| Total Tasks Executed (success or not): {} ".format(execTimesAny)) - logger.info( + Logging.info( "| Total Tasks In Progress at End: {}".format( self._tasksInProgress)) - logger.info( + Logging.info( "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format( self._accRunTime)) - logger.info( + Logging.info( "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny)) - logger.info( + Logging.info( "| Total Elapsed Time (from wall clock): {:.3f} seconds".format( self._elapsedTime)) - logger.info("| Top numbers written: {}".format(TaskExecutor.getBoundedList())) - logger.info("| Active DB Native Connections (now): {}".format(DbConnNative.totalConnections)) - logger.info("| Longest native query time: {:.3f} seconds, started: {}". + Logging.info("| Top numbers written: {}".format(TaskExecutor.getBoundedList())) + Logging.info("| Active DB Native Connections (now): {}".format(DbConnNative.totalConnections)) + Logging.info("| Longest native query time: {:.3f} seconds, started: {}". format(MyTDSql.longestQueryTime, time.strftime("%x %X", time.localtime(MyTDSql.lqStartTime))) ) - logger.info("| Longest native query: {}".format(MyTDSql.longestQuery)) - logger.info( + Logging.info("| Longest native query: {}".format(MyTDSql.longestQuery)) + Logging.info( "----------------------------------------------------------------------") @@ -1865,11 +1459,14 @@ class StateTransitionTask(Task): LARGE_NUMBER_OF_RECORDS = 50 SMALL_NUMBER_OF_RECORDS = 3 + _baseTableNumber = None + + _endState = None + @classmethod def getInfo(cls): # each sub class should supply their own information raise RuntimeError("Overriding method expected") - - _endState = None + @classmethod def getEndState(cls): # TODO: optimize by calling it fewer times raise RuntimeError("Overriding method expected") @@ -1889,7 +1486,10 @@ class StateTransitionTask(Task): @classmethod def getRegTableName(cls, i): - return "reg_table_{}".format(i) + if ( StateTransitionTask._baseTableNumber is None): + StateTransitionTask._baseTableNumber = Dice.throw( + 999) if gConfig.dynamic_db_table_names else 0 + return "reg_table_{}".format(StateTransitionTask._baseTableNumber + i) def execute(self, wt: WorkerThread): super().execute(wt) @@ -1909,7 +1509,8 @@ class TaskCreateDb(StateTransitionTask): # was: self.execWtSql(wt, "create database db") repStr = "" if gConfig.max_replicas != 1: - numReplica = Dice.throw(gConfig.max_replicas) + 1 # 1,2 ... N + # numReplica = Dice.throw(gConfig.max_replicas) + 1 # 1,2 ... N + numReplica = gConfig.max_replicas # fixed, always repStr = "replica {}".format(numReplica) self.execWtSql(wt, "create database {} {}" .format(self._db.getName(), repStr) ) @@ -1925,7 +1526,7 @@ class TaskDropDb(StateTransitionTask): def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): self.execWtSql(wt, "drop database {}".format(self._db.getName())) - logger.debug("[OPS] database dropped at {}".format(time.time())) + Logging.debug("[OPS] database dropped at {}".format(time.time())) class TaskCreateSuperTable(StateTransitionTask): @classmethod @@ -1938,7 +1539,7 @@ class TaskCreateSuperTable(StateTransitionTask): def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): if not self._db.exists(wt.getDbConn()): - logger.debug("Skipping task, no DB yet") + Logging.debug("Skipping task, no DB yet") return sTable = self._db.getFixedSuperTable() # type: TdSuperTable @@ -1973,7 +1574,7 @@ class TdSuperTable: dbc.query("select TBNAME from {}.{}".format(dbName, self._stName)) # TODO: analyze result set later except taos.error.ProgrammingError as err: errno2 = Helper.convertErrno(err.errno) - logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err)) + Logging.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err)) raise qr = dbc.getQueryResult() @@ -2045,15 +1646,39 @@ class TaskReadData(StateTransitionTask): def canBeginFrom(cls, state: AnyState): return state.canReadData() + # def _canRestartService(self): + # if not gSvcMgr: + # return True # always + # return gSvcMgr.isActive() # only if it's running TODO: race condition here + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): sTable = self._db.getFixedSuperTable() - # 1 in 5 chance, simulate a broken connection. - if random.randrange(5) == 0: # TODO: break connection in all situations - wt.getDbConn().close() - wt.getDbConn().open() - print("_r", end="", flush=True) - + # 1 in 5 chance, simulate a broken connection, only if service stable (not restarting) + if random.randrange(20)==0: # and self._canRestartService(): # TODO: break connection in all situations + # Logging.info("Attempting to reconnect to server") # TODO: change to DEBUG + Progress.emit(Progress.SERVICE_RECONNECT_START) + try: + wt.getDbConn().close() + wt.getDbConn().open() + except ConnectionError as err: # may fail + if not gSvcMgr: + Logging.error("Failed to reconnect in client-only mode") + raise # Not OK if we are running in client-only mode + if gSvcMgr.isRunning(): # may have race conditon, but low prob, due to + Logging.error("Failed to reconnect when managed server is running") + raise # Not OK if we are running normally + + Progress.emit(Progress.SERVICE_RECONNECT_FAILURE) + # Logging.info("Ignoring DB reconnect error") + + # print("_r", end="", flush=True) + Progress.emit(Progress.SERVICE_RECONNECT_SUCCESS) + # The above might have taken a lot of time, service might be running + # by now, causing error below to be incorrectly handled due to timing issue + return # TODO: fix server restart status race condtion + + dbc = wt.getDbConn() dbName = self._db.getName() for rTbName in sTable.getRegTables(dbc, dbName): # regular tables @@ -2088,7 +1713,7 @@ class TaskReadData(StateTransitionTask): dbc.execute("select {} from {}.{}".format(aggExpr, dbName, sTable.getName())) except taos.error.ProgrammingError as err: errno2 = Helper.convertErrno(err.errno) - logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql())) + Logging.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql())) raise class TaskDropSuperTable(StateTransitionTask): @@ -2119,7 +1744,7 @@ class TaskDropSuperTable(StateTransitionTask): errno2 = Helper.convertErrno(err.errno) if (errno2 in [0x362]): # mnode invalid table name isSuccess = False - logger.debug("[DB] Acceptable error when dropping a table") + Logging.debug("[DB] Acceptable error when dropping a table") continue # try to delete next regular table if (not tickOutput): @@ -2199,20 +1824,19 @@ class TaskAddData(StateTransitionTask): # Track which table is being actively worked on activeTable: Set[int] = set() - # We use these two files to record operations to DB, useful for power-off - # tests - fAddLogReady = None - fAddLogDone = None + # We use these two files to record operations to DB, useful for power-off tests + fAddLogReady = None # type: TextIOWrapper + fAddLogDone = None # type: TextIOWrapper @classmethod def prepToRecordOps(cls): if gConfig.record_ops: if (cls.fAddLogReady is None): - logger.info( + Logging.info( "Recording in a file operations to be performed...") cls.fAddLogReady = open("add_log_ready.txt", "w") if (cls.fAddLogDone is None): - logger.info("Recording in a file operations completed...") + Logging.info("Recording in a file operations completed...") cls.fAddLogDone = open("add_log_done.txt", "w") @classmethod @@ -2288,490 +1912,8 @@ class TaskAddData(StateTransitionTask): self.activeTable.discard(i) # not raising an error, unlike remove -# Deterministic random number generator -class Dice(): - seeded = False # static, uninitialized - @classmethod - def seed(cls, s): # static - if (cls.seeded): - raise RuntimeError( - "Cannot seed the random generator more than once") - cls.verifyRNG() - random.seed(s) - cls.seeded = True # TODO: protect against multi-threading - @classmethod - def verifyRNG(cls): # Verify that the RNG is determinstic - random.seed(0) - x1 = random.randrange(0, 1000) - x2 = random.randrange(0, 1000) - x3 = random.randrange(0, 1000) - if (x1 != 864 or x2 != 394 or x3 != 776): - raise RuntimeError("System RNG is not deterministic") - - @classmethod - def throw(cls, stop): # get 0 to stop-1 - return cls.throwRange(0, stop) - - @classmethod - def throwRange(cls, start, stop): # up to stop-1 - if (not cls.seeded): - raise RuntimeError("Cannot throw dice before seeding it") - return random.randrange(start, stop) - - @classmethod - def choice(cls, cList): - return random.choice(cList) - - -class LoggingFilter(logging.Filter): - def filter(self, record: logging.LogRecord): - if (record.levelno >= logging.INFO): - return True # info or above always log - - # Commenting out below to adjust... - - # if msg.startswith("[TRD]"): - # return False - return True - - -class MyLoggingAdapter(logging.LoggerAdapter): - def process(self, msg, kwargs): - return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs - # return '[%s] %s' % (self.extra['connid'], msg), kwargs - - -class SvcManager: - def __init__(self): - print("Starting TDengine Service Manager") - # signal.signal(signal.SIGTERM, self.sigIntHandler) # Moved to MainExec - # signal.signal(signal.SIGINT, self.sigIntHandler) - # signal.signal(signal.SIGUSR1, self.sigUsrHandler) # different handler! - - self.inSigHandler = False - # self._status = MainExec.STATUS_RUNNING # set inside - # _startTaosService() - self.svcMgrThread = None # type: ServiceManagerThread - self._lock = threading.Lock() - self._isRestarting = False - - def _doMenu(self): - choice = "" - while True: - print("\nInterrupting Service Program, Choose an Action: ") - print("1: Resume") - print("2: Terminate") - print("3: Restart") - # Remember to update the if range below - # print("Enter Choice: ", end="", flush=True) - while choice == "": - choice = input("Enter Choice: ") - if choice != "": - break # done with reading repeated input - if choice in ["1", "2", "3"]: - break # we are done with whole method - print("Invalid choice, please try again.") - choice = "" # reset - return choice - - def sigUsrHandler(self, signalNumber, frame): - print("Interrupting main thread execution upon SIGUSR1") - if self.inSigHandler: # already - print("Ignoring repeated SIG...") - return # do nothing if it's already not running - self.inSigHandler = True - - choice = self._doMenu() - if choice == "1": - # TODO: can the sub-process be blocked due to us not reading from - # queue? - self.sigHandlerResume() - elif choice == "2": - self.stopTaosService() - elif choice == "3": # Restart - self.restart() - else: - raise RuntimeError("Invalid menu choice: {}".format(choice)) - - self.inSigHandler = False - - def sigIntHandler(self, signalNumber, frame): - print("SvcManager: INT Signal Handler starting...") - if self.inSigHandler: - print("Ignoring repeated SIG_INT...") - return - self.inSigHandler = True - - self.stopTaosService() - print("SvcManager: INT Signal Handler returning...") - self.inSigHandler = False - - def sigHandlerResume(self): - print("Resuming TDengine service manager thread (main thread)...\n\n") - - def _checkServiceManagerThread(self): - if self.svcMgrThread: # valid svc mgr thread - if self.svcMgrThread.isStopped(): # done? - self.svcMgrThread.procIpcBatch() # one last time. TODO: appropriate? - self.svcMgrThread = None # no more - - def _procIpcAll(self): - while self.isRunning() or self.isRestarting() : # for as long as the svc mgr thread is still here - if self.isRunning(): - self.svcMgrThread.procIpcBatch() # regular processing, - self._checkServiceManagerThread() - elif self.isRetarting(): - print("Service restarting...") - time.sleep(0.5) # pause, before next round - print( - "Service Manager Thread (with subprocess) has ended, main thread now exiting...") - - def startTaosService(self): - with self._lock: - if self.svcMgrThread: - raise RuntimeError("Cannot start TAOS service when one may already be running") - - # Find if there's already a taosd service, and then kill it - for proc in psutil.process_iter(): - if proc.name() == 'taosd': - print("Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupe") - time.sleep(2.0) - proc.kill() - # print("Process: {}".format(proc.name())) - - - self.svcMgrThread = ServiceManagerThread() # create the object - print("Attempting to start TAOS service started, printing out output...") - self.svcMgrThread.start() - self.svcMgrThread.procIpcBatch(trimToTarget=10, forceOutput=True) # for printing 10 lines - print("TAOS service started") - - def stopTaosService(self, outputLines=20): - with self._lock: - if not self.isRunning(): - logger.warning("Cannot stop TAOS service, not running") - return - - print("Terminating Service Manager Thread (SMT) execution...") - self.svcMgrThread.stop() - if self.svcMgrThread.isStopped(): - self.svcMgrThread.procIpcBatch(outputLines) # one last time - self.svcMgrThread = None - print("End of TDengine Service Output") - print("----- TDengine Service (managed by SMT) is now terminated -----\n") - else: - print("WARNING: SMT did not terminate as expected") - - def run(self): - self.startTaosService() - self._procIpcAll() # pump/process all the messages, may encounter SIG + restart - if self.isRunning(): # if sig handler hasn't destroyed it by now - self.stopTaosService() # should have started already - - def restart(self): - if self._isRestarting: - logger.warning("Cannot restart service when it's already restarting") - return - - self._isRestarting = True - if self.isRunning(): - self.stopTaosService() - else: - logger.warning("Service not running when restart requested") - - self.startTaosService() - self._isRestarting = False - - def isRunning(self): - return self.svcMgrThread != None - - def isRestarting(self): - return self._isRestarting - -class ServiceManagerThread: - MAX_QUEUE_SIZE = 10000 - - def __init__(self): - self._tdeSubProcess = None # type: TdeSubProcess - self._thread = None - self._status = None - - def getStatus(self): - return self._status - - def isRunning(self): - # return self._thread and self._thread.is_alive() - return self._status == MainExec.STATUS_RUNNING - - def isStopping(self): - return self._status == MainExec.STATUS_STOPPING - - def isStopped(self): - return self._status == MainExec.STATUS_STOPPED - - # Start the thread (with sub process), and wait for the sub service - # to become fully operational - def start(self): - if self._thread: - raise RuntimeError("Unexpected _thread") - if self._tdeSubProcess: - raise RuntimeError("TDengine sub process already created/running") - - self._status = MainExec.STATUS_STARTING - - self._tdeSubProcess = TdeSubProcess() - self._tdeSubProcess.start() - - self._ipcQueue = Queue() - self._thread = threading.Thread( # First thread captures server OUTPUT - target=self.svcOutputReader, - args=(self._tdeSubProcess.getStdOut(), self._ipcQueue)) - self._thread.daemon = True # thread dies with the program - self._thread.start() - - self._thread2 = threading.Thread( # 2nd thread captures server ERRORs - target=self.svcErrorReader, - args=(self._tdeSubProcess.getStdErr(), self._ipcQueue)) - self._thread2.daemon = True # thread dies with the program - self._thread2.start() - - # wait for service to start - for i in range(0, 100): - time.sleep(1.0) - # self.procIpcBatch() # don't pump message during start up - print("_zz_", end="", flush=True) - if self._status == MainExec.STATUS_RUNNING: - logger.info("[] TDengine service READY to process requests") - return # now we've started - # TODO: handle this better? - self.procIpcBatch(100, True) # display output before cronking out, trim to last 20 msgs, force output - raise RuntimeError("TDengine service did not start successfully") - - def stop(self): - # can be called from both main thread or signal handler - print("Terminating TDengine service running as the sub process...") - if self.isStopped(): - print("Service already stopped") - return - if self.isStopping(): - print("Service is already being stopped") - return - # Linux will send Control-C generated SIGINT to the TDengine process - # already, ref: - # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes - if not self._tdeSubProcess: - raise RuntimeError("sub process object missing") - - self._status = MainExec.STATUS_STOPPING - retCode = self._tdeSubProcess.stop() - print("Attempted to stop sub process, got return code: {}".format(retCode)) - if (retCode==-11): # SGV - logger.error("[[--ERROR--]]: TDengine service SEGV fault (check core file!)") - - if self._tdeSubProcess.isRunning(): # still running - print("FAILED to stop sub process, it is still running... pid = {}".format( - self._tdeSubProcess.getPid())) - else: - self._tdeSubProcess = None # not running any more - self.join() # stop the thread, change the status, etc. - - def join(self): - # TODO: sanity check - if not self.isStopping(): - raise RuntimeError( - "Unexpected status when ending svc mgr thread: {}".format( - self._status)) - - if self._thread: - self._thread.join() - self._thread = None - self._status = MainExec.STATUS_STOPPED - # STD ERR thread - self._thread2.join() - self._thread2 = None - else: - print("Joining empty thread, doing nothing") - - def _trimQueue(self, targetSize): - if targetSize <= 0: - return # do nothing - q = self._ipcQueue - if (q.qsize() <= targetSize): # no need to trim - return - - logger.debug("Triming IPC queue to target size: {}".format(targetSize)) - itemsToTrim = q.qsize() - targetSize - for i in range(0, itemsToTrim): - try: - q.get_nowait() - except Empty: - break # break out of for loop, no more trimming - - TD_READY_MSG = "TDengine is initialized successfully" - - def procIpcBatch(self, trimToTarget=0, forceOutput=False): - self._trimQueue(trimToTarget) # trim if necessary - # Process all the output generated by the underlying sub process, - # managed by IO thread - print("<", end="", flush=True) - while True: - try: - line = self._ipcQueue.get_nowait() # getting output at fast speed - self._printProgress("_o") - except Empty: - # time.sleep(2.3) # wait only if there's no output - # no more output - print(".>", end="", flush=True) - return # we are done with THIS BATCH - else: # got line, printing out - if forceOutput: - logger.info(line) - else: - logger.debug(line) - print(">", end="", flush=True) - - _ProgressBars = ["--", "//", "||", "\\\\"] - - def _printProgress(self, msg): # TODO: assuming 2 chars - print(msg, end="", flush=True) - pBar = self._ProgressBars[Dice.throw(4)] - print(pBar, end="", flush=True) - print('\b\b\b\b', end="", flush=True) - - def svcOutputReader(self, out: IO, queue): - # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python - # print("This is the svcOutput Reader...") - # for line in out : - for line in iter(out.readline, b''): - # print("Finished reading a line: {}".format(line)) - # print("Adding item to queue...") - try: - line = line.decode("utf-8").rstrip() - except UnicodeError: - print("\nNon-UTF8 server output: {}\n".format(line)) - - # This might block, and then causing "out" buffer to block - queue.put(line) - self._printProgress("_i") - - if self._status == MainExec.STATUS_STARTING: # we are starting, let's see if we have started - if line.find(self.TD_READY_MSG) != -1: # found - logger.info("Waiting for the service to become FULLY READY") - time.sleep(1.0) # wait for the server to truly start. TODO: remove this - logger.info("Service is now FULLY READY") - self._status = MainExec.STATUS_RUNNING - - # Trim the queue if necessary: TODO: try this 1 out of 10 times - self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10) # trim to 90% size - - if self.isStopping(): # TODO: use thread status instead - # WAITING for stopping sub process to finish its outptu - print("_w", end="", flush=True) - - # queue.put(line) - # meaning sub process must have died - print("\nNo more output from IO thread managing TDengine service") - out.close() - - def svcErrorReader(self, err: IO, queue): - for line in iter(err.readline, b''): - print("\nTDengine Service (taosd) ERROR (from stderr): {}".format(line)) - - -class TdeSubProcess: - def __init__(self): - self.subProcess = None - - def getStdOut(self): - return self.subProcess.stdout - - def getStdErr(self): - return self.subProcess.stderr - - def isRunning(self): - return self.subProcess is not None - - def getPid(self): - return self.subProcess.pid - - def getBuildPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("communit")] - else: - projPath = selfPath[:selfPath.find("tests")] - - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root) - len("/build/bin")] - break - return buildPath - - def start(self): - ON_POSIX = 'posix' in sys.builtin_module_names - - taosdPath = self.getBuildPath() + "/build/bin/taosd" - cfgPath = self.getBuildPath() + "/test/cfg" - - # Delete the log files - logPath = self.getBuildPath() + "/test/log" - # ref: https://stackoverflow.com/questions/1995373/deleting-all-files-in-a-directory-with-python/1995397 - # filelist = [ f for f in os.listdir(logPath) ] # if f.endswith(".bak") ] - # for f in filelist: - # filePath = os.path.join(logPath, f) - # print("Removing log file: {}".format(filePath)) - # os.remove(filePath) - if os.path.exists(logPath): - logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S') - logger.info("Saving old log files to: {}".format(logPathSaved)) - os.rename(logPath, logPathSaved) - # os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms - - svcCmd = [taosdPath, '-c', cfgPath] - # svcCmdSingle = "{} -c {}".format(taosdPath, cfgPath) - # svcCmd = ['vmstat', '1'] - if self.subProcess: # already there - raise RuntimeError("Corrupt process state") - - # print("Starting service: {}".format(svcCmd)) - self.subProcess = subprocess.Popen( - svcCmd, shell=False, - # svcCmdSingle, shell=True, # capture core dump? - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - # bufsize=1, # not supported in binary mode - close_fds=ON_POSIX - ) # had text=True, which interferred with reading EOF - - def stop(self): - if not self.subProcess: - print("Sub process already stopped") - return -1 - - retCode = self.subProcess.poll() # contains real sub process return code - if retCode: # valid return code, process ended - self.subProcess = None - else: # process still alive, let's interrupt it - print( - "Sub process is running, sending SIG_INT and waiting for it to terminate...") - # sub process should end, then IPC queue should end, causing IO - # thread to end - self.subProcess.send_signal(signal.SIGINT) - try: - self.subProcess.wait(10) - retCode = self.subProcess.returncode - except subprocess.TimeoutExpired as err: - print("Time out waiting for TDengine service process to exit") - retCode = -3 - else: - print("TDengine service process terminated successfully from SIG_INT") - retCode = -4 - self.subProcess = None - return retCode class ThreadStacks: # stack info for all threads def __init__(self): @@ -2808,17 +1950,17 @@ class ClientManager: # signal.signal(signal.SIGTERM, self.sigIntHandler) # signal.signal(signal.SIGINT, self.sigIntHandler) - self._status = MainExec.STATUS_RUNNING + self._status = Status.STATUS_RUNNING self.tc = None self.inSigHandler = False def sigIntHandler(self, signalNumber, frame): - if self._status != MainExec.STATUS_RUNNING: + if self._status != Status.STATUS_RUNNING: print("Repeated SIGINT received, forced exit...") # return # do nothing if it's already not running sys.exit(-1) - self._status = MainExec.STATUS_STOPPING # immediately set our status + self._status = Status.STATUS_STOPPING # immediately set our status print("ClientManager: Terminating program...") self.tc.requestToStop() @@ -2898,15 +2040,20 @@ class ClientManager: # self._printLastNumbers() global gConfig - dbManager = DbManager() # Regular function + # Prepare Tde Instance + global gContainer + tInst = gContainer.defTdeInstance = TdeInstance() # "subdir to hold the instance" + + dbManager = DbManager(gConfig.connector_type, tInst.getDbTarget()) # Regular function thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps) self.tc = ThreadCoordinator(thPool, dbManager) + print("Starting client instance to: {}".format(tInst)) self.tc.run() # print("exec stats: {}".format(self.tc.getExecStats())) # print("TC failed = {}".format(self.tc.isFailed())) if svcMgr: # gConfig.auto_start_service: - svcMgr.stopTaosService() + svcMgr.stopTaosServices() svcMgr = None # Print exec status, etc., AFTER showing messages from the server self.conclude() @@ -2936,18 +2083,10 @@ class ClientManager: # self.tc.getDbManager().cleanUp() # clean up first, so we can show ZERO db connections self.tc.printStats() - - - class MainExec: - STATUS_STARTING = 1 - STATUS_RUNNING = 2 - STATUS_STOPPING = 3 - STATUS_STOPPED = 4 - def __init__(self): self._clientMgr = None - self._svcMgr = None + self._svcMgr = None # type: ServiceManager signal.signal(signal.SIGTERM, self.sigIntHandler) signal.signal(signal.SIGINT, self.sigIntHandler) @@ -2960,219 +2099,185 @@ class MainExec: self._svcMgr.sigUsrHandler(signalNumber, frame) def sigIntHandler(self, signalNumber, frame): - if self._svcMgr: + if self._svcMgr: self._svcMgr.sigIntHandler(signalNumber, frame) - if self._clientMgr: + if self._clientMgr: self._clientMgr.sigIntHandler(signalNumber, frame) def runClient(self): global gSvcMgr if gConfig.auto_start_service: - self._svcMgr = SvcManager() - gSvcMgr = self._svcMgr # hack alert - self._svcMgr.startTaosService() # we start, don't run + gSvcMgr = self._svcMgr = ServiceManager(1) # hack alert + gSvcMgr.startTaosServices() # we start, don't run self._clientMgr = ClientManager() ret = None try: ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside except requests.exceptions.ConnectionError as err: - logger.warning("Failed to open REST connection to DB: {}".format(err.getMessage())) + Logging.warning("Failed to open REST connection to DB: {}".format(err.getMessage())) # don't raise return ret def runService(self): global gSvcMgr - self._svcMgr = SvcManager() - gSvcMgr = self._svcMgr # save it in a global variable TODO: hack alert + gSvcMgr = self._svcMgr = ServiceManager(gConfig.num_dnodes) # save it in a global variable TODO: hack alert - self._svcMgr.run() # run to some end state - self._svcMgr = None - gSvcMgr = None + gSvcMgr.run() # run to some end state + gSvcMgr = self._svcMgr = None - def runTemp(self): # for debugging purposes - # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix - # dbc = dbState.getDbConn() - # sTbName = dbState.getFixedSuperTableName() - # dbc.execute("create database if not exists db") - # if not dbState.getState().equals(StateEmpty()): - # dbc.execute("use db") + def init(self): # TODO: refactor + global gContainer + gContainer = Container() # micky-mouse DI - # rTables = None - # try: # the super table may not exist - # sql = "select TBNAME from db.{}".format(sTbName) - # logger.info("Finding out tables in super table: {}".format(sql)) - # dbc.query(sql) # TODO: analyze result set later - # logger.info("Fetching result") - # rTables = dbc.getQueryResult() - # logger.info("Result: {}".format(rTables)) - # except taos.error.ProgrammingError as err: - # logger.info("Initial Super table OPS error: {}".format(err)) + global gSvcMgr # TODO: refactor away + gSvcMgr = None - # # sys.exit() - # if ( not rTables == None): - # # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0]))) - # try: - # for rTbName in rTables : # regular tables - # ds = dbState - # logger.info("Inserting into table: {}".format(rTbName[0])) - # sql = "insert into db.{} values ('{}', {});".format( - # rTbName[0], - # ds.getNextTick(), ds.getNextInt()) - # dbc.execute(sql) - # for rTbName in rTables : # regular tables - # dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure - # logger.info("Initial READING operation is successful") - # except taos.error.ProgrammingError as err: - # logger.info("Initial WRITE/READ error: {}".format(err)) + # Super cool Python argument library: + # https://docs.python.org/3/library/argparse.html + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent('''\ + TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below) + --------------------------------------------------------------------- + 1. You build TDengine in the top level ./build directory, as described in offical docs + 2. You run the server there before this script: ./build/bin/taosd -c test/cfg - # Sandbox testing code - # dbc = dbState.getDbConn() - # while True: - # rows = dbc.query("show databases") - # print("Rows: {}, time={}".format(rows, time.time())) - return + ''')) + + parser.add_argument( + '-a', + '--auto-start-service', + action='store_true', + help='Automatically start/stop the TDengine service (default: false)') + parser.add_argument( + '-b', + '--max-dbs', + action='store', + default=0, + type=int, + help='Maximum number of DBs to keep, set to disable dropping DB. (default: 0)') + parser.add_argument( + '-c', + '--connector-type', + action='store', + default='native', + type=str, + help='Connector type to use: native, rest, or mixed (default: 10)') + parser.add_argument( + '-d', + '--debug', + action='store_true', + help='Turn on DEBUG mode for more logging (default: false)') + parser.add_argument( + '-e', + '--run-tdengine', + action='store_true', + help='Run TDengine service in foreground (default: false)') + parser.add_argument( + '-g', + '--ignore-errors', + action='store', + default=None, + type=str, + help='Ignore error codes, comma separated, 0x supported (default: None)') + parser.add_argument( + '-i', + '--max-replicas', + action='store', + default=1, + type=int, + help='Maximum number of replicas to use, when testing against clusters. (default: 1)') + parser.add_argument( + '-l', + '--larger-data', + action='store_true', + help='Write larger amount of data during write operations (default: false)') + parser.add_argument( + '-n', + '--dynamic-db-table-names', + action='store_true', + help='Use non-fixed names for dbs/tables, useful for multi-instance executions (default: false)') + parser.add_argument( + '-o', + '--num-dnodes', + action='store', + default=1, + type=int, + help='Number of Dnodes to initialize, used with -e option. (default: 1)') + parser.add_argument( + '-p', + '--per-thread-db-connection', + action='store_true', + help='Use a single shared db connection (default: false)') + parser.add_argument( + '-r', + '--record-ops', + action='store_true', + help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)') + parser.add_argument( + '-s', + '--max-steps', + action='store', + default=1000, + type=int, + help='Maximum number of steps to run (default: 100)') + parser.add_argument( + '-t', + '--num-threads', + action='store', + default=5, + type=int, + help='Number of threads to run (default: 10)') + parser.add_argument( + '-v', + '--verify-data', + action='store_true', + help='Verify data written in a number of places by reading back (default: false)') + parser.add_argument( + '-x', + '--continue-on-exception', + action='store_true', + help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)') + + global gConfig + gConfig = parser.parse_args() + + Logging.clsInit(gConfig) + + Dice.seed(0) # initial seeding of dice + + def run(self): + if gConfig.run_tdengine: # run server + try: + self.runService() + return 0 # success + except ConnectionError as err: + Logging.error("Failed to make DB connection, please check DB instance manually") + return -1 # failure + else: + return self.runClient() -def main(): - # Super cool Python argument library: - # https://docs.python.org/3/library/argparse.html - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description=textwrap.dedent('''\ - TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below) - --------------------------------------------------------------------- - 1. You build TDengine in the top level ./build directory, as described in offical docs - 2. You run the server there before this script: ./build/bin/taosd -c test/cfg +class Container(): + _propertyList = {'defTdeInstance'} - ''')) + def __init__(self): + self._cargo = {} # No cargo at the beginning - # parser.add_argument('-a', '--auto-start-service', action='store_true', - # help='Automatically start/stop the TDengine service (default: false)') - # parser.add_argument('-c', '--connector-type', action='store', default='native', type=str, - # help='Connector type to use: native, rest, or mixed (default: 10)') - # parser.add_argument('-d', '--debug', action='store_true', - # help='Turn on DEBUG mode for more logging (default: false)') - # parser.add_argument('-e', '--run-tdengine', action='store_true', - # help='Run TDengine service in foreground (default: false)') - # parser.add_argument('-l', '--larger-data', action='store_true', - # help='Write larger amount of data during write operations (default: false)') - # parser.add_argument('-p', '--per-thread-db-connection', action='store_true', - # help='Use a single shared db connection (default: false)') - # parser.add_argument('-r', '--record-ops', action='store_true', - # help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)') - # parser.add_argument('-s', '--max-steps', action='store', default=1000, type=int, - # help='Maximum number of steps to run (default: 100)') - # parser.add_argument('-t', '--num-threads', action='store', default=5, type=int, - # help='Number of threads to run (default: 10)') - # parser.add_argument('-x', '--continue-on-exception', action='store_true', - # help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)') + def _verifyValidProperty(self, name): + if not name in self._propertyList: + raise CrashGenError("Invalid container property: {}".format(name)) - parser.add_argument( - '-a', - '--auto-start-service', - action='store_true', - help='Automatically start/stop the TDengine service (default: false)') - parser.add_argument( - '-b', - '--max-dbs', - action='store', - default=0, - type=int, - help='Maximum number of DBs to keep, set to disable dropping DB. (default: 0)') - parser.add_argument( - '-c', - '--connector-type', - action='store', - default='native', - type=str, - help='Connector type to use: native, rest, or mixed (default: 10)') - parser.add_argument( - '-d', - '--debug', - action='store_true', - help='Turn on DEBUG mode for more logging (default: false)') - parser.add_argument( - '-e', - '--run-tdengine', - action='store_true', - help='Run TDengine service in foreground (default: false)') - parser.add_argument( - '-i', - '--max-replicas', - action='store', - default=1, - type=int, - help='Maximum number of replicas to use, when testing against clusters. (default: 1)') - parser.add_argument( - '-l', - '--larger-data', - action='store_true', - help='Write larger amount of data during write operations (default: false)') - parser.add_argument( - '-p', - '--per-thread-db-connection', - action='store_true', - help='Use a single shared db connection (default: false)') - parser.add_argument( - '-r', - '--record-ops', - action='store_true', - help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)') - parser.add_argument( - '-s', - '--max-steps', - action='store', - default=1000, - type=int, - help='Maximum number of steps to run (default: 100)') - parser.add_argument( - '-t', - '--num-threads', - action='store', - default=5, - type=int, - help='Number of threads to run (default: 10)') - parser.add_argument( - '-v', - '--verify-data', - action='store_true', - help='Verify data written in a number of places by reading back (default: false)') - parser.add_argument( - '-x', - '--continue-on-exception', - action='store_true', - help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)') + # Called for an attribute, when other mechanisms fail (compare to __getattribute__) + def __getattr__(self, name): + self._verifyValidProperty(name) + return self._cargo[name] # just a simple lookup - global gConfig - gConfig = parser.parse_args() + def __setattr__(self, name, value): + if name == '_cargo' : # reserved vars + super().__setattr__(name, value) + return + self._verifyValidProperty(name) + self._cargo[name] = value - # Logging Stuff - global logger - _logger = logging.getLogger('CrashGen') # real logger - _logger.addFilter(LoggingFilter()) - ch = logging.StreamHandler() - _logger.addHandler(ch) - - # Logging adapter, to be used as a logger - logger = MyLoggingAdapter(_logger, []) - - if (gConfig.debug): - logger.setLevel(logging.DEBUG) # default seems to be INFO - else: - logger.setLevel(logging.INFO) - - Dice.seed(0) # initial seeding of dice - - # Run server or client - mExec = MainExec() - if gConfig.run_tdengine: # run server - mExec.runService() - else: - return mExec.runClient() - - -if __name__ == "__main__": - exitCode = main() - # print("Exiting with code: {}".format(exitCode)) - sys.exit(exitCode) diff --git a/tests/pytest/crash_gen/db.py b/tests/pytest/crash_gen/db.py new file mode 100644 index 0000000000..43c855647c --- /dev/null +++ b/tests/pytest/crash_gen/db.py @@ -0,0 +1,435 @@ +from __future__ import annotations + +import sys +import time +import threading +import requests +from requests.auth import HTTPBasicAuth + +import taos +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.log import * + +from .misc import Logging, CrashGenError, Helper, Dice +import os +import datetime +# from .service_manager import TdeInstance + +class DbConn: + TYPE_NATIVE = "native-c" + TYPE_REST = "rest-api" + TYPE_INVALID = "invalid" + + @classmethod + def create(cls, connType, dbTarget): + if connType == cls.TYPE_NATIVE: + return DbConnNative(dbTarget) + elif connType == cls.TYPE_REST: + return DbConnRest(dbTarget) + else: + raise RuntimeError( + "Unexpected connection type: {}".format(connType)) + + @classmethod + def createNative(cls, dbTarget) -> DbConn: + return cls.create(cls.TYPE_NATIVE, dbTarget) + + @classmethod + def createRest(cls, dbTarget) -> DbConn: + return cls.create(cls.TYPE_REST, dbTarget) + + def __init__(self, dbTarget): + self.isOpen = False + self._type = self.TYPE_INVALID + self._lastSql = None + self._dbTarget = dbTarget + + def __repr__(self): + return "[DbConn: type={}, target={}]".format(self._type, self._dbTarget) + + def getLastSql(self): + return self._lastSql + + def open(self): + if (self.isOpen): + raise RuntimeError("Cannot re-open an existing DB connection") + + # below implemented by child classes + self.openByType() + + Logging.debug("[DB] data connection opened: {}".format(self)) + self.isOpen = True + + def close(self): + raise RuntimeError("Unexpected execution, should be overriden") + + def queryScalar(self, sql) -> int: + return self._queryAny(sql) + + def queryString(self, sql) -> str: + return self._queryAny(sql) + + def _queryAny(self, sql): # actual query result as an int + if (not self.isOpen): + raise RuntimeError("Cannot query database until connection is open") + nRows = self.query(sql) + if nRows != 1: + raise taos.error.ProgrammingError( + "Unexpected result for query: {}, rows = {}".format(sql, nRows), + (0x991 if nRows==0 else 0x992) + ) + if self.getResultRows() != 1 or self.getResultCols() != 1: + raise RuntimeError("Unexpected result set for query: {}".format(sql)) + return self.getQueryResult()[0][0] + + def use(self, dbName): + self.execute("use {}".format(dbName)) + + def existsDatabase(self, dbName: str): + ''' Check if a certain database exists ''' + self.query("show databases") + dbs = [v[0] for v in self.getQueryResult()] # ref: https://stackoverflow.com/questions/643823/python-list-transformation + # ret2 = dbName in dbs + # print("dbs = {}, str = {}, ret2={}, type2={}".format(dbs, dbName,ret2, type(dbName))) + return dbName in dbs # TODO: super weird type mangling seen, once here + + def hasTables(self): + return self.query("show tables") > 0 + + def execute(self, sql): + ''' Return the number of rows affected''' + raise RuntimeError("Unexpected execution, should be overriden") + + def safeExecute(self, sql): + '''Safely execute any SQL query, returning True/False upon success/failure''' + try: + self.execute(sql) + return True # ignore num of results, return success + except taos.error.ProgrammingError as err: + return False # failed, for whatever TAOS reason + # Not possile to reach here, non-TAOS exception would have been thrown + + def query(self, sql) -> int: # return num rows returned + ''' Return the number of rows affected''' + raise RuntimeError("Unexpected execution, should be overriden") + + def openByType(self): + raise RuntimeError("Unexpected execution, should be overriden") + + def getQueryResult(self): + raise RuntimeError("Unexpected execution, should be overriden") + + def getResultRows(self): + raise RuntimeError("Unexpected execution, should be overriden") + + def getResultCols(self): + raise RuntimeError("Unexpected execution, should be overriden") + +# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql + + +class DbConnRest(DbConn): + REST_PORT_INCREMENT = 11 + + def __init__(self, dbTarget: DbTarget): + super().__init__(dbTarget) + self._type = self.TYPE_REST + restPort = dbTarget.port + 11 + self._url = "http://{}:{}/rest/sql".format( + dbTarget.hostAddr, dbTarget.port + self.REST_PORT_INCREMENT) + self._result = None + + def openByType(self): # Open connection + pass # do nothing, always open + + def close(self): + if (not self.isOpen): + raise RuntimeError("Cannot clean up database until connection is open") + # Do nothing for REST + Logging.debug("[DB] REST Database connection closed") + self.isOpen = False + + def _doSql(self, sql): + self._lastSql = sql # remember this, last SQL attempted + try: + r = requests.post(self._url, + data = sql, + auth = HTTPBasicAuth('root', 'taosdata')) + except: + print("REST API Failure (TODO: more info here)") + raise + rj = r.json() + # Sanity check for the "Json Result" + if ('status' not in rj): + raise RuntimeError("No status in REST response") + + if rj['status'] == 'error': # clearly reported error + if ('code' not in rj): # error without code + raise RuntimeError("REST error return without code") + errno = rj['code'] # May need to massage this in the future + # print("Raising programming error with REST return: {}".format(rj)) + raise taos.error.ProgrammingError( + rj['desc'], errno) # todo: check existance of 'desc' + + if rj['status'] != 'succ': # better be this + raise RuntimeError( + "Unexpected REST return status: {}".format( + rj['status'])) + + nRows = rj['rows'] if ('rows' in rj) else 0 + self._result = rj + return nRows + + def execute(self, sql): + if (not self.isOpen): + raise RuntimeError( + "Cannot execute database commands until connection is open") + Logging.debug("[SQL-REST] Executing SQL: {}".format(sql)) + nRows = self._doSql(sql) + Logging.debug( + "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql)) + return nRows + + def query(self, sql): # return rows affected + return self.execute(sql) + + def getQueryResult(self): + return self._result['data'] + + def getResultRows(self): + print(self._result) + raise RuntimeError("TBD") # TODO: finish here to support -v under -c rest + # return self._tdSql.queryRows + + def getResultCols(self): + print(self._result) + raise RuntimeError("TBD") + + # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative + + +class MyTDSql: + # Class variables + _clsLock = threading.Lock() # class wide locking + longestQuery = None # type: str + longestQueryTime = 0.0 # seconds + lqStartTime = 0.0 + # lqEndTime = 0.0 # Not needed, as we have the two above already + + def __init__(self, hostAddr, cfgPath): + # Make the DB connection + self._conn = taos.connect(host=hostAddr, config=cfgPath) + self._cursor = self._conn.cursor() + + self.queryRows = 0 + self.queryCols = 0 + self.affectedRows = 0 + + # def init(self, cursor, log=True): + # self.cursor = cursor + # if (log): + # caller = inspect.getframeinfo(inspect.stack()[1][0]) + # self.cursor.log(caller.filename + ".sql") + + def close(self): + self._cursor.close() # can we double close? + self._conn.close() # TODO: very important, cursor close does NOT close DB connection! + self._cursor.close() + + def _execInternal(self, sql): + startTime = time.time() + ret = self._cursor.execute(sql) + # print("\nSQL success: {}".format(sql)) + queryTime = time.time() - startTime + # Record the query time + cls = self.__class__ + if queryTime > (cls.longestQueryTime + 0.01) : + with cls._clsLock: + cls.longestQuery = sql + cls.longestQueryTime = queryTime + cls.lqStartTime = startTime + return ret + + def query(self, sql): + self.sql = sql + try: + self._execInternal(sql) + self.queryResult = self._cursor.fetchall() + self.queryRows = len(self.queryResult) + self.queryCols = len(self._cursor.description) + except Exception as e: + # caller = inspect.getframeinfo(inspect.stack()[1][0]) + # args = (caller.filename, caller.lineno, sql, repr(e)) + # tdLog.exit("%s(%d) failed: sql:%s, %s" % args) + raise + return self.queryRows + + def execute(self, sql): + self.sql = sql + try: + self.affectedRows = self._execInternal(sql) + except Exception as e: + # caller = inspect.getframeinfo(inspect.stack()[1][0]) + # args = (caller.filename, caller.lineno, sql, repr(e)) + # tdLog.exit("%s(%d) failed: sql:%s, %s" % args) + raise + return self.affectedRows + +class DbTarget: + def __init__(self, cfgPath, hostAddr, port): + self.cfgPath = cfgPath + self.hostAddr = hostAddr + self.port = port + + def __repr__(self): + return "[DbTarget: cfgPath={}, host={}:{}]".format( + Helper.getFriendlyPath(self.cfgPath), self.hostAddr, self.port) + + def getEp(self): + return "{}:{}".format(self.hostAddr, self.port) + +class DbConnNative(DbConn): + # Class variables + _lock = threading.Lock() + # _connInfoDisplayed = False # TODO: find another way to display this + totalConnections = 0 # Not private + + def __init__(self, dbTarget): + super().__init__(dbTarget) + self._type = self.TYPE_NATIVE + self._conn = None + # self._cursor = None + + def openByType(self): # Open connection + # global gContainer + # tInst = tInst or gContainer.defTdeInstance # set up in ClientManager, type: TdeInstance + # cfgPath = self.getBuildPath() + "/test/cfg" + # cfgPath = tInst.getCfgDir() + # hostAddr = tInst.getHostAddr() + + cls = self.__class__ # Get the class, to access class variables + with cls._lock: # force single threading for opening DB connections. # TODO: whaaat??!!! + dbTarget = self._dbTarget + # if not cls._connInfoDisplayed: + # cls._connInfoDisplayed = True # updating CLASS variable + Logging.debug("Initiating TAOS native connection to {}".format(dbTarget)) + # Make the connection + # self._conn = taos.connect(host=hostAddr, config=cfgPath) # TODO: make configurable + # self._cursor = self._conn.cursor() + # Record the count in the class + self._tdSql = MyTDSql(dbTarget.hostAddr, dbTarget.cfgPath) # making DB connection + cls.totalConnections += 1 + + self._tdSql.execute('reset query cache') + # self._cursor.execute('use db') # do this at the beginning of every + + # Open connection + # self._tdSql = MyTDSql() + # self._tdSql.init(self._cursor) + + def close(self): + if (not self.isOpen): + raise RuntimeError("Cannot clean up database until connection is open") + self._tdSql.close() + # Decrement the class wide counter + cls = self.__class__ # Get the class, to access class variables + with cls._lock: + cls.totalConnections -= 1 + + Logging.debug("[DB] Database connection closed") + self.isOpen = False + + def execute(self, sql): + if (not self.isOpen): + raise RuntimeError("Cannot execute database commands until connection is open") + Logging.debug("[SQL] Executing SQL: {}".format(sql)) + self._lastSql = sql + nRows = self._tdSql.execute(sql) + Logging.debug( + "[SQL] Execution Result, nRows = {}, SQL = {}".format( + nRows, sql)) + return nRows + + def query(self, sql): # return rows affected + if (not self.isOpen): + raise RuntimeError( + "Cannot query database until connection is open") + Logging.debug("[SQL] Executing SQL: {}".format(sql)) + self._lastSql = sql + nRows = self._tdSql.query(sql) + Logging.debug( + "[SQL] Query Result, nRows = {}, SQL = {}".format( + nRows, sql)) + return nRows + # results are in: return self._tdSql.queryResult + + def getQueryResult(self): + return self._tdSql.queryResult + + def getResultRows(self): + return self._tdSql.queryRows + + def getResultCols(self): + return self._tdSql.queryCols + + +class DbManager(): + ''' This is a wrapper around DbConn(), to make it easier to use. + + TODO: rename this to DbConnManager + ''' + def __init__(self, cType, dbTarget): + # self.tableNumQueue = LinearQueue() # TODO: delete? + # self.openDbServerConnection() + self._dbConn = DbConn.createNative(dbTarget) if ( + cType == 'native') else DbConn.createRest(dbTarget) + try: + self._dbConn.open() # may throw taos.error.ProgrammingError: disconnected + except taos.error.ProgrammingError as err: + # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err)) + if (err.msg == 'client disconnected'): # cannot open DB connection + print( + "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.") + sys.exit(2) + else: + print("Failed to connect to DB, errno = {}, msg: {}" + .format(Helper.convertErrno(err.errno), err.msg)) + raise + except BaseException: + print("[=] Unexpected exception") + raise + + # Do this after dbConn is in proper shape + # Moved to Database() + # self._stateMachine = StateMechine(self._dbConn) + + def getDbConn(self): + return self._dbConn + + # TODO: not used any more, to delete + def pickAndAllocateTable(self): # pick any table, and "use" it + return self.tableNumQueue.pickAndAllocate() + + # TODO: Not used any more, to delete + def addTable(self): + with self._lock: + tIndex = self.tableNumQueue.push() + return tIndex + + # Not used any more, to delete + def releaseTable(self, i): # return the table back, so others can use it + self.tableNumQueue.release(i) + + # TODO: not used any more, delete + def getTableNameToDelete(self): + tblNum = self.tableNumQueue.pop() # TODO: race condition! + if (not tblNum): # maybe false + return False + + return "table_{}".format(tblNum) + + def cleanUp(self): + self._dbConn.close() + diff --git a/tests/pytest/crash_gen/misc.py b/tests/pytest/crash_gen/misc.py new file mode 100644 index 0000000000..34a33c6af6 --- /dev/null +++ b/tests/pytest/crash_gen/misc.py @@ -0,0 +1,181 @@ +import threading +import random +import logging +import os + + +class CrashGenError(Exception): + def __init__(self, msg=None, errno=None): + self.msg = msg + self.errno = errno + + def __str__(self): + return self.msg + + +class LoggingFilter(logging.Filter): + def filter(self, record: logging.LogRecord): + if (record.levelno >= logging.INFO): + return True # info or above always log + + # Commenting out below to adjust... + + # if msg.startswith("[TRD]"): + # return False + return True + + +class MyLoggingAdapter(logging.LoggerAdapter): + def process(self, msg, kwargs): + return "[{}] {}".format(threading.get_ident() % 10000, msg), kwargs + # return '[%s] %s' % (self.extra['connid'], msg), kwargs + + +class Logging: + logger = None + + @classmethod + def getLogger(cls): + return logger + + @classmethod + def clsInit(cls, gConfig): # TODO: refactor away gConfig + if cls.logger: + return + + # Logging Stuff + # global misc.logger + _logger = logging.getLogger('CrashGen') # real logger + _logger.addFilter(LoggingFilter()) + ch = logging.StreamHandler() + _logger.addHandler(ch) + + # Logging adapter, to be used as a logger + print("setting logger variable") + # global logger + cls.logger = MyLoggingAdapter(_logger, []) + + if (gConfig.debug): + cls.logger.setLevel(logging.DEBUG) # default seems to be INFO + else: + cls.logger.setLevel(logging.INFO) + + @classmethod + def info(cls, msg): + cls.logger.info(msg) + + @classmethod + def debug(cls, msg): + cls.logger.debug(msg) + + @classmethod + def warning(cls, msg): + cls.logger.warning(msg) + + @classmethod + def error(cls, msg): + cls.logger.error(msg) + +class Status: + STATUS_STARTING = 1 + STATUS_RUNNING = 2 + STATUS_STOPPING = 3 + STATUS_STOPPED = 4 + + def __init__(self, status): + self.set(status) + + def __repr__(self): + return "[Status: v={}]".format(self._status) + + def set(self, status): + self._status = status + + def get(self): + return self._status + + def isStarting(self): + return self._status == Status.STATUS_STARTING + + def isRunning(self): + # return self._thread and self._thread.is_alive() + return self._status == Status.STATUS_RUNNING + + def isStopping(self): + return self._status == Status.STATUS_STOPPING + + def isStopped(self): + return self._status == Status.STATUS_STOPPED + + def isStable(self): + return self.isRunning() or self.isStopped() + +# Deterministic random number generator +class Dice(): + seeded = False # static, uninitialized + + @classmethod + def seed(cls, s): # static + if (cls.seeded): + raise RuntimeError( + "Cannot seed the random generator more than once") + cls.verifyRNG() + random.seed(s) + cls.seeded = True # TODO: protect against multi-threading + + @classmethod + def verifyRNG(cls): # Verify that the RNG is determinstic + random.seed(0) + x1 = random.randrange(0, 1000) + x2 = random.randrange(0, 1000) + x3 = random.randrange(0, 1000) + if (x1 != 864 or x2 != 394 or x3 != 776): + raise RuntimeError("System RNG is not deterministic") + + @classmethod + def throw(cls, stop): # get 0 to stop-1 + return cls.throwRange(0, stop) + + @classmethod + def throwRange(cls, start, stop): # up to stop-1 + if (not cls.seeded): + raise RuntimeError("Cannot throw dice before seeding it") + return random.randrange(start, stop) + + @classmethod + def choice(cls, cList): + return random.choice(cList) + +class Helper: + @classmethod + def convertErrno(cls, errno): + return errno if (errno > 0) else 0x80000000 + errno + + @classmethod + def getFriendlyPath(cls, path): # returns .../xxx/yyy + ht1 = os.path.split(path) + ht2 = os.path.split(ht1[0]) + return ".../" + ht2[1] + '/' + ht1[1] + + +class Progress: + STEP_BOUNDARY = 0 + BEGIN_THREAD_STEP = 1 + END_THREAD_STEP = 2 + SERVICE_HEART_BEAT= 3 + SERVICE_RECONNECT_START = 4 + SERVICE_RECONNECT_SUCCESS = 5 + SERVICE_RECONNECT_FAILURE = 6 + tokens = { + STEP_BOUNDARY: '.', + BEGIN_THREAD_STEP: '[', + END_THREAD_STEP: '] ', + SERVICE_HEART_BEAT: '.Y.', + SERVICE_RECONNECT_START: '', + SERVICE_RECONNECT_FAILURE: '.xr>', + } + + @classmethod + def emit(cls, token): + print(cls.tokens[token], end="", flush=True) diff --git a/tests/pytest/crash_gen/service_manager.py b/tests/pytest/crash_gen/service_manager.py new file mode 100644 index 0000000000..196e9d944a --- /dev/null +++ b/tests/pytest/crash_gen/service_manager.py @@ -0,0 +1,738 @@ +import os +import io +import sys +import threading +import signal +import logging +import time +import subprocess + +from typing import IO, List + +try: + import psutil +except: + print("Psutil module needed, please install: sudo pip3 install psutil") + sys.exit(-1) + +from queue import Queue, Empty + +from .misc import Logging, Status, CrashGenError, Dice, Helper, Progress +from .db import DbConn, DbTarget + +class TdeInstance(): + """ + A class to capture the *static* information of a TDengine instance, + including the location of the various files/directories, and basica + configuration. + """ + + @classmethod + def _getBuildPath(cls): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("communit")] + else: + projPath = selfPath[:selfPath.find("tests")] + + buildPath = None + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + if buildPath == None: + raise RuntimeError("Failed to determine buildPath, selfPath={}, projPath={}" + .format(selfPath, projPath)) + return buildPath + + def __init__(self, subdir='test', tInstNum=0, port=6030, fepPort=6030): + self._buildDir = self._getBuildPath() + self._subdir = '/' + subdir # TODO: tolerate "/" + self._port = port # TODO: support different IP address too + self._fepPort = fepPort + + self._tInstNum = tInstNum + self._smThread = ServiceManagerThread() + + def getDbTarget(self): + return DbTarget(self.getCfgDir(), self.getHostAddr(), self._port) + + def getPort(self): + return self._port + + def __repr__(self): + return "[TdeInstance: {}, subdir={}]".format( + self._buildDir, Helper.getFriendlyPath(self._subdir)) + + def generateCfgFile(self): + # print("Logger = {}".format(logger)) + # buildPath = self.getBuildPath() + # taosdPath = self._buildPath + "/build/bin/taosd" + + cfgDir = self.getCfgDir() + cfgFile = cfgDir + "/taos.cfg" # TODO: inquire if this is fixed + if os.path.exists(cfgFile): + if os.path.isfile(cfgFile): + Logging.warning("Config file exists already, skip creation: {}".format(cfgFile)) + return # cfg file already exists, nothing to do + else: + raise CrashGenError("Invalid config file: {}".format(cfgFile)) + # Now that the cfg file doesn't exist + if os.path.exists(cfgDir): + if not os.path.isdir(cfgDir): + raise CrashGenError("Invalid config dir: {}".format(cfgDir)) + # else: good path + else: + os.makedirs(cfgDir, exist_ok=True) # like "mkdir -p" + # Now we have a good cfg dir + cfgValues = { + 'runDir': self.getRunDir(), + 'ip': '127.0.0.1', # TODO: change to a network addressable ip + 'port': self._port, + 'fepPort': self._fepPort, + } + cfgTemplate = """ +dataDir {runDir}/data +logDir {runDir}/log + +charset UTF-8 + +firstEp {ip}:{fepPort} +fqdn {ip} +serverPort {port} + +# was all 135 below +dDebugFlag 135 +cDebugFlag 135 +rpcDebugFlag 135 +qDebugFlag 135 +# httpDebugFlag 143 +# asyncLog 0 +# tables 10 +maxtablesPerVnode 10 +rpcMaxTime 101 +# cache 2 +keep 36500 +# walLevel 2 +walLevel 1 +# +# maxConnections 100 +""" + cfgContent = cfgTemplate.format_map(cfgValues) + f = open(cfgFile, "w") + f.write(cfgContent) + f.close() + + def rotateLogs(self): + logPath = self.getLogDir() + # ref: https://stackoverflow.com/questions/1995373/deleting-all-files-in-a-directory-with-python/1995397 + if os.path.exists(logPath): + logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S') + Logging.info("Saving old log files to: {}".format(logPathSaved)) + os.rename(logPath, logPathSaved) + # os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms + + + def getExecFile(self): # .../taosd + return self._buildDir + "/build/bin/taosd" + + def getRunDir(self): # TODO: rename to "root dir" ?! + return self._buildDir + self._subdir + + def getCfgDir(self): # path, not file + return self.getRunDir() + "/cfg" + + def getLogDir(self): + return self.getRunDir() + "/log" + + def getHostAddr(self): + return "127.0.0.1" + + def getServiceCmdLine(self): # to start the instance + return [self.getExecFile(), '-c', self.getCfgDir()] # used in subproce.Popen() + + def _getDnodes(self, dbc): + dbc.query("show dnodes") + cols = dbc.getQueryResult() # id,end_point,vnodes,cores,status,role,create_time,offline reason + return {c[1]:c[4] for c in cols} # {'xxx:6030':'ready', 'xxx:6130':'ready'} + + def createDnode(self, dbt: DbTarget): + """ + With a connection to the "first" EP, let's create a dnode for someone else who + wants to join. + """ + dbc = DbConn.createNative(self.getDbTarget()) + dbc.open() + + if dbt.getEp() in self._getDnodes(dbc): + Logging.info("Skipping DNode creation for: {}".format(dbt)) + dbc.close() + return + + sql = "CREATE DNODE \"{}\"".format(dbt.getEp()) + dbc.execute(sql) + dbc.close() + + def getStatus(self): + return self._smThread.getStatus() + + def getSmThread(self): + return self._smThread + + def start(self): + if not self.getStatus().isStopped(): + raise CrashGenError("Cannot start instance from status: {}".format(self.getStatus())) + + Logging.info("Starting TDengine instance: {}".format(self)) + self.generateCfgFile() # service side generates config file, client does not + self.rotateLogs() + + self._smThread.start(self.getServiceCmdLine()) + + def stop(self): + self._smThread.stop() + + def isFirst(self): + return self._tInstNum == 0 + + +class TdeSubProcess: + """ + A class to to represent the actual sub process that is the run-time + of a TDengine instance. + + It takes a TdeInstance object as its parameter, with the rationale being + "a sub process runs an instance". + """ + + # RET_ALREADY_STOPPED = -1 + # RET_TIME_OUT = -3 + # RET_SUCCESS = -4 + + def __init__(self): + self.subProcess = None + # if tInst is None: + # raise CrashGenError("Empty instance not allowed in TdeSubProcess") + # self._tInst = tInst # Default create at ServiceManagerThread + + def getStdOut(self): + return self.subProcess.stdout + + def getStdErr(self): + return self.subProcess.stderr + + def isRunning(self): + return self.subProcess is not None + + def getPid(self): + return self.subProcess.pid + + def start(self, cmdLine): + ON_POSIX = 'posix' in sys.builtin_module_names + + # Sanity check + if self.subProcess: # already there + raise RuntimeError("Corrupt process state") + + self.subProcess = subprocess.Popen( + cmdLine, + shell=False, + # svcCmdSingle, shell=True, # capture core dump? + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + # bufsize=1, # not supported in binary mode + close_fds=ON_POSIX + ) # had text=True, which interferred with reading EOF + + def stop(self): + """ + Stop a sub process, and try to return a meaningful return code. + + Common POSIX signal values (from man -7 signal): + SIGHUP 1 + SIGINT 2 + SIGQUIT 3 + SIGILL 4 + SIGTRAP 5 + SIGABRT 6 + SIGIOT 6 + SIGBUS 7 + SIGEMT - + SIGFPE 8 + SIGKILL 9 + SIGUSR1 10 + SIGSEGV 11 + SIGUSR2 12 + """ + if not self.subProcess: + print("Sub process already stopped") + return # -1 + + retCode = self.subProcess.poll() # ret -N means killed with signal N, otherwise it's from exit(N) + if retCode: # valid return code, process ended + retCode = -retCode # only if valid + Logging.warning("TSP.stop(): process ended itself") + self.subProcess = None + return retCode + + # process still alive, let's interrupt it + print("Terminate running process, send SIG_INT and wait...") + # sub process should end, then IPC queue should end, causing IO thread to end + # sig = signal.SIGINT + sig = signal.SIGKILL + self.subProcess.send_signal(sig) # SIGNINT or SIGKILL + self.subProcess.wait(20) + retCode = self.subProcess.returncode # should always be there + # May throw subprocess.TimeoutExpired exception above, therefore + # The process is guranteed to have ended by now + self.subProcess = None + if retCode != 0: # != (- signal.SIGINT): + Logging.error("TSP.stop(): Failed to stop sub proc properly w/ SIG {}, retCode={}".format(sig, retCode)) + else: + Logging.info("TSP.stop(): sub proc successfully terminated with SIG {}".format(sig)) + return - retCode + +class ServiceManager: + PAUSE_BETWEEN_IPC_CHECK = 1.2 # seconds between checks on STDOUT of sub process + + def __init__(self, numDnodes): # >1 when we run a cluster + Logging.info("TDengine Service Manager (TSM) created") + self._numDnodes = numDnodes # >1 means we have a cluster + self._lock = threading.Lock() + # signal.signal(signal.SIGTERM, self.sigIntHandler) # Moved to MainExec + # signal.signal(signal.SIGINT, self.sigIntHandler) + # signal.signal(signal.SIGUSR1, self.sigUsrHandler) # different handler! + + self.inSigHandler = False + # self._status = MainExec.STATUS_RUNNING # set inside + # _startTaosService() + self._runCluster = (numDnodes > 1) + self._tInsts : List[TdeInstance] = [] + for i in range(0, numDnodes): + ti = self._createTdeInstance(i) # construct tInst + self._tInsts.append(ti) + + # self.svcMgrThreads : List[ServiceManagerThread] = [] + # for i in range(0, numDnodes): + # thread = self._createThread(i) # construct tInst + # self.svcMgrThreads.append(thread) + + def _createTdeInstance(self, dnIndex): + if not self._runCluster: # single instance + subdir = 'test' + else: # Create all threads in a cluster + subdir = 'cluster_dnode_{}'.format(dnIndex) + fepPort= 6030 # firstEP Port + port = fepPort + dnIndex * 100 + return TdeInstance(subdir, dnIndex, port, fepPort) + # return ServiceManagerThread(dnIndex, ti) + + def _doMenu(self): + choice = "" + while True: + print("\nInterrupting Service Program, Choose an Action: ") + print("1: Resume") + print("2: Terminate") + print("3: Restart") + # Remember to update the if range below + # print("Enter Choice: ", end="", flush=True) + while choice == "": + choice = input("Enter Choice: ") + if choice != "": + break # done with reading repeated input + if choice in ["1", "2", "3"]: + break # we are done with whole method + print("Invalid choice, please try again.") + choice = "" # reset + return choice + + def sigUsrHandler(self, signalNumber, frame): + print("Interrupting main thread execution upon SIGUSR1") + if self.inSigHandler: # already + print("Ignoring repeated SIG...") + return # do nothing if it's already not running + self.inSigHandler = True + + choice = self._doMenu() + if choice == "1": + self.sigHandlerResume() # TODO: can the sub-process be blocked due to us not reading from queue? + elif choice == "2": + self.stopTaosServices() + elif choice == "3": # Restart + self.restart() + else: + raise RuntimeError("Invalid menu choice: {}".format(choice)) + + self.inSigHandler = False + + def sigIntHandler(self, signalNumber, frame): + print("ServiceManager: INT Signal Handler starting...") + if self.inSigHandler: + print("Ignoring repeated SIG_INT...") + return + self.inSigHandler = True + + self.stopTaosServices() + print("ServiceManager: INT Signal Handler returning...") + self.inSigHandler = False + + def sigHandlerResume(self): + print("Resuming TDengine service manager (main thread)...\n\n") + + # def _updateThreadStatus(self): + # if self.svcMgrThread: # valid svc mgr thread + # if self.svcMgrThread.isStopped(): # done? + # self.svcMgrThread.procIpcBatch() # one last time. TODO: appropriate? + # self.svcMgrThread = None # no more + + def isActive(self): + """ + Determine if the service/cluster is active at all, i.e. at least + one thread is not "stopped". + """ + for ti in self._tInsts: + if not ti.getStatus().isStopped(): + return True + return False + + def isRunning(self): + for ti in self._tInsts: + if not ti.getStatus().isRunning(): + return False + return True + + + # def isRestarting(self): + # """ + # Determine if the service/cluster is being "restarted", i.e., at least + # one thread is in "restarting" status + # """ + # for thread in self.svcMgrThreads: + # if thread.isRestarting(): + # return True + # return False + + def isStable(self): + """ + Determine if the service/cluster is "stable", i.e. all of the + threads are in "stable" status. + """ + for ti in self._tInsts: + if not ti.getStatus().isStable(): + return False + return True + + def _procIpcAll(self): + while self.isActive(): + Progress.emit(Progress.SERVICE_HEART_BEAT) + for ti in self._tInsts: # all thread objects should always be valid + # while self.isRunning() or self.isRestarting() : # for as long as the svc mgr thread is still here + status = ti.getStatus() + if status.isRunning(): + th = ti.getSmThread() + th.procIpcBatch() # regular processing, + if status.isStopped(): + th.procIpcBatch() # one last time? + # self._updateThreadStatus() + + time.sleep(self.PAUSE_BETWEEN_IPC_CHECK) # pause, before next round + # raise CrashGenError("dummy") + print("Service Manager Thread (with subprocess) ended, main thread exiting...") + + def _getFirstInstance(self): + return self._tInsts[0] + + def startTaosServices(self): + with self._lock: + if self.isActive(): + raise RuntimeError("Cannot start TAOS service(s) when one/some may already be running") + + # Find if there's already a taosd service, and then kill it + for proc in psutil.process_iter(): + if proc.name() == 'taosd': + print("Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupt") + time.sleep(2.0) + proc.kill() + # print("Process: {}".format(proc.name())) + + # self.svcMgrThread = ServiceManagerThread() # create the object + + for ti in self._tInsts: + ti.start() + if not ti.isFirst(): + tFirst = self._getFirstInstance() + tFirst.createDnode(ti.getDbTarget()) + ti.getSmThread().procIpcBatch(trimToTarget=10, forceOutput=True) # for printing 10 lines + + def stopTaosServices(self): + with self._lock: + if not self.isActive(): + Logging.warning("Cannot stop TAOS service(s), already not active") + return + + for ti in self._tInsts: + ti.stop() + + def run(self): + self.startTaosServices() + self._procIpcAll() # pump/process all the messages, may encounter SIG + restart + if self.isActive(): # if sig handler hasn't destroyed it by now + self.stopTaosServices() # should have started already + + def restart(self): + if not self.isStable(): + Logging.warning("Cannot restart service/cluster, when not stable") + return + + # self._isRestarting = True + if self.isActive(): + self.stopTaosServices() + else: + Logging.warning("Service not active when restart requested") + + self.startTaosServices() + # self._isRestarting = False + + # def isRunning(self): + # return self.svcMgrThread != None + + # def isRestarting(self): + # return self._isRestarting + +class ServiceManagerThread: + """ + A class representing a dedicated thread which manages the "sub process" + of the TDengine service, interacting with its STDOUT/ERR. + + It takes a TdeInstance parameter at creation time, or create a default + """ + MAX_QUEUE_SIZE = 10000 + + def __init__(self): + # Set the sub process + self._tdeSubProcess = None # type: TdeSubProcess + + # Arrange the TDengine instance + # self._tInstNum = tInstNum # instance serial number in cluster, ZERO based + # self._tInst = tInst or TdeInstance() # Need an instance + + self._thread = None # The actual thread, # type: threading.Thread + self._status = Status(Status.STATUS_STOPPED) # The status of the underlying service, actually. + + def __repr__(self): + return "[SvcMgrThread: status={}, subProc={}]".format( + self.getStatus(), self._tdeSubProcess) + + def getStatus(self): + return self._status + + # Start the thread (with sub process), and wait for the sub service + # to become fully operational + def start(self, cmdLine): + if self._thread: + raise RuntimeError("Unexpected _thread") + if self._tdeSubProcess: + raise RuntimeError("TDengine sub process already created/running") + + Logging.info("Attempting to start TAOS service: {}".format(self)) + + self._status.set(Status.STATUS_STARTING) + self._tdeSubProcess = TdeSubProcess() + self._tdeSubProcess.start(cmdLine) + + self._ipcQueue = Queue() + self._thread = threading.Thread( # First thread captures server OUTPUT + target=self.svcOutputReader, + args=(self._tdeSubProcess.getStdOut(), self._ipcQueue)) + self._thread.daemon = True # thread dies with the program + self._thread.start() + + self._thread2 = threading.Thread( # 2nd thread captures server ERRORs + target=self.svcErrorReader, + args=(self._tdeSubProcess.getStdErr(), self._ipcQueue)) + self._thread2.daemon = True # thread dies with the program + self._thread2.start() + + # wait for service to start + for i in range(0, 100): + time.sleep(1.0) + # self.procIpcBatch() # don't pump message during start up + print("_zz_", end="", flush=True) + if self._status.isRunning(): + Logging.info("[] TDengine service READY to process requests") + Logging.info("[] TAOS service started: {}".format(self)) + # self._verifyDnode(self._tInst) # query and ensure dnode is ready + # Logging.debug("[] TAOS Dnode verified: {}".format(self)) + return # now we've started + # TODO: handle failure-to-start better? + self.procIpcBatch(100, True) # display output before cronking out, trim to last 20 msgs, force output + raise RuntimeError("TDengine service did not start successfully: {}".format(self)) + + def _verifyDnode(self, tInst: TdeInstance): + dbc = DbConn.createNative(tInst.getDbTarget()) + dbc.open() + dbc.query("show dnodes") + # dbc.query("DESCRIBE {}.{}".format(dbName, self._stName)) + cols = dbc.getQueryResult() # id,end_point,vnodes,cores,status,role,create_time,offline reason + # ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type + isValid = False + for col in cols: + # print("col = {}".format(col)) + ep = col[1].split(':') # 10.1.30.2:6030 + print("Found ep={}".format(ep)) + if tInst.getPort() == int(ep[1]): # That's us + # print("Valid Dnode matched!") + isValid = True # now we are valid + break + if not isValid: + print("Failed to start dnode, sleep for a while") + time.sleep(600) + raise RuntimeError("Failed to start Dnode, expected port not found: {}". + format(tInst.getPort())) + dbc.close() + + def stop(self): + # can be called from both main thread or signal handler + print("Terminating TDengine service running as the sub process...") + if self.getStatus().isStopped(): + print("Service already stopped") + return + if self.getStatus().isStopping(): + print("Service is already being stopped") + return + # Linux will send Control-C generated SIGINT to the TDengine process + # already, ref: + # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes + if not self._tdeSubProcess: + raise RuntimeError("sub process object missing") + + self._status.set(Status.STATUS_STOPPING) + # retCode = self._tdeSubProcess.stop() + try: + retCode = self._tdeSubProcess.stop() + # print("Attempted to stop sub process, got return code: {}".format(retCode)) + if retCode == signal.SIGSEGV : # SGV + Logging.error("[[--ERROR--]]: TDengine service SEGV fault (check core file!)") + except subprocess.TimeoutExpired as err: + print("Time out waiting for TDengine service process to exit") + else: + if self._tdeSubProcess.isRunning(): # still running, should now never happen + print("FAILED to stop sub process, it is still running... pid = {}".format( + self._tdeSubProcess.getPid())) + else: + self._tdeSubProcess = None # not running any more + self.join() # stop the thread, change the status, etc. + + # Check if it's really stopped + outputLines = 10 # for last output + if self.getStatus().isStopped(): + self.procIpcBatch(outputLines) # one last time + Logging.debug("End of TDengine Service Output: {}".format(self)) + Logging.info("----- TDengine Service (managed by SMT) is now terminated -----\n") + else: + print("WARNING: SMT did not terminate as expected: {}".format(self)) + + def join(self): + # TODO: sanity check + if not self.getStatus().isStopping(): + raise RuntimeError( + "SMT.Join(): Unexpected status: {}".format(self._status)) + + if self._thread: + self._thread.join() + self._thread = None + self._status.set(Status.STATUS_STOPPED) + # STD ERR thread + self._thread2.join() + self._thread2 = None + else: + print("Joining empty thread, doing nothing") + + def _trimQueue(self, targetSize): + if targetSize <= 0: + return # do nothing + q = self._ipcQueue + if (q.qsize() <= targetSize): # no need to trim + return + + Logging.debug("Triming IPC queue to target size: {}".format(targetSize)) + itemsToTrim = q.qsize() - targetSize + for i in range(0, itemsToTrim): + try: + q.get_nowait() + except Empty: + break # break out of for loop, no more trimming + + TD_READY_MSG = "TDengine is initialized successfully" + + def procIpcBatch(self, trimToTarget=0, forceOutput=False): + self._trimQueue(trimToTarget) # trim if necessary + # Process all the output generated by the underlying sub process, + # managed by IO thread + print("<", end="", flush=True) + while True: + try: + line = self._ipcQueue.get_nowait() # getting output at fast speed + self._printProgress("_o") + except Empty: + # time.sleep(2.3) # wait only if there's no output + # no more output + print(".>", end="", flush=True) + return # we are done with THIS BATCH + else: # got line, printing out + if forceOutput: + Logging.info(line) + else: + Logging.debug(line) + print(">", end="", flush=True) + + _ProgressBars = ["--", "//", "||", "\\\\"] + + def _printProgress(self, msg): # TODO: assuming 2 chars + print(msg, end="", flush=True) + pBar = self._ProgressBars[Dice.throw(4)] + print(pBar, end="", flush=True) + print('\b\b\b\b', end="", flush=True) + + def svcOutputReader(self, out: IO, queue): + # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python + # print("This is the svcOutput Reader...") + # for line in out : + for line in iter(out.readline, b''): + # print("Finished reading a line: {}".format(line)) + # print("Adding item to queue...") + try: + line = line.decode("utf-8").rstrip() + except UnicodeError: + print("\nNon-UTF8 server output: {}\n".format(line)) + + # This might block, and then causing "out" buffer to block + queue.put(line) + self._printProgress("_i") + + if self._status.isStarting(): # we are starting, let's see if we have started + if line.find(self.TD_READY_MSG) != -1: # found + Logging.info("Waiting for the service to become FULLY READY") + time.sleep(1.0) # wait for the server to truly start. TODO: remove this + Logging.info("Service is now FULLY READY") # TODO: more ID info here? + self._status.set(Status.STATUS_RUNNING) + + # Trim the queue if necessary: TODO: try this 1 out of 10 times + self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10) # trim to 90% size + + if self._status.isStopping(): # TODO: use thread status instead + # WAITING for stopping sub process to finish its outptu + print("_w", end="", flush=True) + + # queue.put(line) + # meaning sub process must have died + Logging.info("\nEnd of stream detected for TDengine STDOUT: {}".format(self)) + out.close() + + def svcErrorReader(self, err: IO, queue): + for line in iter(err.readline, b''): + print("\nTDengine Service (taosd) ERROR (from stderr): {}".format(line)) + Logging.info("\nEnd of stream detected for TDengine STDERR: {}".format(self)) + err.close() \ No newline at end of file diff --git a/tests/pytest/crash_gen_bootstrap.py b/tests/pytest/crash_gen_bootstrap.py new file mode 100644 index 0000000000..a3417d21a8 --- /dev/null +++ b/tests/pytest/crash_gen_bootstrap.py @@ -0,0 +1,23 @@ +# -----!/usr/bin/python3.7 +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +import sys +from crash_gen.crash_gen import MainExec + +if __name__ == "__main__": + + mExec = MainExec() + mExec.init() + exitCode = mExec.run() + + print("Exiting with code: {}".format(exitCode)) + sys.exit(exitCode) diff --git a/tests/pytest/fulltest.sh b/tests/pytest/fulltest.sh index 39d0fa3d94..dadf918efd 100755 --- a/tests/pytest/fulltest.sh +++ b/tests/pytest/fulltest.sh @@ -24,6 +24,7 @@ python3 ./test.py -f table/alter_wal0.py python3 ./test.py -f table/column_name.py python3 ./test.py -f table/column_num.py python3 ./test.py -f table/db_table.py +python3 ./test.py -f table/create_sensitive.py #python3 ./test.py -f table/tablename-boundary.py # tag @@ -186,7 +187,7 @@ python3 ./test.py -f functions/function_leastsquares.py -r 1 python3 ./test.py -f functions/function_max.py -r 1 python3 ./test.py -f functions/function_min.py -r 1 python3 ./test.py -f functions/function_operations.py -r 1 -python3 ./test.py -f functions/function_percentile.py +python3 ./test.py -f functions/function_percentile.py -r 1 python3 ./test.py -f functions/function_spread.py -r 1 python3 ./test.py -f functions/function_stddev.py -r 1 python3 ./test.py -f functions/function_sum.py -r 1 diff --git a/tests/pytest/query/queryJoin.py b/tests/pytest/query/queryJoin.py index 17027cf498..a5e3ab21b3 100644 --- a/tests/pytest/query/queryJoin.py +++ b/tests/pytest/query/queryJoin.py @@ -95,14 +95,16 @@ class TDTestCase: tdSql.error( "select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.id = stb_t.id group by stb_t.id") tdSql.error( - "select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.id = stb_t.name;") - tdSql.error( - "select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.location = stb_t.name") + "select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.id = stb_t.name;") tdSql.execute("alter table stb_t add tag pid int") tdSql.execute("alter table tb_t1 set tag pid=2") tdSql.execute("alter table tb_t2 set tag pid=1") + tdSql.query( + "select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.location = stb_t.name") + tdSql.checkRows(0) + tdSql.query("select stb_t.ts, stb_t.dscrption, stb_t.temperature, stb_t.id, stb_p.dscrption, stb_p.pressure from stb_p, stb_t where stb_p.ts=stb_t.ts and stb_p.id = stb_t.pid") tdSql.checkRows(3) diff --git a/tests/pytest/query/queryPerformance.py b/tests/pytest/query/queryPerformance.py index a7fc08c5a3..e09900acc4 100644 --- a/tests/pytest/query/queryPerformance.py +++ b/tests/pytest/query/queryPerformance.py @@ -11,6 +11,7 @@ # -*- coding: utf-8 -*- + import sys import os import taos @@ -32,17 +33,23 @@ class taosdemoQueryPerformace: def query(self): cursor = self.conn.cursor() - cursor.execute("use test") + cursor.execute("use test") totalTime = 0 - for i in range(100): - startTime = time.time() + for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") + startTime = time.time() cursor.execute("select count(*) from test.meters") totalTime += time.time() - startTime print("query time for: select count(*) from test.meters %f seconds" % (totalTime / 100)) totalTime = 0 for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") startTime = time.time() cursor.execute("select avg(f1), max(f2), min(f3) from test.meters") totalTime += time.time() - startTime @@ -50,6 +57,9 @@ class taosdemoQueryPerformace: totalTime = 0 for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") startTime = time.time() cursor.execute("select count(*) from test.meters where loc='beijing'") totalTime += time.time() - startTime @@ -57,6 +67,9 @@ class taosdemoQueryPerformace: totalTime = 0 for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") startTime = time.time() cursor.execute("select avg(f1), max(f2), min(f3) from test.meters where areaid=10") totalTime += time.time() - startTime @@ -64,6 +77,9 @@ class taosdemoQueryPerformace: totalTime = 0 for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") startTime = time.time() cursor.execute("select avg(f1), max(f2), min(f3) from test.t10 interval(10s)") totalTime += time.time() - startTime @@ -71,11 +87,34 @@ class taosdemoQueryPerformace: totalTime = 0 for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") startTime = time.time() cursor.execute("select last_row(*) from meters") totalTime += time.time() - startTime print("query time for: select last_row(*) from meters %f seconds" % (totalTime / 100)) + totalTime = 0 + for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") + startTime = time.time() + cursor.execute("select * from meters") + totalTime += time.time() - startTime + print("query time for: select * from meters %f seconds" % (totalTime / 100)) + + totalTime = 0 + for i in range(100): + if(sys.argv[1] == '1'): + # root permission is required + os.system("echo 3 > /proc/sys/vm/drop_caches") + startTime = time.time() + cursor.execute("select avg(f1), max(f2), min(f3) from meters where ts <= '2017-07-15 10:40:01.000' and ts <= '2017-07-15 14:00:40.000'") + totalTime += time.time() - startTime + print("query time for: select avg(f1), max(f2), min(f3) from meters where ts <= '2017-07-15 10:40:01.000' and ts <= '2017-07-15 14:00:40.000' %f seconds" % (totalTime / 100)) + if __name__ == '__main__': perftest = taosdemoQueryPerformace() perftest.initConnection() diff --git a/tests/pytest/query/querySort.py b/tests/pytest/query/querySort.py index e5d3c8ce1f..649e0dc1cb 100644 --- a/tests/pytest/query/querySort.py +++ b/tests/pytest/query/querySort.py @@ -96,6 +96,12 @@ class TDTestCase: tdSql.query("select * from st order by ts desc") self.checkColumnSorted(0, "desc") + print("======= step 2: verify order for special column =========") + + tdSql.query("select tbcol1 from st order by ts desc") + + tdSql.query("select tbcol6 from st order by ts desc") + for i in range(1, 10): tdSql.error("select * from st order by tbcol%d" % i) tdSql.error("select * from st order by tbcol%d asc" % i) diff --git a/tests/pytest/stream/new.py b/tests/pytest/stream/new.py index eac93dc0e6..12ec6d4507 100644 --- a/tests/pytest/stream/new.py +++ b/tests/pytest/stream/new.py @@ -26,7 +26,6 @@ class TDTestCase: def run(self): rowNum = 200 - totalNum = 200 tdSql.prepare() tdLog.info("=============== step1") @@ -42,7 +41,9 @@ class TDTestCase: tdSql.execute("create table st as select count(*), count(tbcol), count(tbcol2) from mt interval(10s)") tdLog.info("=============== step3") + start = time.time() tdSql.waitedQuery("select * from st", 1, 120) + delay = int(time.time() - start) + 20 v = tdSql.getData(0, 3) if v >= 51: tdLog.exit("value is %d, which is larger than 51" % v) @@ -54,11 +55,18 @@ class TDTestCase: tdSql.execute("insert into tb%d values(now + %ds, %d, %d)" % (i, j, j, j)) tdLog.info("=============== step5") - tdLog.sleep(40) - tdSql.waitedQuery("select * from st order by ts desc", 1, 120) - v = tdSql.getData(0, 3) - if v <= 51: - tdLog.exit("value is %d, which is smaller than 51" % v) + maxValue = 0 + for i in range(delay): + time.sleep(1) + tdSql.query("select * from st order by ts desc") + v = tdSql.getData(0, 3) + if v > maxValue: + maxValue = v + if v > 51: + break + + if maxValue <= 51: + tdLog.exit("value is %d, which is smaller than 51" % maxValue) def stop(self): tdSql.close() diff --git a/tests/pytest/table/create_sensitive.py b/tests/pytest/table/create_sensitive.py new file mode 100644 index 0000000000..1934b662c7 --- /dev/null +++ b/tests/pytest/table/create_sensitive.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +import sys +import string +import random +import subprocess +from util.log import * +from util.cases import * +from util.sql import * + + +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + + def run(self): + tdSql.prepare() + + tdLog.info('=============== step1') + tdLog.info('create table TestSensitiveT(ts timestamp, i int)') + tdSql.execute('create table TestSensitiveT(ts timestamp, i int)') + tdLog.info('create table TestSensitiveSt(ts timestamp,i int) tags(j int)') + tdSql.execute('create table TestSensitiveSt(ts timestamp,i int) tags(j int)') + tdLog.info('create table Abcde using TestSensitiveSt tags(1)') + tdSql.execute('create table AbcdeFgh using TestSensitiveSt tags(1)') + tdLog.info('=============== step2') + tdLog.info('test normal table ') + tdSql.error('create table testsensitivet(ts timestamp, i int)') + tdSql.error('create table testsensitivet(ts timestamp, j int)') + tdSql.error('create table testsensItivet(ts timestamp, j int)') + tdSql.error('create table TESTSENSITIVET(ts timestamp, i int)') + tdLog.info('=============== step3') + tdLog.info('test super table ') + tdSql.error('create table testsensitivest(ts timestamp,i int) tags(j int)') + tdSql.error('create table testsensitivest(ts timestamp,i int) tags(k int)') + tdSql.error('create table TESTSENSITIVEST(ts timestamp,i int) tags(j int)') + tdSql.error('create table Testsensitivest(ts timestamp,i int) tags(j int)') + tdLog.info('=============== step4') + tdLog.info('test subtable ') + tdSql.error('create table abcdefgh using TestSensitiveSt tags(1)') + tdSql.error('create table ABCDEFGH using TestSensitiveSt tags(1)') + tdSql.error('create table Abcdefgh using TestSensitiveSt tags(1)') + tdSql.error('create table abcdeFgh using TestSensitiveSt tags(1)') + tdSql.error('insert into table abcdefgh using TestSensitiveSt tags(1) values(now,1)') + tdSql.error('insert into table ABCDEFGH using TestSensitiveSt tags(1) values(now,1)') + tdSql.error('insert into table Abcdefgh using TestSensitiveSt tags(1) values(now,1)') + tdSql.error('insert into table abcdeFgH using TestSensitiveSt tags(1) values(now,1)') + tdSql.query('show tables') + tdLog.info('tdSql.checkRow(0)') + tdSql.checkRows(2) + + + + + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/script/general/parser/groupby.sim b/tests/script/general/parser/groupby.sim index bd0d3c1a12..b70fe88e81 100644 --- a/tests/script/general/parser/groupby.sim +++ b/tests/script/general/parser/groupby.sim @@ -27,7 +27,7 @@ $mt = $mtPrefix . $i $tstart = 100000 -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db keep 36500 sql use $db diff --git a/tests/script/general/parser/join.sim b/tests/script/general/parser/join.sim index 254571bda1..79b30ffe92 100644 --- a/tests/script/general/parser/join.sim +++ b/tests/script/general/parser/join.sim @@ -24,7 +24,7 @@ $mt = $mtPrefix . $i $tstart = 100000 -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db keep 36500 sql use $db diff --git a/tests/script/general/parser/join_multivnode.sim b/tests/script/general/parser/join_multivnode.sim index 51f1ef11c7..5968a9cd5e 100644 --- a/tests/script/general/parser/join_multivnode.sim +++ b/tests/script/general/parser/join_multivnode.sim @@ -22,7 +22,7 @@ $mt = $mtPrefix . $i $tstart = 100000 -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db keep 36500 sql use $db diff --git a/tests/script/general/parser/projection_limit_offset.sim b/tests/script/general/parser/projection_limit_offset.sim index fbff99d58f..127ade66c5 100644 --- a/tests/script/general/parser/projection_limit_offset.sim +++ b/tests/script/general/parser/projection_limit_offset.sim @@ -21,7 +21,7 @@ $mt = $mtPrefix . $i $tstart = 100000 -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db keep 36500 sql use $db diff --git a/tests/script/general/parser/sliding.sim b/tests/script/general/parser/sliding.sim index f85211beb8..ec0e31311a 100644 --- a/tests/script/general/parser/sliding.sim +++ b/tests/script/general/parser/sliding.sim @@ -26,7 +26,7 @@ $i = 0 $db = $dbPrefix . $i $mt = $mtPrefix . $i -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db maxtables 4 keep 36500 sql use $db diff --git a/tests/script/general/parser/testSuite.sim b/tests/script/general/parser/testSuite.sim index 3dd80b8e38..b848408925 100644 --- a/tests/script/general/parser/testSuite.sim +++ b/tests/script/general/parser/testSuite.sim @@ -1,51 +1,51 @@ -sleep 2000 -run general/parser/alter.sim -sleep 2000 -run general/parser/alter1.sim -sleep 2000 -run general/parser/alter_stable.sim -sleep 2000 -run general/parser/auto_create_tb.sim -sleep 2000 -run general/parser/auto_create_tb_drop_tb.sim -sleep 2000 -run general/parser/col_arithmetic_operation.sim -sleep 2000 -run general/parser/columnValue.sim -sleep 2000 -run general/parser/commit.sim -sleep 2000 -run general/parser/create_db.sim -sleep 2000 -run general/parser/create_mt.sim -sleep 2000 -run general/parser/create_tb.sim -sleep 2000 -run general/parser/dbtbnameValidate.sim -sleep 2000 -run general/parser/fill.sim -sleep 2000 -run general/parser/fill_stb.sim -sleep 2000 -#run general/parser/fill_us.sim # -sleep 2000 -run general/parser/first_last.sim -sleep 2000 -run general/parser/import_commit1.sim -sleep 2000 -run general/parser/import_commit2.sim -sleep 2000 -run general/parser/import_commit3.sim -sleep 2000 -#run general/parser/import_file.sim -sleep 2000 -run general/parser/insert_tb.sim -sleep 2000 -run general/parser/tags_dynamically_specifiy.sim -sleep 2000 -run general/parser/interp.sim -sleep 2000 -run general/parser/lastrow.sim +#sleep 2000 +#run general/parser/alter.sim +#sleep 2000 +#run general/parser/alter1.sim +#sleep 2000 +#run general/parser/alter_stable.sim +#sleep 2000 +#run general/parser/auto_create_tb.sim +#sleep 2000 +#run general/parser/auto_create_tb_drop_tb.sim +#sleep 2000 +#run general/parser/col_arithmetic_operation.sim +#sleep 2000 +#run general/parser/columnValue.sim +#sleep 2000 +#run general/parser/commit.sim +#sleep 2000 +#run general/parser/create_db.sim +#sleep 2000 +#run general/parser/create_mt.sim +#sleep 2000 +#run general/parser/create_tb.sim +#sleep 2000 +#run general/parser/dbtbnameValidate.sim +#sleep 2000 +#run general/parser/fill.sim +#sleep 2000 +#run general/parser/fill_stb.sim +#sleep 2000 +##run general/parser/fill_us.sim # +#sleep 2000 +#run general/parser/first_last.sim +#sleep 2000 +#run general/parser/import_commit1.sim +#sleep 2000 +#run general/parser/import_commit2.sim +#sleep 2000 +#run general/parser/import_commit3.sim +#sleep 2000 +##run general/parser/import_file.sim +#sleep 2000 +#run general/parser/insert_tb.sim +#sleep 2000 +#run general/parser/tags_dynamically_specifiy.sim +#sleep 2000 +#run general/parser/interp.sim +#sleep 2000 +#run general/parser/lastrow.sim sleep 2000 run general/parser/limit.sim sleep 2000 diff --git a/tests/script/general/parser/union.sim b/tests/script/general/parser/union.sim index 4af482bde0..024b9c76ef 100644 --- a/tests/script/general/parser/union.sim +++ b/tests/script/general/parser/union.sim @@ -27,7 +27,7 @@ $j = 1 $mt1 = $mtPrefix . $j -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db sql use $db diff --git a/tests/script/general/parser/where.sim b/tests/script/general/parser/where.sim index 5cac3f4723..066fac43ad 100644 --- a/tests/script/general/parser/where.sim +++ b/tests/script/general/parser/where.sim @@ -20,7 +20,7 @@ $i = 0 $db = $dbPrefix . $i $mt = $mtPrefix . $i -sql drop database if exits $db -x step1 +sql drop database if exists $db -x step1 step1: sql create database if not exists $db sql use $db