diff --git a/cmake/curl_CMakeLists.txt.in b/cmake/curl_CMakeLists.txt.in index af7f005dda..197d978fb7 100644 --- a/cmake/curl_CMakeLists.txt.in +++ b/cmake/curl_CMakeLists.txt.in @@ -1,6 +1,7 @@ # curl ExternalProject_Add(curl2 - URL https://curl.se/download/curl-8.2.1.tar.gz + URL https://github.com/curl/curl/releases/download/curl-8_2_1/curl-8.2.1.tar.gz + #URL https://curl.se/download/curl-8.2.1.tar.gz URL_HASH MD5=b25588a43556068be05e1624e0e74d41 DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 75dafcdbff..da2325f006 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -30,7 +30,8 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored); int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); -int32_t startStreamTasks(SStreamMeta* pMeta); -int32_t resetStreamTaskStatus(SStreamMeta* pMeta); +int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta); +int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); +int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); #endif // TDENGINE_TQ_COMMON_H diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 918e50a9d0..2d2db5c1dc 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -53,6 +53,7 @@ extern "C" { #define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) #define STREAM_EXEC_START_ALL_TASKS_ID (-2) #define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) +#define STREAM_EXEC_STOP_ALL_TASKS_ID (-4) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; @@ -314,6 +315,7 @@ typedef struct SCheckpointInfo { int32_t checkpointNotReadyTasks; bool dispatchCheckpointTrigger; int64_t msgVer; + int32_t transId; } SCheckpointInfo; typedef struct SStreamStatus { @@ -324,6 +326,7 @@ typedef struct SStreamStatus { int8_t keepTaskStatus; bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it int32_t timerActive; // timer is active + int8_t allowedAddInTimer; // allowed to add into timer int32_t inScanHistorySentinel; } SStreamStatus; @@ -460,14 +463,18 @@ struct SStreamTask { char reserve[256]; }; +typedef int32_t (*startComplete_fn_t)(struct SStreamMeta*); + typedef struct STaskStartInfo { int64_t startTs; int64_t readyTs; int32_t tasksWillRestart; - int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. - SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing - SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed + int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. 
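A minimal illustrative sketch, not part of this patch, of how a start-complete callback matching the startComplete_fn_t typedef added above can be counted and fired once every task has reported its downstream-check result. SDemoStartInfo and demoOnTaskReported are hypothetical names; only startComplete_fn_t and the idea of tracking ready/failed tasks come from this diff.

#include <stddef.h>
#include <stdint.h>

struct SStreamMeta;                                    /* opaque, as in tstream.h */
typedef int32_t (*startComplete_fn_t)(struct SStreamMeta*);

typedef struct SDemoStartInfo {
  int32_t            reported;     /* tasks that reported ready or failed so far */
  int32_t            expected;     /* total tasks in this start round */
  startComplete_fn_t completeFn;   /* e.g. tqStartTaskCompleteCallback */
} SDemoStartInfo;

/* invoked whenever one task finishes its downstream check, success or failure */
static void demoOnTaskReported(SDemoStartInfo* pInfo, struct SStreamMeta* pMeta) {
  if (++pInfo->reported == pInfo->expected && pInfo->completeFn != NULL) {
    (void)pInfo->completeFn(pMeta);                    /* one callback per start round */
  }
}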
+ SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing + SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed int64_t elapsedTime; + int32_t restartCount; // restart task counter + startComplete_fn_t completeFn; // complete callback function } STaskStartInfo; typedef struct STaskUpdateInfo { @@ -489,8 +496,9 @@ typedef struct SStreamMeta { int32_t vgId; int64_t stage; int32_t role; + bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower. STaskStartInfo startInfo; - SRWLatch lock; + TdThreadRwlock lock; int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; @@ -629,6 +637,7 @@ typedef struct { int32_t nodeId; SEpSet mgmtEps; int32_t mnodeId; + int32_t transId; int64_t expireTime; } SStreamCheckpointSourceReq; @@ -671,8 +680,8 @@ typedef struct STaskStatusEntry { int64_t verStart; // start version in WAL, only valid for source task int64_t verEnd; // end version in WAL, only valid for source task int64_t processedVer; // only valid for source task - int32_t relatedHTask; // has related fill-history task int64_t activeCheckpointId; // current active checkpoint id + int32_t chkpointTransId; // checkpoint trans id bool checkpointFailed; // denote if the checkpoint is failed or not bool inputQChanging; // inputQ is changing or not int64_t inputQUnchangeCounter; @@ -811,6 +820,7 @@ int32_t streamTaskReleaseState(SStreamTask* pTask); int32_t streamTaskReloadState(SStreamTask* pTask); void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); +int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); @@ -828,22 +838,21 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); // stream task meta void streamMetaInit(); void streamMetaCleanup(); -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage); +SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage, startComplete_fn_t fn); void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pKey); int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); +SStreamTask* streamMetaAcquireTaskNoLock(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); void streamMetaClear(SStreamMeta* pMeta); void streamMetaInitBackend(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); -int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); -int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); void streamMetaStartHb(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta); int32_t streamMetaUpdateTaskDownstreamStatus(SStreamMeta* pMeta, int64_t streamId, 
int32_t taskId, int64_t startTs, @@ -853,6 +862,11 @@ void streamMetaRUnLock(SStreamMeta* pMeta); void streamMetaWLock(SStreamMeta* pMeta); void streamMetaWUnLock(SStreamMeta* pMeta); void streamMetaResetStartInfo(STaskStartInfo* pMeta); +SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta); +void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader); +int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta); +int32_t streamMetaStopAllTasks(SStreamMeta* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index fd88098b03..fdac3882c3 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1432,6 +1432,13 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) { terrno = TSDB_CODE_SUCCESS; + if (strcasecmp(name, "resetlog") == 0) { + // trigger, no item in cfg + taosResetLog(); + cfgDumpCfg(tsCfg, 0, false); + return 0; + } + SConfigItem *pItem = cfgGetItem(pCfg, name); if (!pItem || (pItem->dynScope & CFG_DYN_SERVER) == 0) { uError("failed to config:%s, not support", name); @@ -1445,12 +1452,6 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) { return 0; } - if (strcasecmp(name, "resetlog") == 0) { - taosResetLog(); - cfgDumpCfg(tsCfg, 0, false); - return 0; - } - { // 'bool/int32_t/int64_t/float/double' variables with general modification function static OptionNameAndVar debugOptions[] = { {"dDebugFlag", &dDebugFlag}, {"vDebugFlag", &vDebugFlag}, {"mDebugFlag", &mDebugFlag}, @@ -1774,4 +1775,4 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { uInfo("all debug flag are set to %d", flag); } -int8_t taosGranted() { return atomic_load_8(&tsGrant); } \ No newline at end of file +int8_t taosGranted() { return atomic_load_8(&tsGrant); } diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 6de29f8513..444739e461 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -90,7 +90,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index b0f6273acc..4d06b16297 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -44,12 +44,11 @@ typedef struct SStreamTransMgmt { } SStreamTransMgmt; typedef struct SStreamExecInfo { - SArray * pNodeList; + SArray *pNodeList; int64_t ts; // snapshot ts SStreamTransMgmt transMgmt; - int64_t activeCheckpoint; // active check point id - SHashObj * pTaskMap; - SArray * pTaskList; + SHashObj *pTaskMap; + SArray *pTaskList; TdThreadMutex lock; } SStreamExecInfo; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 62840f7e1f..25f51b85df 100644 --- 
a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -62,7 +62,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter); static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq); static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq); static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, - int64_t streamId, int32_t taskId); + int64_t streamId, int32_t taskId, int32_t transId); static int32_t mndProcessNodeCheck(SRpcMsg *pReq); static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); static SArray *extractNodeListFromStream(SMnode *pMnode); @@ -685,6 +685,40 @@ _OVER: return -1; } +static int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool* hasEpset, int32_t taskId, int32_t nodeId) { + *hasEpset = false; + + if (nodeId == SNODE_HANDLE) { + SSnodeObj *pObj = NULL; + void *pIter = NULL; + + pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj); + if (pIter != NULL) { + addEpIntoEpSet(pEpSet, pObj->pDnode->fqdn, pObj->pDnode->port); + sdbRelease(pMnode->pSdb, pObj); + sdbCancelFetch(pMnode->pSdb, pIter); + *hasEpset = true; + return TSDB_CODE_SUCCESS; + } else { + mError("failed to acquire snode epset"); + return TSDB_CODE_INVALID_PARA; + } + } else { + SVgObj *pVgObj = mndAcquireVgroup(pMnode, nodeId); + if (pVgObj != NULL) { + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + + epsetAssign(pEpSet, &epset); + *hasEpset = true; + return TSDB_CODE_SUCCESS; + } else { + mDebug("orphaned task:0x%x need to be dropped, nodeId:%d, no redo action", taskId, nodeId); + return TSDB_CODE_SUCCESS; + } + } +} + static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { @@ -698,28 +732,17 @@ static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask STransAction action = {0}; SEpSet epset = {0}; - if (pTask->info.nodeId == SNODE_HANDLE) { - SSnodeObj *pObj = NULL; - void *pIter = NULL; - while (1) { - pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj); - if (pIter == NULL) { - break; - } + bool hasEpset = false; - addEpIntoEpSet(&epset, pObj->pDnode->fqdn, pObj->pDnode->port); - sdbRelease(pMnode->pSdb, pObj); - } - } else { - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - if (pVgObj != NULL) { - epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); - } else { - mDebug("orphaned task:0x%x need to be dropped, nodeId:%d, no redo action", pTask->id.taskId, pTask->info.nodeId); - taosMemoryFree(pReq); - return 0; - } + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return -1; + } + + // no valid epset, return directly without redoAction + if (!hasEpset) { + return TSDB_CODE_SUCCESS; } // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. 
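A toy illustration, not part of this patch, of the two-result convention used by the new extractNodeEpset() helper in this hunk: a non-zero return code means a hard failure, while a zero code with *hasEpset == false means the node is simply gone and the caller should skip its redo action. DemoEpSet and demoResolveEpset are hypothetical stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { char fqdn[128]; uint16_t port; } DemoEpSet;   /* stand-in for SEpSet */

static int32_t demoResolveEpset(int32_t nodeId, DemoEpSet* pEpSet, bool* hasEpset) {
  *hasEpset = false;
  if (nodeId < 0) {
    return -1;                 /* hard failure: caller aborts and sets terrno */
  }
  if (nodeId == 0) {
    return 0;                  /* orphaned node: success, but no redo action is added */
  }
  snprintf(pEpSet->fqdn, sizeof(pEpSet->fqdn), "node-%d.local", nodeId);
  pEpSet->port = 6030;
  *hasEpset = true;
  return 0;
}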
@@ -974,13 +997,14 @@ static int32_t mndProcessStreamRemainChkptTmr(SRpcMsg *pReq) { } static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, - int64_t streamId, int32_t taskId) { + int64_t streamId, int32_t taskId, int32_t transId) { SStreamCheckpointSourceReq req = {0}; req.checkpointId = checkpointId; req.nodeId = nodeId; req.expireTime = -1; req.streamId = streamId; // pTask->id.streamId; req.taskId = taskId; // pTask->id.taskId; + req.transId = transId; int32_t code; int32_t blen; @@ -1070,7 +1094,7 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre void * buf; int32_t tlen; if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, - pTask->id.taskId) < 0) { + pTask->id.taskId, pTrans->id) < 0) { mndReleaseVgroup(pMnode, pVgObj); taosWUnLockLatch(&pStream->lock); goto _ERR; @@ -1139,7 +1163,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream void * buf; int32_t tlen; if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId, - pTask->id.taskId) < 0) { + pTask->id.taskId, pTrans->id) < 0) { mndReleaseVgroup(pMnode, pVgObj); taosWUnLockLatch(&pStream->lock); return -1; @@ -1776,9 +1800,20 @@ static int32_t mndPauseStreamTask(SMnode *pMnode, STrans *pTrans, SStreamTask *p pReq->taskId = pTask->id.taskId; pReq->streamId = pTask->id.streamId; - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); + SEpSet epset; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + taosMemoryFree(pReq); + return -1; + } + + // no valid epset, return directly without redoAction + if (!hasEpset) { + taosMemoryFree(pReq); + return TSDB_CODE_SUCCESS; + } STransAction action = {0}; initTransAction(&action, pReq, sizeof(SVPauseStreamTaskReq), TDMT_STREAM_TASK_PAUSE, &epset, 0); @@ -1920,17 +1955,25 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { static int32_t mndResumeStreamTask(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) { SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq)); if (pReq == NULL) { + mError("failed to malloc in resume stream, size:%" PRIzu ", code:%s", sizeof(SVResumeStreamTaskReq), + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; pReq->streamId = pTask->id.streamId; pReq->igUntreated = igUntreated; - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); + SEpSet epset; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + taosMemoryFree(pReq); + return -1; + } STransAction action = {0}; initTransAction(&action, pReq, sizeof(SVResumeStreamTaskReq), TDMT_STREAM_TASK_RESUME, &epset, 0); @@ -2208,6 +2251,8 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP epsetAssign(&updateInfo.newEp, &pCurrent->epset); taosArrayPush(info.pUpdateNodeList, &updateInfo); } + + // todo handle the snode info if (pCurrent->nodeId != SNODE_HANDLE) { SVgObj 
*pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); @@ -2286,12 +2331,28 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { } static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) { - SSdb *pSdb = pMnode->pSdb; - - // check all streams that involved this vnode should update the epset info + SSdb *pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; - void * pIter = NULL; - STrans * pTrans = NULL; + void *pIter = NULL; + STrans *pTrans = NULL; + + // conflict check for nodeUpdate trans, here we randomly chose one stream to add into the trans pool + while(1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_UPDATE_NAME, false); + sdbRelease(pSdb, pStream); + + if (conflict) { + mWarn("nodeUpdate trans in progress, current nodeUpdate ignored"); + sdbCancelFetch(pSdb, pIter); + return -1; + } + } + while (1) { pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); @@ -2307,6 +2368,8 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange sdbCancelFetch(pSdb, pIter); return terrno; } + + mndStreamRegisterTrans(pTrans, MND_STREAM_TASK_RESET_NAME, pStream->uid); } void *p = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb)); @@ -2553,7 +2616,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { code = mndProcessVgroupChange(pMnode, &changeInfo); - // keep the new vnode snapshot + // keep the new vnode snapshot if success if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { mDebug("create trans successfully, update cached node list"); taosArrayDestroy(execInfo.pNodeList); @@ -2704,9 +2767,18 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { pReq->taskId = pTask->id.taskId; pReq->streamId = pTask->id.streamId; - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); + SEpSet epset; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFree(pReq); + continue; + } + + if (!hasEpset) { + taosMemoryFree(pReq); + continue; + } STransAction action = {0}; initTransAction(&action, pReq, sizeof(SVResetStreamTaskReq), TDMT_VND_STREAM_TASK_RESET, &epset, 0); @@ -2766,39 +2838,36 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t le return TSDB_CODE_SUCCESS; } -static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { +static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t transId) { + int32_t code = TSDB_CODE_SUCCESS; + STrans *pTrans = mndAcquireTrans(pMnode, transId); if (pTrans != NULL) { mInfo("kill checkpoint transId:%d to reset task status", transId); mndKillTrans(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans); + } else { + mError("failed to acquire checkpoint trans:%d", transId); } - // set all tasks status to be normal, refactor later to be stream level, instead of vnode level. 
- SSdb * pSdb = pMnode->pSdb; - SStreamObj *pStream = NULL; - void * pIter = NULL; - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - break; - } - + SStreamObj *pStream = mndGetStreamObj(pMnode, streamId); + if (pStream == NULL) { + code = TSDB_CODE_STREAM_TASK_NOT_EXIST; + mError("failed to acquire the streamObj:0x%" PRIx64 " to reset checkpoint, may have been dropped", pStream->uid); + } else { bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_RESET_NAME, false); if (conflict) { - mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", pStream->name, - pStream->sourceDb, pStream->targetDb); - continue; - } - - mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid); - int32_t code = createStreamResetStatusTrans(pMnode, pStream); - if (code != TSDB_CODE_SUCCESS) { - sdbCancelFetch(pSdb, pIter); - return code; + mError("stream:%s other trans exists in DB:%s, dstTable:%s failed to start reset-status trans", pStream->name, + pStream->sourceDb, pStream->targetSTbName); + } else { + mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, transId:%d, create reset trans", pStream->name, + pStream->uid, transId); + code = createStreamResetStatusTrans(pMnode, pStream); + mndReleaseStream(pMnode, pStream); } } - return 0; + + return code; } static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) { @@ -2878,11 +2947,17 @@ static int32_t mndDropRelatedFillhistoryTask(SMnode *pMnode, STaskStatusEntry *p mDebug("build and send drop related fill-history task for task:0x%x", pTask->id.taskId); - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); + SEpSet epset; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (hasEpset) { + tmsgSendReq(&epset, &msg); + } - tmsgSendReq(&epset, &msg); return TSDB_CODE_SUCCESS; } @@ -2930,6 +3005,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { bool checkpointFailed = false; int64_t activeCheckpointId = 0; + int64_t streamId = 0; + int32_t transId = 0; SDecoder decoder = {0}; tDecoderInit(&decoder, pReq->pCont, pReq->contLen); @@ -2972,7 +3049,9 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { updateStageInfo(pTaskEntry, p->stage); - if (pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true; + if (pTaskEntry->nodeId == SNODE_HANDLE) { + snodeChanged = true; + } } else { // task is idle for more than 50 sec. 
if (fabs(pTaskEntry->inputQUsed - p->inputQUsed) <= DBL_EPSILON) { @@ -2996,6 +3075,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (p->checkpointFailed) { checkpointFailed = p->checkpointFailed; + streamId = p->id.streamId; + transId = p->chkpointTransId; } } } @@ -3034,9 +3115,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (allReady || snodeChanged) { // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal - mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", - execInfo.activeCheckpoint); - mndResetStatusFromCheckpoint(pMnode, activeCheckpointId); + mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", activeCheckpointId); + mndResetStatusFromCheckpoint(pMnode, streamId, transId); } else { mInfo("not all vgroups are ready, wait for next HB from stream tasks"); } diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index 0db3cb0a8a..8f94843500 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -90,16 +90,20 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char if (strcmp(tInfo.name, MND_STREAM_CHECKPOINT_NAME) == 0) { if (strcmp(pTransName, MND_STREAM_DROP_NAME) != 0) { - mWarn("conflict with other transId:%d streamUid:%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, + mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, tInfo.name); return true; + } else { + mDebug("not conflict with checkpoint trans, name:%s, continue create trans", pTransName); } - } else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) || - (strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0)) { - mWarn("conflict with other transId:%d streamUid:%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, + } else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0) || + (strcmp(tInfo.name, MND_STREAM_TASK_RESET_NAME) == 0)) { + mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, tInfo.name); return true; } + } else { + mDebug("stream:0x%"PRIx64" no conflict trans existed, continue create trans", streamUid); } if (lock) { diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 79a89cb56e..33eb9cd3ed 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -125,7 +125,7 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } pSnode->msgCb = pOption->msgCb; - pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs()); + pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs(), tqStartTaskCompleteCallback); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; @@ -147,8 +147,8 @@ FAIL: } int32_t sndInit(SSnode *pSnode) { - resetStreamTaskStatus(pSnode->pMeta); - startStreamTasks(pSnode->pMeta); + tqStreamTaskResetStatus(pSnode->pMeta); + streamMetaStartAllTasks(pSnode->pMeta); return 0; } @@ -202,6 +202,8 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { return tqStreamTaskProcessDropReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen); case TDMT_VND_STREAM_TASK_UPDATE: return tqStreamTaskProcessUpdateReq(pSnode->pMeta, &pSnode->msgCb, pMsg, true); + case TDMT_VND_STREAM_TASK_RESET: + return 
tqStreamTaskProcessTaskResetReq(pSnode->pMeta, pMsg); default: ASSERT(0); } diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index cf57623a43..0ef29fcb3a 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -152,9 +152,6 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data); char* tqOffsetBuildFName(const char* path, int32_t fVer); int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); -// tqStream -int32_t tqStopStreamTasks(STQ* pTq); - // tq util int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index f3b495675d..84f51827ba 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -232,6 +232,7 @@ int tqPushMsg(STQ*, tmsg_t msgType); int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqScanWalAsync(STQ* pTq, bool ckPause); +int32_t tqStopStreamTasksAsync(STQ* pTq); int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index abe4c3f2fc..4963a0aa3b 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -240,23 +240,23 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTaskId *pId) { STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; - taosRLockLatch(&pMeta->lock); + streamMetaRLock(pMeta); SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask && *ppTask) { pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer; pItem->fetchResultVer = (*ppTask)->info.triggerParam; } - taosRUnLockLatch(&pMeta->lock); + streamMetaRUnLock(pMeta); } static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { streamMetaUnregisterTask(pMeta, streamId, taskId); - taosWLockLatch(&pMeta->lock); + streamMetaWLock(pMeta); int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); if (streamMetaCommit(pMeta) < 0) { // persist to disk } - taosWUnLockLatch(&pMeta->lock); + streamMetaWUnLock(pMeta); smaDebug("vgId:%d, rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); } @@ -1301,14 +1301,14 @@ _checkpoint: checkpointBuilt = true; } - taosWLockLatch(&pMeta->lock); + streamMetaWLock(pMeta); if (streamMetaSaveTask(pMeta, pTask)) { - taosWUnLockLatch(&pMeta->lock); + streamMetaWUnLock(pMeta); code = terrno ? 
terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } - taosWUnLockLatch(&pMeta->lock); + streamMetaWUnLock(pMeta); smaDebug("vgId:%d, rsma commit, succeed to commit task:%p, submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 ", table:%" PRIi64 ", level:%d", TD_VID(pVnode), pTask, pItem->submitReqVer, pItem->fetchResultVer, pRSmaInfo->suid, i + 1); @@ -1316,13 +1316,13 @@ _checkpoint: } } if (pMeta) { - taosWLockLatch(&pMeta->lock); + streamMetaWLock(pMeta); if (streamMetaCommit(pMeta)) { - taosWUnLockLatch(&pMeta->lock); + streamMetaWUnLock(pMeta); code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } - taosWUnLockLatch(&pMeta->lock); + streamMetaWUnLock(pMeta); } if (checkpointBuilt) { smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint:%" PRIi64, TD_VID(pVnode), checkpointId); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 138c58b45f..c8cc4f9d54 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -96,7 +96,8 @@ int32_t tqInitialize(STQ* pTq) { return -1; } - pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId, -1); + int32_t vgId = TD_VID(pTq->pVnode); + pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, vgId, -1, tqStartTaskCompleteCallback); if (pTq->pStreamMeta == NULL) { return -1; } @@ -1070,10 +1071,12 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { tqScanWal(pTq); return 0; } + int32_t code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); if(code == 0 && taskId > 0){ tqScanWalAsync(pTq, false); } + return code; } @@ -1256,10 +1259,11 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) if (pTask->status.downstreamReady != 1) { pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id pTask->chkInfo.checkpointingId = req.checkpointId; + pTask->chkInfo.transId = req.transId; tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 - ", set it failure", - pTask->id.idStr, req.checkpointId); + ", transId:%d set it failed", + pTask->id.idStr, req.checkpointId, req.transId); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; @@ -1335,28 +1339,10 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { - SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont; - - SStreamMeta* pMeta = pTq->pStreamMeta; - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); - if (pTask == NULL) { - tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", - pMeta->vgId, pReq->taskId); - return TSDB_CODE_SUCCESS; - } - - tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr); - - // clear flag set during do checkpoint, and open inputQ for all upstream tasks - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) { - streamTaskClearCheckInfo(pTask, true); - streamTaskSetStatusReady(pTask); - } - - streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg); } +// NOTE: here we may receive this message more than once, so need to handle this case int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) { SVDropHTaskReq* pReq = 
(SVDropHTaskReq*)pMsg->pCont; @@ -1375,14 +1361,17 @@ int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) { return TSDB_CODE_SUCCESS; } + taosThreadMutexLock(&pTask->lock); ETaskStatus status = streamTaskGetStatus(pTask, NULL); - ASSERT(status == TASK_STATUS__STREAM_SCAN_HISTORY); - - streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); + if (status == TASK_STATUS__STREAM_SCAN_HISTORY) { + streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); + } SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id); + taosThreadMutexUnlock(&pTask->lock); + // clear the scheduler status streamTaskSetSchedStatusInactive(pTask); tqDebug("s-task:%s set scheduler status:%d after drop fill-history task", pTask->id.idStr, pTask->status.schedStatus); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 1b0a76e81c..2bb2609ae9 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -109,8 +109,8 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return -1; } - tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, restored:%d", vgId, numOfTasks, - alreadyRestored); + tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, vnd restored:%d", vgId, + numOfTasks, alreadyRestored); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; @@ -123,33 +123,25 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return 0; } -int32_t tqStopStreamTasks(STQ* pTq) { +int32_t tqStopStreamTasksAsync(STQ* pTq) { SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = TD_VID(pTq->pVnode); - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + int32_t vgId = pMeta->vgId; - tqDebug("vgId:%d stop all %d stream task(s)", vgId, numOfTasks); - if (numOfTasks == 0) { - return TSDB_CODE_SUCCESS; + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to stop tasks, code:%s", vgId, terrstr()); + return -1; } - SArray* pTaskList = NULL; - streamMetaWLock(pMeta); - pTaskList = taosArrayDup(pMeta->pTaskList, NULL); - streamMetaWUnLock(pMeta); + tqDebug("vgId:%d create msg to stop tasks", vgId); - for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); - if (pTask == NULL) { - continue; - } + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = STREAM_EXEC_STOP_ALL_TASKS_ID; - streamTaskStop(pTask); - streamMetaReleaseTask(pMeta, pTask); - } - - taosArrayDestroy(pTaskList); + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); return 0; } diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index f966e90b9a..dda5173ad9 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -196,7 +196,7 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { int32_t code = 0; STQ* pTq = pWriter->pTq; - taosWLockLatch(&pTq->pStreamMeta->lock); + streamMetaWLock(pTq->pStreamMeta); tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode)); 
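A sketch, not part of this patch, of what the streamMetaRLock/streamMetaRUnLock/streamMetaWLock/streamMetaWUnLock wrappers used throughout these hunks are assumed to reduce to once SStreamMeta.lock becomes a TdThreadRwlock (a pthread read-write lock on POSIX builds). The demo* names and bodies are assumptions; only the wrapper call sites appear in this diff.

#include <pthread.h>

typedef pthread_rwlock_t TdThreadRwlock;     /* assumed POSIX mapping of the OS wrapper */

typedef struct SDemoMeta {
  TdThreadRwlock lock;                       /* replaces the old SRWLatch field */
} SDemoMeta;

static void demoMetaRLock(SDemoMeta* pMeta)   { pthread_rwlock_rdlock(&pMeta->lock); }
static void demoMetaRUnLock(SDemoMeta* pMeta) { pthread_rwlock_unlock(&pMeta->lock); }
static void demoMetaWLock(SDemoMeta* pMeta)   { pthread_rwlock_wrlock(&pMeta->lock); }
static void demoMetaWUnLock(SDemoMeta* pMeta) { pthread_rwlock_unlock(&pMeta->lock); }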
if (rollback) { tdbAbort(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); @@ -212,14 +212,14 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { taosMemoryFree(pWriter); goto _err; } - taosWUnLockLatch(&pTq->pStreamMeta->lock); + streamMetaWUnLock(pTq->pStreamMeta); taosMemoryFree(pWriter); return code; _err: tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); - taosWUnLockLatch(&pTq->pStreamMeta->lock); + streamMetaWUnLock(pTq->pStreamMeta); return code; } @@ -240,13 +240,13 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t tDecoderClear(&decoder); int64_t key[2] = {taskId.streamId, taskId.taskId}; - taosWLockLatch(&pTq->pStreamMeta->lock); + streamMetaWLock(pTq->pStreamMeta); if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, key, sizeof(int64_t) << 1, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr), pTq->pStreamMeta->txn) < 0) { - taosWUnLockLatch(&pTq->pStreamMeta->lock); + streamMetaWUnLock(pTq->pStreamMeta); return -1; } - taosWUnLockLatch(&pTq->pStreamMeta->lock); + streamMetaWUnLock(pTq->pStreamMeta); } else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) { // do nothing } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 110cf79b4e..d18455d221 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -35,20 +35,8 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { } void tqUpdateNodeStage(STQ* pTq, bool isLeader) { - SSyncState state = syncGetState(pTq->pVnode->sync); - SStreamMeta* pMeta = pTq->pStreamMeta; - int64_t stage = pMeta->stage; - - pMeta->stage = state.term; - pMeta->role = (isLeader)? NODE_ROLE_LEADER:NODE_ROLE_FOLLOWER; - if (isLeader) { - tqInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId, - state.term, stage, isLeader); - streamMetaStartHb(pMeta); - } else { - tqInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d", pMeta->vgId, state.term, stage, - isLeader); - } + SSyncState state = syncGetState(pTq->pVnode->sync); + streamMetaUpdateStageRole(pTq->pStreamMeta, state.term, isLeader); } static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) { diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 2140b80eef..f576345f64 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -106,14 +106,15 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM return rsp.code; } - streamMetaWUnLock(pMeta); +// streamMetaWUnLock(pMeta); + // todo for test purpose // the following two functions should not be executed within the scope of meta lock to avoid deadlock streamTaskUpdateEpsetInfo(pTask, req.pNodeList); streamTaskResetStatus(pTask); // continue after lock the meta again - streamMetaWLock(pMeta); +// streamMetaWLock(pMeta); SStreamTask** ppHTask = NULL; if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { @@ -166,65 +167,17 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM streamMetaWUnLock(pMeta); } else { if (!restored) { - tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", - vgId); + tqDebug("vgId:%d vnode restore not completed, not start the tasks, clear the start after nodeUpdate flag", vgId); pMeta->startInfo.tasksWillRestart = 0; 
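A condensed sketch, not part of this patch, of the sentinel-taskId dispatch that tqStreamTaskProcessRunReq performs further below after this change: negative task IDs posted through the stream queue select a meta-level action (start, restart, or stop all tasks) instead of running a single task. The demo* stubs are hypothetical; the sentinel macros match the ones added to tstream.h in this diff.

#include <stdint.h>

#define STREAM_EXEC_START_ALL_TASKS_ID   (-2)
#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3)
#define STREAM_EXEC_STOP_ALL_TASKS_ID    (-4)

static int32_t demoStartAllTasks(void)        { return 0; }  /* stands in for streamMetaStartAllTasks */
static int32_t demoRestartAllTasks(void)      { return 0; }  /* stands in for restartStreamTasks */
static int32_t demoStopAllTasks(void)         { return 0; }  /* stands in for streamMetaStopAllTasks */
static int32_t demoRunOneTask(int32_t taskId) { (void)taskId; return 0; }

static int32_t demoProcessRunReq(int32_t taskId) {
  switch (taskId) {
    case STREAM_EXEC_START_ALL_TASKS_ID:   return demoStartAllTasks();
    case STREAM_EXEC_RESTART_ALL_TASKS_ID: return demoRestartAllTasks();
    case STREAM_EXEC_STOP_ALL_TASKS_ID:    return demoStopAllTasks();
    default:                               return demoRunOneTask(taskId);
  }
}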
streamMetaWUnLock(pMeta); } else { tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); -#if 1 +#if 0 + // for test purpose, to trigger the leader election + taosMSleep(5000); +#endif tqStreamTaskStartAsync(pMeta, cb, true); streamMetaWUnLock(pMeta); -#else - streamMetaWUnLock(pMeta); - - // For debug purpose. - // the following procedure consume many CPU resource, result in the re-election of leader - // with high probability. So we employ it as a test case for the stream processing framework, with - // checkpoint/restart/nodeUpdate etc. - while (1) { - int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); - if (startVal == 0) { - break; - } - - tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); - taosMsleep(500); - } - - while (streamMetaTaskInTimer(pMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); - taosMsleep(100); - } - - streamMetaWLock(pMeta); - - int32_t code = streamMetaReopen(pMeta); - if (code != 0) { - tqError("vgId:%d failed to reopen stream meta", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - streamMetaInitBackend(pMeta); - - if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { - tqError("vgId:%d failed to load stream tasks", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - tqInfo("vgId:%d start all stream tasks after all being updated", vgId); - resetStreamTaskStatus(pTq->pStreamMeta); - tqStartStreamTaskAsync(pTq, false); - } else { - tqInfo("vgId:%d, follower node not start stream tasks", vgId); - } - streamMetaWUnLock(pMeta); -#endif } } @@ -645,65 +598,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen return 0; } -int32_t startStreamTasks(SStreamMeta* pMeta) { - int32_t code = TSDB_CODE_SUCCESS; - int32_t vgId = pMeta->vgId; - - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); - if (numOfTasks == 0) { - return TSDB_CODE_SUCCESS; - } - - SArray* pTaskList = NULL; - streamMetaWLock(pMeta); - pTaskList = taosArrayDup(pMeta->pTaskList, NULL); - taosHashClear(pMeta->startInfo.pReadyTaskSet); - taosHashClear(pMeta->startInfo.pFailedTaskSet); - pMeta->startInfo.startTs = taosGetTimestampMs(); - streamMetaWUnLock(pMeta); - - // broadcast the check downstream tasks msg - for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); - if (pTask == NULL) { - continue; - } - - // fill-history task can only be launched by related stream tasks. - if (pTask->info.fillHistory == 1) { - streamMetaReleaseTask(pMeta, pTask); - continue; - } - - if (pTask->status.downstreamReady == 1) { - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - tqDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", - pTask->id.idStr); - streamLaunchFillHistoryTask(pTask); - } - - streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, pTask->execInfo.init, - pTask->execInfo.start, true); - streamMetaReleaseTask(pMeta, pTask); - continue; - } - - EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? 
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; - int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); - if (ret != TSDB_CODE_SUCCESS) { - code = ret; - } - - streamMetaReleaseTask(pMeta, pTask); - } - - taosArrayDestroy(pTaskList); - return code; -} - -int32_t resetStreamTaskStatus(SStreamMeta* pMeta) { +int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta) { int32_t vgId = pMeta->vgId; int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); @@ -728,16 +623,18 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { int32_t code = 0; int64_t st = taosGetTimestampMs(); - while (1) { - int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); - if (startVal == 0) { - break; - } - - tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); - taosMsleep(500); + streamMetaWLock(pMeta); + if (pMeta->startInfo.taskStarting == 1) { + pMeta->startInfo.restartCount += 1; + tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, + pMeta->startInfo.restartCount); + streamMetaWUnLock(pMeta); + return TSDB_CODE_SUCCESS; } + pMeta->startInfo.taskStarting = 1; + streamMetaWUnLock(pMeta); + terrno = 0; tqInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId, pMeta->updateInfo.transId); @@ -762,11 +659,9 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { } if (isLeader && !tsDisableStream) { - resetStreamTaskStatus(pMeta); + tqStreamTaskResetStatus(pMeta); streamMetaWUnLock(pMeta); - tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); - - startStreamTasks(pMeta); + streamMetaStartAllTasks(pMeta); } else { streamMetaResetStartInfo(&pMeta->startInfo); streamMetaWUnLock(pMeta); @@ -784,11 +679,14 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t vgId = pMeta->vgId; if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { - startStreamTasks(pMeta); + streamMetaStartAllTasks(pMeta); return 0; } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { restartStreamTasks(pMeta, isLeader); return 0; + } else if (taskId == STREAM_EXEC_STOP_ALL_TASKS_ID) { + streamMetaStopAllTasks(pMeta); + return 0; } SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, taskId); @@ -812,3 +710,46 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead return -1; } } + +int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { + STaskStartInfo* pStartInfo = &pMeta->startInfo; + streamMetaWLock(pMeta); + + if (pStartInfo->restartCount > 0) { + pStartInfo->restartCount -= 1; + + ASSERT(pStartInfo->taskStarting == 0); + tqDebug("vgId:%d role:%d need to restart all tasks again, restartCounter:%d", pMeta->vgId, pMeta->role, + pStartInfo->restartCount); + + streamMetaWUnLock(pMeta); + restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER)); + } else { + streamMetaWUnLock(pMeta); + tqDebug("vgId:%d start all tasks completed", pMeta->vgId); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pReq->taskId); + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s receive task-reset msg from 
mnode, reset status and ready for data processing", pTask->id.idStr); + + // clear flag set during do checkpoint, and open inputQ for all upstream tasks + if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) { + streamTaskClearCheckInfo(pTask, true); + streamTaskSetStatusReady(pTask); + } + + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index fd179dba2e..5ad067c113 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -1360,7 +1360,8 @@ static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pB static bool nextRowFromSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockScanInfo* pScanInfo, SVersionRange* pVerRange) { - int32_t step = ASCENDING_TRAVERSE(pSttBlockReader->order) ? 1 : -1; + int32_t order = pSttBlockReader->order; + int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1; while (1) { bool hasVal = tMergeTreeNext(&pSttBlockReader->mergeTree); @@ -1377,8 +1378,12 @@ static bool nextRowFromSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockSc pSttBlockReader->currentKey = key; pScanInfo->sttKeyInfo.nextProcKey = key; - if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, pSttBlockReader->order, - pVerRange)) { + if (pScanInfo->delSkyline != NULL && TARRAY_SIZE(pScanInfo->delSkyline) > 0) { + if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, order, pVerRange)) { + pScanInfo->sttKeyInfo.status = STT_FILE_HAS_DATA; + return true; + } + } else { pScanInfo->sttKeyInfo.status = STT_FILE_HAS_DATA; return true; } @@ -2054,9 +2059,12 @@ static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDum return false; } - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->info.order, - &pReader->info.verRange)) { - return false; + if (pBlockScanInfo->delSkyline != NULL && TARRAY_SIZE(pBlockScanInfo->delSkyline) > 0) { + bool dropped = hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, + pReader->info.order, &pReader->info.verRange); + if (dropped) { + return false; + } } return true; @@ -3219,7 +3227,7 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_ bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, SVersionRange* pVerRange) { - if (pDelList == NULL || (taosArrayGetSize(pDelList) == 0)) { + if (pDelList == NULL || (TARRAY_SIZE(pDelList) == 0)) { return false; } @@ -3327,16 +3335,22 @@ TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* p TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter); TSDBKEY key = TSDBROW_KEY(pRow); - + int32_t order = pReader->info.order; if (outOfTimeWindow(key.ts, &pReader->info.window)) { pIter->hasVal = false; return NULL; } // it is a valid data version - if ((key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { - return pRow; + if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) { + if (pDelList == NULL || TARRAY_SIZE(pDelList) == 0) { + return pRow; + } else { + bool dropped = hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, order, &pReader->info.verRange); + if (!dropped) { 
+ return pRow; + } + } } while (1) { @@ -3353,9 +3367,15 @@ TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* p return NULL; } - if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { - return pRow; + if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) { + if (pDelList == NULL || TARRAY_SIZE(pDelList) == 0) { + return pRow; + } else { + bool dropped = hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, order, &pReader->info.verRange); + if (!dropped) { + return pRow; + } + } } } } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 5871a60c9e..048092131d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -570,7 +570,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); - resetStreamTaskStatus(pVnode->pTq->pStreamMeta); + tqStreamTaskResetStatus(pVnode->pTq->pStreamMeta); tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false); } } else { @@ -594,7 +594,7 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { if (pVnode->pTq) { tqUpdateNodeStage(pVnode->pTq, false); - tqStopStreamTasks(pVnode->pTq); + tqStopStreamTasksAsync(pVnode->pTq); } } diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 7b68b65c17..c9490e2c55 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -1623,6 +1623,7 @@ void initDummyFunction(SqlFunctionCtx* pDummy, SqlFunctionCtx* pCtx, int32_t num pDummy[i].functionId = pCtx[i].functionId; pDummy[i].isNotNullFunc = pCtx[i].isNotNullFunc; pDummy[i].isPseudoFunc = pCtx[i].isPseudoFunc; + pDummy[i].fpSet.init = pCtx[i].fpSet.init; } } @@ -2774,6 +2775,9 @@ void streamSessionSemiReloadState(SOperatorInfo* pOperator) { for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; getSessionWindowInfoByKey(pAggSup, pSeKeyBuf + i, &winInfo); + if (!IS_VALID_SESSION_WIN(winInfo)) { + continue; + } compactSessionSemiWindow(pOperator, &winInfo); saveSessionOutputBuf(pAggSup, &winInfo); } diff --git a/source/libs/function/inc/tfunctionInt.h b/source/libs/function/inc/tfunctionInt.h index 821544106f..b4c48abf37 100644 --- a/source/libs/function/inc/tfunctionInt.h +++ b/source/libs/function/inc/tfunctionInt.h @@ -28,8 +28,6 @@ extern "C" { #include "tudf.h" #include "tvariant.h" -bool topbot_datablock_filter(SqlFunctionCtx *pCtx, const char *minval, const char *maxval); - /** * the numOfRes should be kept, since it may be used later * and allow the ResultInfo to be re initialized diff --git a/source/libs/function/src/tfunctionInt.c b/source/libs/function/src/tfunctionInt.c index 3da5d63fa1..80b869b2d2 100644 --- a/source/libs/function/src/tfunctionInt.c +++ b/source/libs/function/src/tfunctionInt.c @@ -27,33 +27,3 @@ #include "tpercentile.h" #include "ttszip.h" #include "tudf.h" - -int32_t getNumOfResult(SqlFunctionCtx* pCtx, int32_t num, SSDataBlock* pResBlock) { - int32_t maxRows = 0; - - for (int32_t j = 0; j < num; ++j) { - SResultRowEntryInfo* pResInfo = 
GET_RES_INFO(&pCtx[j]); - if (pResInfo != NULL && maxRows < pResInfo->numOfRes) { - maxRows = pResInfo->numOfRes; - } - } - - blockDataEnsureCapacity(pResBlock, maxRows); - for (int32_t i = 0; i < num; ++i) { - SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, i); - - SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[i]); - if (pResInfo->numOfRes == 0) { - for (int32_t j = 0; j < pResInfo->numOfRes; ++j) { - colDataSetVal(pCol, j, NULL, true); // TODO add set null data api - } - } else { - for (int32_t j = 0; j < pResInfo->numOfRes; ++j) { - colDataSetVal(pCol, j, GET_ROWCELL_INTERBUF(pResInfo), false); - } - } - } - - pResBlock->info.rows = maxRows; - return maxRows; -} diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index 8101b342a4..93008b565a 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -142,13 +142,13 @@ int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) { } // divide the value range into 1024 buckets - uint64_t span = pBucket->range.i64MaxVal - pBucket->range.i64MinVal; + uint64_t span = (uint64_t)(pBucket->range.i64MaxVal - pBucket->range.i64MinVal); if (span < pBucket->numOfSlots) { int64_t delta = v - pBucket->range.i64MinVal; index = (delta % pBucket->numOfSlots); } else { double slotSpan = ((double)span) / pBucket->numOfSlots; - uint64_t delta = v - pBucket->range.i64MinVal; + uint64_t delta = (uint64_t)(v - pBucket->range.i64MinVal); index = (int32_t)(delta / slotSpan); if (v == pBucket->range.i64MaxVal || index == pBucket->numOfSlots) { diff --git a/source/libs/parser/CMakeLists.txt b/source/libs/parser/CMakeLists.txt index 4e4a4def1d..c5ee1a00c4 100644 --- a/source/libs/parser/CMakeLists.txt +++ b/source/libs/parser/CMakeLists.txt @@ -4,9 +4,6 @@ IF (TD_ENTERPRISE) LIST(APPEND PARSER_SRC ${TD_ENTERPRISE_DIR}/src/plugins/view/src/parserView.c) ENDIF () -IF (TD_BI_SUPPORT) - LIST(APPEND PARSER_SRC ${TD_ENTERPRISE_DIR}/src/plugins/bi/src/biRewriteQuery.c) -ENDIF () add_library(parser STATIC ${PARSER_SRC}) target_include_directories( parser diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 3bb24566c2..6f33cd66a3 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1104,11 +1104,192 @@ static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** p return DEAL_RES_CONTINUE; } -#ifndef TD_ENTERPRISE +static void biMakeAliasNameInMD5(char* pExprStr, int32_t len, char* pAlias) { + T_MD5_CTX ctx; + tMD5Init(&ctx); + tMD5Update(&ctx, pExprStr, len); + tMD5Final(&ctx); + char* p = pAlias; + for (uint8_t i = 0; i < tListLen(ctx.digest); ++i) { + sprintf(p, "%02x", ctx.digest[i]); + p += 2; + } +} + +static SNode* biMakeTbnameProjectAstNode(char* funcName, char* tableAlias) { + SValueNode* valNode = NULL; + if (tableAlias != NULL) { + SValueNode* n = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE); + n->literal = tstrdup(tableAlias); + n->node.resType.type = TSDB_DATA_TYPE_BINARY; + n->node.resType.bytes = strlen(n->literal); + n->isDuration = false; + n->translate = false; + valNode = n; + } + + SFunctionNode* tbNameFunc = (SFunctionNode*)nodesMakeNode(QUERY_NODE_FUNCTION); + tstrncpy(tbNameFunc->functionName, "tbname", TSDB_FUNC_NAME_LEN); + if (valNode != NULL) { + nodesListMakeAppend(&tbNameFunc->pParameterList, (SNode*)valNode); + } + snprintf(tbNameFunc->node.userAlias, sizeof(tbNameFunc->node.userAlias), + (tableAlias)? 
"%s.tbname" : "%stbname", + (tableAlias)? tableAlias : ""); + strncpy(tbNameFunc->node.aliasName, tbNameFunc->functionName, TSDB_COL_NAME_LEN); + + if (funcName == NULL) { + return (SNode*)tbNameFunc; + } else { + SFunctionNode* multiResFunc = (SFunctionNode*)nodesMakeNode(QUERY_NODE_FUNCTION); + tstrncpy(multiResFunc->functionName, funcName, TSDB_FUNC_NAME_LEN); + nodesListMakeAppend(&multiResFunc->pParameterList, (SNode*)tbNameFunc); + + if (tsKeepColumnName) { + snprintf(multiResFunc->node.userAlias, sizeof(tbNameFunc->node.userAlias), + (tableAlias)? "%s.tbname" : "%stbname", + (tableAlias)? tableAlias : ""); + strcpy(multiResFunc->node.aliasName, tbNameFunc->functionName); + } else { + snprintf(multiResFunc->node.userAlias, sizeof(multiResFunc->node.userAlias), + tableAlias? "%s(%s.tbname)" : "%s(%stbname)", funcName, + tableAlias? tableAlias: ""); + biMakeAliasNameInMD5(multiResFunc->node.userAlias, strlen(multiResFunc->node.userAlias), multiResFunc->node.aliasName); + } + + return (SNode*)multiResFunc; + } +} + +static int32_t biRewriteSelectFuncParamStar(STranslateContext* pCxt, SSelectStmt* pSelect, SNode* pNode, SListCell* pSelectListCell) { + SNodeList* pTbnameNodeList = nodesMakeList(); + + SFunctionNode* pFunc = (SFunctionNode*)pNode; + if (strcasecmp(pFunc->functionName, "last") == 0 || + strcasecmp(pFunc->functionName, "last_row") == 0 || + strcasecmp(pFunc->functionName, "first") == 0) { + SNodeList* pParams = pFunc->pParameterList; + SNode* pPara = NULL; + FOREACH(pPara, pParams) { + if (nodesIsStar(pPara)) { + SArray* pTables = taosArrayGetP(pCxt->pNsLevel, pCxt->currLevel); + size_t n = taosArrayGetSize(pTables); + for (int32_t i = 0; i < n; ++i) { + STableNode* pTable = taosArrayGetP(pTables, i); + if (nodeType(pTable) == QUERY_NODE_REAL_TABLE && ((SRealTableNode*)pTable)->pMeta != NULL && + ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { + SNode* pTbnameNode = biMakeTbnameProjectAstNode(pFunc->functionName, NULL); + nodesListAppend(pTbnameNodeList, pTbnameNode); + } + } + if (LIST_LENGTH(pTbnameNodeList) > 0) { + nodesListInsertListAfterPos(pSelect->pProjectionList, pSelectListCell, pTbnameNodeList); + } + } else if (nodesIsTableStar(pPara)) { + char* pTableAlias = ((SColumnNode*)pPara)->tableAlias; + STableNode* pTable = NULL; + int32_t code = findTable(pCxt, pTableAlias, &pTable); + if (TSDB_CODE_SUCCESS == code && nodeType(pTable) == QUERY_NODE_REAL_TABLE && + ((SRealTableNode*)pTable)->pMeta != NULL && + ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { + SNode* pTbnameNode = biMakeTbnameProjectAstNode(pFunc->functionName, pTableAlias); + nodesListAppend(pTbnameNodeList, pTbnameNode); + } + if (LIST_LENGTH(pTbnameNodeList) > 0) { + nodesListInsertListAfterPos(pSelect->pProjectionList, pSelectListCell, pTbnameNodeList); + } + } + } + } + return TSDB_CODE_SUCCESS; +} + +// after translate from +// before translate select list +int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) { + SNode* pNode = NULL; + SNodeList* pTbnameNodeList = nodesMakeList(); + WHERE_EACH(pNode, pSelect->pProjectionList) { + if (nodesIsStar(pNode)) { + SArray* pTables = taosArrayGetP(pCxt->pNsLevel, pCxt->currLevel); + size_t n = taosArrayGetSize(pTables); + for (int32_t i = 0; i < n; ++i) { + STableNode* pTable = taosArrayGetP(pTables, i); + if (nodeType(pTable) == QUERY_NODE_REAL_TABLE && + ((SRealTableNode*)pTable)->pMeta != NULL && + ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { + SNode* pTbnameNode = 
biMakeTbnameProjectAstNode(NULL, NULL); + nodesListAppend(pTbnameNodeList, pTbnameNode); + } + } + if (LIST_LENGTH(pTbnameNodeList) > 0) { + nodesListInsertListAfterPos(pSelect->pProjectionList, cell, pTbnameNodeList); + } + } else if (nodesIsTableStar(pNode)) { + char* pTableAlias = ((SColumnNode*)pNode)->tableAlias; + STableNode* pTable = NULL; + int32_t code = findTable(pCxt, pTableAlias, &pTable); + if (TSDB_CODE_SUCCESS == code && + nodeType(pTable) == QUERY_NODE_REAL_TABLE && + ((SRealTableNode*)pTable)->pMeta != NULL && + ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { + SNode* pTbnameNode = biMakeTbnameProjectAstNode(NULL, pTableAlias); + nodesListAppend(pTbnameNodeList, pTbnameNode); + } + if (LIST_LENGTH(pTbnameNodeList) > 0) { + nodesListInsertListAfterPos(pSelect->pProjectionList, cell, pTbnameNodeList); + } + } else if (nodeType(pNode) == QUERY_NODE_FUNCTION) { + biRewriteSelectFuncParamStar(pCxt, pSelect, pNode, cell); + } + WHERE_NEXT; + } + + return TSDB_CODE_SUCCESS; +} + bool biRewriteToTbnameFunc(STranslateContext* pCxt, SNode** ppNode) { + SColumnNode* pCol = (SColumnNode*)(*ppNode); + if ((strcasecmp(pCol->colName, "tbname") == 0) && + ((SSelectStmt*)pCxt->pCurrStmt)->pFromTable && + QUERY_NODE_REAL_TABLE == nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { + SFunctionNode* tbnameFuncNode = NULL; + tbnameFuncNode = (SFunctionNode*)biMakeTbnameProjectAstNode(NULL, (pCol->tableAlias[0]!='\0') ? pCol->tableAlias : NULL); + tbnameFuncNode->node.resType = pCol->node.resType; + strcpy(tbnameFuncNode->node.aliasName, pCol->node.aliasName); + strcpy(tbnameFuncNode->node.userAlias, pCol->node.userAlias); + + nodesDestroyNode(*ppNode); + *ppNode = (SNode*)tbnameFuncNode; + return true; + } + return false; } -#endif + +int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt) { + if (pStmt->pTags) { + SNode* pNode = NULL; + FOREACH(pNode, pStmt->pTags) { + SColumnDefNode* pTag = (SColumnDefNode*)pNode; + if (strcasecmp(pTag->colName, "tbname") == 0) { + int32_t code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAG_NAME, "tbname can not used for tags in BI mode"); + return code; + } + } + } + if (pStmt->pCols) { + SNode* pNode = NULL; + FOREACH(pNode, pStmt->pCols) { + SColumnDefNode* pCol = (SColumnDefNode*)pNode; + if (strcasecmp(pCol->colName, "tbname") == 0) { + int32_t code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_COLUMN, "tbname can not used for columns in BI mode"); + return code; + } + } + } + return TSDB_CODE_SUCCESS; +} static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { if (NULL == pCxt->pCurrStmt || @@ -3178,10 +3359,6 @@ static int32_t createTags(STranslateContext* pCxt, SNodeList** pOutput) { return TSDB_CODE_SUCCESS; } -#ifndef TD_ENTERPRISE -int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) { return TSDB_CODE_SUCCESS; } -#endif - static int32_t translateStar(STranslateContext* pCxt, SSelectStmt* pSelect) { SNode* pNode = NULL; WHERE_EACH(pNode, pSelect->pProjectionList) { @@ -5743,11 +5920,6 @@ static int32_t checkTableDeleteMarkOption(STranslateContext* pCxt, STableOptions return code; } -#ifndef TD_ENTERPRISE -int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt) { - return TSDB_CODE_SUCCESS; -} -#endif static int32_t checkCreateTable(STranslateContext* pCxt, SCreateTableStmt* pStmt, bool createStable) { if (NULL != strchr(pStmt->tableName, '.')) { @@ -8769,7 +8941,7 @@ static int32_t 
extractShowVariablesResultSchema(int32_t* numOfCols, SSchema** pS (*pSchema)[0].type = TSDB_DATA_TYPE_BINARY; (*pSchema)[0].bytes = TSDB_CONFIG_OPTION_LEN; - strcpy((*pSchema)[0].name, "name"); + strcpy((*pSchema)[0].name, "result"); (*pSchema)[1].type = TSDB_DATA_TYPE_BINARY; (*pSchema)[1].bytes = TSDB_CONFIG_VALUE_LEN; diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index f709741b57..9b5134d449 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -109,13 +109,12 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId); -int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); -int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask); int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask); +void streamTaskSetCheckpointFailedId(SStreamTask* pTask); int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); @@ -137,17 +136,6 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); -// <<<<<<< HEAD -// void streamClearChkptReadyMsg(SStreamTask* pTask); - -// int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const -// char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* -// pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); - -// void streamMetaResetStartInfo(STaskStartInfo* pMeta); - -// ======= -// >>>>>>> 3.0 SStreamQueue* streamQueueOpen(int64_t cap); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); void streamQueueProcessSuccess(SStreamQueue* queue); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index cf0682f037..b54adb0f96 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -25,6 +25,7 @@ typedef struct { SStreamTask* pTask; } SAsyncUploadArg; + int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -34,6 +35,7 @@ int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckp if (tEncodeSEpSet(pEncoder, &pReq->mgmtEps) < 0) return -1; if (tEncodeI32(pEncoder, pReq->mnodeId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->transId) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } @@ -47,6 +49,7 @@ int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSo if (tDecodeSEpSet(pDecoder, &pReq->mgmtEps) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->mnodeId) < 0) return -1; if 
(tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->transId) < 0) return -1; tEndDecode(pDecoder); return 0; } @@ -111,6 +114,7 @@ static int32_t streamAlignCheckpoint(SStreamTask* pTask) { return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); } +// todo: pass the checkpoint transId down to the sink/agg tasks. static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pChkpoint == NULL) { @@ -149,6 +153,7 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); + pTask->chkInfo.transId = pReq->transId; pTask->chkInfo.checkpointingId = pReq->checkpointId; pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); @@ -273,8 +278,9 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { pTask->chkInfo.failedId = 0; pTask->chkInfo.startTs = 0; // clear the recorded start time pTask->chkInfo.checkpointNotReadyTasks = 0; - // pTask->chkInfo.checkpointAlignCnt = 0; + pTask->chkInfo.transId = 0; pTask->chkInfo.dispatchCheckpointTrigger = false; + streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks if (clearChkpReadyMsg) { streamClearChkptReadyMsg(pTask); @@ -341,9 +347,8 @@ int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { return code; } -void streamTaskSetFailedId(SStreamTask* pTask) { +void streamTaskSetCheckpointFailedId(SStreamTask* pTask) { pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; - pTask->chkInfo.checkpointId = pTask->chkInfo.checkpointingId; } int32_t getChkpMeta(char* id, char* path, SArray* list) { @@ -485,7 +490,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE); taosThreadMutexUnlock(&pTask->lock); - streamTaskSetFailedId(pTask); + streamTaskSetCheckpointFailedId(pTask); stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, pTask->id.idStr, ckId); } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 5fb7db233f..bfa269b0d5 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -352,6 +352,7 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD return code; } + // it's a new vnode to receive dispatch msg, so add one if (pReqs[j].blockNum == 0) { atomic_add_fetch_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 1); } @@ -372,8 +373,15 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD pTask->msgInfo.pData = pReqs; } - stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64, pTask->id.idStr, pTask->execInfo.dispatch, - pTask->pMeta->stage); + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64 " %p", pTask->id.idStr, + pTask->execInfo.dispatch, pTask->pMeta->stage, pTask->msgInfo.pData); + } else { + stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64 " dstVgNum:%d %p", pTask->id.idStr, + pTask->execInfo.dispatch, pTask->pMeta->stage, 
pTask->outputInfo.shuffleDispatcher.waitingRspCnt, + pTask->msgInfo.pData); + } + return code; } @@ -395,9 +403,11 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgroups = taosArrayGetSize(vgInfo); - stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d vgroup(s), msgId:%d", id, - pTask->info.selfChildId, numOfVgroups, msgId); + int32_t actualVgroups = pTask->outputInfo.shuffleDispatcher.waitingRspCnt; + stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d/%d vgroup(s), msgId:%d", id, + pTask->info.selfChildId, actualVgroups, numOfVgroups, msgId); + int32_t numOfSend = 0; for (int32_t i = 0; i < numOfVgroups; i++) { if (pDispatchMsg[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); @@ -408,6 +418,11 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch if (code < 0) { break; } + + // no need to try the remaining vgroups, all msgs have already been sent. + if (++numOfSend == actualVgroups) { + break; + } } } @@ -1081,6 +1096,7 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { // this message has been sent successfully, let's try next one. static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { + stDebug("s-task:%s destroy dispatch msg:%p", pTask->id.idStr, pTask->msgInfo.pData); destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 6c5b1ef994..d09d370a36 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -266,11 +266,12 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { if (pBackend == NULL) { taosThreadMutexUnlock(&pMeta->backendMutex); taosMsleep(1000); - stDebug("backed holded by other task, restart later, path: %s, key: %s", pMeta->path, key); + stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); } else { taosThreadMutexUnlock(&pMeta->backendMutex); break; } + taosThreadMutexLock(&pMeta->backendMutex); pBackend = taskDbOpen(pMeta->path, key, chkpId); } @@ -297,7 +298,9 @@ void streamMetaRemoveDB(void* arg, char* key) { taosThreadMutexUnlock(&pMeta->backendMutex); } -SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) { + +SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage, + startComplete_fn_t fn) { int32_t code = -1; SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); if (pMeta == NULL) { @@ -363,20 +366,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->stage = stage; pMeta->role = (vgId == SNODE_HANDLE) ? 
NODE_ROLE_LEADER : NODE_ROLE_UNINIT; + pMeta->startInfo.completeFn = fn; pMeta->pTaskDbUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - // pMeta->chkpId = streamGetLatestCheckpointId(pMeta); - // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); - // while (pMeta->streamBackend == NULL) { - // qError("vgId:%d failed to init stream backend", pMeta->vgId); - // taosMsleep(2 * 1000); - // qInfo("vgId:%d retry to init stream backend", pMeta->vgId); - // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); - // if (pMeta->streamBackend == NULL) { - // } - // } - // pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); - pMeta->numOfPausedTasks = 0; pMeta->numOfStreamTasks = 0; stInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, @@ -384,6 +376,17 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->rid = taosAddRef(streamMetaId, pMeta); + // set the attribute when running on Linux OS +#if defined LINUX + TdThreadRwlockAttr attr; + taosThreadRwlockAttrInit(&attr); + + pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); + taosThreadRwlockInit(&pMeta->lock, &attr); + + taosThreadRwlockAttrDestroy(&attr); +#endif + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); memcpy(pRid, &pMeta->rid, sizeof(pMeta->rid)); metaRefMgtAdd(pMeta->vgId, pRid); @@ -458,7 +461,6 @@ void streamMetaClear(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); taosHashClear(pMeta->pTasksMap); - taosHashClear(pMeta->pTaskDbUnique); taosArrayClear(pMeta->pTaskList); taosArrayClear(pMeta->chkpSaved); @@ -617,22 +619,23 @@ int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { return (int32_t)size; } -SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { - streamMetaRLock(pMeta); - +SStreamTask* streamMetaAcquireTaskNoLock(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { STaskId id = {.streamId = streamId, .taskId = taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); - if (ppTask != NULL) { - if (!streamTaskShouldStop(*ppTask)) { - int32_t ref = atomic_add_fetch_32(&(*ppTask)->refCnt, 1); - streamMetaRUnLock(pMeta); - stTrace("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); - return *ppTask; - } + if (ppTask == NULL || streamTaskShouldStop(*ppTask)) { + return NULL; } + int32_t ref = atomic_add_fetch_32(&(*ppTask)->refCnt, 1); + stTrace("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); + return *ppTask; +} + +SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { + streamMetaRLock(pMeta); + SStreamTask* p = streamMetaAcquireTaskNoLock(pMeta, streamId, taskId); streamMetaRUnLock(pMeta); - return NULL; + return p; } void streamMetaReleaseTask(SStreamMeta* UNUSED_PARAM(pMeta), SStreamTask* pTask) { @@ -950,6 +953,7 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->verEnd) < 0) return -1; if (tEncodeI64(pEncoder, ps->activeCheckpointId) < 0) return -1; if (tEncodeI8(pEncoder, ps->checkpointFailed) < 0) return -1; + if (tEncodeI32(pEncoder, ps->chkpointTransId) < 0) return -1; } int32_t numOfVgs = taosArrayGetSize(pReq->pUpdateNodes); @@ -988,6 +992,7 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.verEnd) < 
0) return -1; if (tDecodeI64(pDecoder, &entry.activeCheckpointId) < 0) return -1; if (tDecodeI8(pDecoder, (int8_t*)&entry.checkpointFailed) < 0) return -1; + if (tDecodeI32(pDecoder, &entry.chkpointTransId) < 0) return -1; entry.id.taskId = taskId; taosArrayPush(pReq->pTaskStatus, &entry); @@ -1061,68 +1066,20 @@ static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) { taosThreadMutexUnlock(&pTask->lock); } -void metaHbToMnode(void* param, void* tmrId) { - int64_t rid = *(int64_t*)param; - - SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); - if (pMeta == NULL) { - return; - } - - // need to stop, stop now - if (pMeta->pHbInfo->stopFlag == STREAM_META_WILL_STOP) { - pMeta->pHbInfo->stopFlag = STREAM_META_OK_TO_STOP; - stDebug("vgId:%d jump out of meta timer", pMeta->vgId); - taosReleaseRef(streamMetaId, rid); - return; - } - - // not leader not send msg - if (pMeta->role != NODE_ROLE_LEADER) { - stInfo("vgId:%d role:%d not leader not send hb to mnode", pMeta->vgId, pMeta->role); - taosReleaseRef(streamMetaId, rid); - pMeta->pHbInfo->hbStart = 0; - return; - } - - // set the hb start time - if (pMeta->pHbInfo->hbStart == 0) { - pMeta->pHbInfo->hbStart = taosGetTimestampMs(); - } - - if (!waitForEnoughDuration(pMeta->pHbInfo)) { - taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamTimer, &pMeta->pHbInfo->hbTmr); - taosReleaseRef(streamMetaId, rid); - return; - } - - stDebug("vgId:%d build stream task hb, leader:%d", pMeta->vgId, (pMeta->role == NODE_ROLE_LEADER)); - +static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { SStreamHbMsg hbMsg = {0}; SEpSet epset = {0}; bool hasMnodeEpset = false; - int64_t stage = 0; + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); - streamMetaRLock(pMeta); - - int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); hbMsg.vgId = pMeta->vgId; - stage = pMeta->stage; - - SArray* pIdList = taosArrayDup(pMeta->pTaskList, NULL); - - streamMetaRUnLock(pMeta); - hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry)); hbMsg.pUpdateNodes = taosArrayInit(numOfTasks, sizeof(int32_t)); for (int32_t i = 0; i < numOfTasks; ++i) { - STaskId* pId = taosArrayGet(pIdList, i); + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); - streamMetaRLock(pMeta); SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); - streamMetaRUnLock(pMeta); - if (pTask == NULL) { continue; } @@ -1136,7 +1093,7 @@ void metaHbToMnode(void* param, void* tmrId) { .id = *pId, .status = streamTaskGetStatus(*pTask, NULL), .nodeId = hbMsg.vgId, - .stage = stage, + .stage = pMeta->stage, .inputQUsed = SIZE_IN_MiB(streamQueueGetItemSize((*pTask)->inputq.queue)), }; @@ -1149,6 +1106,11 @@ void metaHbToMnode(void* param, void* tmrId) { if ((*pTask)->chkInfo.checkpointingId != 0) { entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId); entry.activeCheckpointId = (*pTask)->chkInfo.checkpointingId; + entry.chkpointTransId = (*pTask)->chkInfo.transId; + + if (entry.checkpointFailed) { + stInfo("s-task:%s send kill checkpoint trans info, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.transId); + } } if ((*pTask)->exec.pWalReader != NULL) { @@ -1191,30 +1153,70 @@ void metaHbToMnode(void* param, void* tmrId) { } tEncoderClear(&encoder); - SRpcMsg msg = { - .info.noResp = 1, - }; + SRpcMsg msg = {.info.noResp = 1}; initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); pMeta->pHbInfo->hbCount += 1; - stDebug("vgId:%d build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, 
hbMsg.numOfTasks, pMeta->pHbInfo->hbCount); + tmsgSendReq(&epset, &msg); } else { stDebug("vgId:%d no tasks and no mnd epset, not send stream hb to mnode", pMeta->vgId); } -_end: + _end: streamMetaClearHbMsg(&hbMsg); - taosArrayDestroy(pIdList); + return TSDB_CODE_SUCCESS; +} + +void metaHbToMnode(void* param, void* tmrId) { + int64_t rid = *(int64_t*)param; + + SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); + if (pMeta == NULL) { + return; + } + + // need to stop, stop now + if (pMeta->pHbInfo->stopFlag == STREAM_META_WILL_STOP) { + pMeta->pHbInfo->stopFlag = STREAM_META_OK_TO_STOP; + stDebug("vgId:%d jump out of meta timer", pMeta->vgId); + taosReleaseRef(streamMetaId, rid); + return; + } + + // not leader not send msg + if (pMeta->role != NODE_ROLE_LEADER) { + stInfo("vgId:%d role:%d not leader not send hb to mnode", pMeta->vgId, pMeta->role); + taosReleaseRef(streamMetaId, rid); + pMeta->pHbInfo->hbStart = 0; + return; + } + + // set the hb start time + if (pMeta->pHbInfo->hbStart == 0) { + pMeta->pHbInfo->hbStart = taosGetTimestampMs(); + } + + if (!waitForEnoughDuration(pMeta->pHbInfo)) { + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamTimer, &pMeta->pHbInfo->hbTmr); + taosReleaseRef(streamMetaId, rid); + return; + } + + stDebug("vgId:%d build stream task hb, leader:%d", pMeta->vgId, (pMeta->role == NODE_ROLE_LEADER)); + streamMetaRLock(pMeta); + metaHeartbeatToMnodeImpl(pMeta); + streamMetaRUnLock(pMeta); + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamTimer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); } bool streamMetaTaskInTimer(SStreamMeta* pMeta) { bool inTimer = false; - streamMetaWLock(pMeta); + streamMetaRLock(pMeta); void* pIter = NULL; while (1) { @@ -1225,11 +1227,12 @@ bool streamMetaTaskInTimer(SStreamMeta* pMeta) { SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->status.timerActive >= 1) { + stDebug("s-task:%s in timer, blocking tasks in vgId:%d restart", pTask->id.idStr, pMeta->vgId); inTimer = true; } } - streamMetaWUnLock(pMeta); + streamMetaRUnLock(pMeta); return inTimer; } @@ -1288,26 +1291,37 @@ void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { taosHashClear(pStartInfo->pFailedTaskSet); pStartInfo->tasksWillRestart = 0; pStartInfo->readyTs = 0; + // reset the sentinel flag value to be 0 - atomic_store_32(&pStartInfo->taskStarting, 0); + pStartInfo->taskStarting = 0; } void streamMetaRLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-rlock", pMeta->vgId); - taosRLockLatch(&pMeta->lock); + taosThreadRwlockRdlock(&pMeta->lock); } + void streamMetaRUnLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-runlock", pMeta->vgId); - taosRUnLockLatch(&pMeta->lock); + int32_t code = taosThreadRwlockUnlock(&pMeta->lock); + if (code != TSDB_CODE_SUCCESS) { + stError("vgId:%d meta-runlock failed, code:%d", pMeta->vgId, code); + } else { + stDebug("vgId:%d meta-runlock completed", pMeta->vgId); + } } + void streamMetaWLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-wlock", pMeta->vgId); - taosWLockLatch(&pMeta->lock); + taosThreadRwlockWrlock(&pMeta->lock); + stTrace("vgId:%d meta-wlock completed", pMeta->vgId); } + void streamMetaWUnLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-wunlock", pMeta->vgId); - taosWUnLockLatch(&pMeta->lock); + taosThreadRwlockUnlock(&pMeta->lock); } + static void execHelper(struct SSchedMsg* pSchedMsg) { __async_exec_fn_t execFn = (__async_exec_fn_t)pSchedMsg->ahandle; int32_t code = execFn(pSchedMsg->thandle); @@ -1324,3 +1338,165 @@ int32_t streamMetaAsyncExec(SStreamMeta* 
pMeta, __stream_async_exec_fn_t fn, voi schedMsg.msg = code; return taosScheduleTask(pMeta->qHandle, &schedMsg); } + +SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta) { + SArray* pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + + bool sendMsg = pMeta->sendMsgBeforeClosing; + if (!sendMsg) { + stDebug("vgId:%d no need to send msg to mnode before closing tasks", pMeta->vgId); + return pTaskList; + } + + stDebug("vgId:%d send msg to mnode before closing all tasks", pMeta->vgId); + + // send hb msg to mnode before closing all tasks. + int32_t numOfTasks = taosArrayGetSize(pTaskList); + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTaskNoLock(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + continue; + } + + taosThreadMutexLock(&pTask->lock); + char* p = NULL; + ETaskStatus s = streamTaskGetStatus(pTask, &p); + if (s == TASK_STATUS__CK) { + streamTaskSetCheckpointFailedId(pTask); + stDebug("s-task:%s mark the checkpoint:%"PRId64" failed", pTask->id.idStr, pTask->chkInfo.checkpointingId); + } else { + stDebug("s-task:%s status:%s not reset the checkpoint", pTask->id.idStr, p); + } + + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pMeta, pTask); + } + + metaHeartbeatToMnodeImpl(pMeta); + pMeta->sendMsgBeforeClosing = false; + return pTaskList; +} + +void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader) { + streamMetaWLock(pMeta); + + int64_t prevStage = pMeta->stage; + pMeta->stage = stage; + + // set the flag to send a msg to mnode before closing all tasks + if ((!isLeader) && (pMeta->role == NODE_ROLE_LEADER)) { + pMeta->sendMsgBeforeClosing = true; + } + + pMeta->role = (isLeader)? NODE_ROLE_LEADER:NODE_ROLE_FOLLOWER; + streamMetaWUnLock(pMeta); + + if (isLeader) { + stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId, + prevStage, stage, isLeader); + streamMetaStartHb(pMeta); + } else { + stInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d sendMsg beforeClosing:%d", pMeta->vgId, + prevStage, stage, isLeader, pMeta->sendMsgBeforeClosing); + } +} + +int32_t streamMetaStopAllTasks(SStreamMeta* pMeta) { + streamMetaRLock(pMeta); + + int32_t num = taosArrayGetSize(pMeta->pTaskList); + stDebug("vgId:%d stop all %d stream task(s)", pMeta->vgId, num); + if (num == 0) { + streamMetaRUnLock(pMeta); + return TSDB_CODE_SUCCESS; + } + + // send hb msg to mnode before closing all tasks. 
+ SArray* pTaskList = streamMetaSendMsgBeforeCloseTasks(pMeta); + int32_t numOfTasks = taosArrayGetSize(pTaskList); + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTaskNoLock(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + continue; + } + + streamTaskStop(pTask); + streamMetaReleaseTask(pMeta, pTask); + } + + taosArrayDestroy(pTaskList); + + streamMetaRUnLock(pMeta); + return 0; +} + +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + stInfo("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); + if (numOfTasks == 0) { + stInfo("vgId:%d start tasks completed", pMeta->vgId); + return TSDB_CODE_SUCCESS; + } + + SArray* pTaskList = NULL; + streamMetaWLock(pMeta); + pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosHashClear(pMeta->startInfo.pReadyTaskSet); + taosHashClear(pMeta->startInfo.pFailedTaskSet); + pMeta->startInfo.startTs = taosGetTimestampMs(); + streamMetaWUnLock(pMeta); + + // broadcast the check downstream tasks msg + int64_t now = taosGetTimestampMs(); + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); + streamMetaUpdateTaskDownstreamStatus(pMeta, pTaskId->streamId, pTaskId->taskId, 0, now, false); + continue; + } + + // fill-history task can only be launched by related stream tasks. + STaskExecStatisInfo* pInfo = &pTask->execInfo; + if (pTask->info.fillHistory == 1) { + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + if (pTask->status.downstreamReady == 1) { + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + stDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", + pTask->id.idStr); + streamLaunchFillHistoryTask(pTask); + } + + streamMetaUpdateTaskDownstreamStatus(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->init, pInfo->start, true); + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? 
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); + if (ret != TSDB_CODE_SUCCESS) { + stError("vgId:%d failed to handle event:%d", pMeta->vgId, event); + code = ret; + + streamMetaUpdateTaskDownstreamStatus(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->init, pInfo->start, false); + } + + streamMetaReleaseTask(pMeta, pTask); + } + + stInfo("vgId:%d start tasks completed", pMeta->vgId); + taosArrayDestroy(pTaskList); + return code; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index cda5eca612..ea5e2edc09 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -48,9 +48,9 @@ static void tryLaunchHistoryTask(void* param, void* tmrId); static void doProcessDownstreamReadyRsp(SStreamTask* pTask); int32_t streamTaskSetReady(SStreamTask* pTask) { - char* p = NULL; - int32_t numOfDowns = streamTaskGetNumOfDownstream(pTask); - ETaskStatus status = streamTaskGetStatus(pTask, &p); + char* p = NULL; + int32_t numOfDowns = streamTaskGetNumOfDownstream(pTask); + ETaskStatus status = streamTaskGetStatus(pTask, &p); if ((status == TASK_STATUS__SCAN_HISTORY || status == TASK_STATUS__STREAM_SCAN_HISTORY) && pTask->info.taskLevel != TASK_LEVEL__SOURCE) { @@ -66,9 +66,6 @@ int32_t streamTaskSetReady(SStreamTask* pTask) { int64_t el = (pTask->execInfo.start - pTask->execInfo.init); stDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%" PRId64 "ms, task status:%s", pTask->id.idStr, numOfDowns, el, p); - - streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, pTask->execInfo.init, - pTask->execInfo.start, true); return TSDB_CODE_SUCCESS; } @@ -209,7 +206,11 @@ void streamTaskCheckDownstream(SStreamTask* pTask) { int32_t numOfVgs = taosArrayGetSize(vgInfo); pTask->notReadyTasks = numOfVgs; - pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t)); + if (pTask->checkReqIds == NULL) { + pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t)); + } else { + taosArrayClear(pTask->checkReqIds); + } stDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64, pTask->id.idStr, numOfVgs, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey); @@ -312,9 +313,24 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_ stError("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 ", prev:%" PRId64, id, upstreamTaskId, vgId, stage, pInfo->stage); + // record the checkpoint failure id and sent to mnode + taosThreadMutexLock(&pTask->lock); + ETaskStatus status = streamTaskGetStatus(pTask, NULL); + if (status == TASK_STATUS__CK) { + streamTaskSetCheckpointFailedId(pTask); + } + taosThreadMutexUnlock(&pTask->lock); } if (pInfo->stage != stage) { + + taosThreadMutexLock(&pTask->lock); + ETaskStatus status = streamTaskGetStatus(pTask, NULL); + if (status == TASK_STATUS__CK) { + streamTaskSetCheckpointFailedId(pTask); + } + taosThreadMutexUnlock(&pTask->lock); + return TASK_UPSTREAM_NEW_STAGE; } else if (pTask->status.downstreamReady != 1) { stDebug("s-task:%s vgId:%d leader:%d, downstream not ready", id, vgId, (pTask->pMeta->role == NODE_ROLE_LEADER)); @@ -363,10 +379,11 @@ int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { stDebug("s-task:%s enter into scan-history data stage, status:%s", id, p); streamTaskStartScanHistory(pTask); - // start the 
related fill-history task, when current task is ready - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - streamLaunchFillHistoryTask(pTask); - } + // NOTE: there would be a deadlock if the fill-history task were launched here. +// // start the related fill-history task, when current task is ready +// if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { +// streamLaunchFillHistoryTask(pTask); +// } return TSDB_CODE_SUCCESS; } @@ -380,6 +397,17 @@ void doProcessDownstreamReadyRsp(SStreamTask* pTask) { } streamTaskOnHandleEventSuccess(pTask->status.pSM, event); + + int64_t initTs = pTask->execInfo.init; + int64_t startTs = pTask->execInfo.start; + streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, initTs, startTs, true); + + // start the related fill-history task, when current task is ready + // not invoked in the success callback, to avoid the deadlock. + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + stDebug("s-task:%s try to launch related fill-history task", pTask->id.idStr); + streamLaunchFillHistoryTask(pTask); + } } static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) { @@ -436,8 +464,7 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs ASSERT(left >= 0); if (left == 0) { - taosArrayDestroy(pTask->checkReqIds); - pTask->checkReqIds = NULL; + pTask->checkReqIds = taosArrayDestroy(pTask->checkReqIds); doProcessDownstreamReadyRsp(pTask); } else { @@ -458,8 +485,7 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { stError("s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%" PRId64 ", current stage:%" PRId64 - ", " - "not check wait for downstream task nodeUpdate, and all tasks restart", + ", not check wait for downstream task nodeUpdate, and all tasks restart", id, pRsp->upstreamNodeId, pRsp->oldStage, pTask->pMeta->stage); addIntoNodeUpdateList(pTask, pRsp->upstreamNodeId); } else { @@ -1096,19 +1122,23 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamMeta* pMeta, int64_t streamI pStartInfo->readyTs = taosGetTimestampMs(); pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? 
pStartInfo->readyTs - pStartInfo->startTs : 0; - stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:0x%x startTs:%" PRId64 + stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:0x%x (succ:%d) startTs:%" PRId64 ", readyTs:%" PRId64 " total elapsed time:%.2fs", - pMeta->vgId, numOfTotal, taskId, pStartInfo->startTs, pStartInfo->readyTs, + pMeta->vgId, numOfTotal, taskId, ready, pStartInfo->startTs, pStartInfo->readyTs, pStartInfo->elapsedTime / 1000.0); // print the initialization elapsed time and info displayStatusInfo(pMeta, pStartInfo->pReadyTaskSet, true); displayStatusInfo(pMeta, pStartInfo->pFailedTaskSet, false); streamMetaResetStartInfo(pStartInfo); + streamMetaWUnLock(pMeta); + + pStartInfo->completeFn(pMeta); } else { - stDebug("vgId:%d recv check down results:%d, total:%d", pMeta->vgId, numOfRecv, numOfTotal); + streamMetaWUnLock(pMeta); + stDebug("vgId:%d recv check downstream results, s-task:0x%x succ:%d, received:%d, total:%d", pMeta->vgId, taskId, + ready, numOfRecv, numOfTotal); } - streamMetaWUnLock(pMeta); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 335f9d27d5..4d343a484d 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -796,5 +796,6 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->sinkDataSize = pSrc->sinkDataSize; pDst->activeCheckpointId = pSrc->activeCheckpointId; pDst->checkpointFailed = pSrc->checkpointFailed; + pDst->chkpointTransId = pSrc->chkpointTransId; } diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 68ae1cce36..51e6ad23fe 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -160,6 +160,45 @@ static STaskStateTrans* streamTaskFindTransform(ETaskStatus state, const EStream return NULL; } +static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, SStreamTask* pTask) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t el = (taosGetTimestampMs() - pSM->startTs); + stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", pTask->id.idStr, + pEventName, el, pSM->prev.state.name, pSM->current.name); + + SAttachedEventInfo* pEvtInfo = taosArrayGet(pSM->pWaitingEventList, 0); + + // OK, let's handle the attached event, since the task has reached the required status now + if (pSM->current.state == pEvtInfo->status) { + stDebug("s-task:%s handle the event:%s in waiting list, state:%s", pTask->id.idStr, + GET_EVT_NAME(pEvtInfo->event), pSM->current.name); + + // remove it + taosArrayPop(pSM->pWaitingEventList); + + STaskStateTrans* pNextTrans = streamTaskFindTransform(pSM->current.state, pEvtInfo->event); + ASSERT(pSM->pActiveTrans == NULL && pNextTrans != NULL); + + pSM->pActiveTrans = pNextTrans; + pSM->startTs = taosGetTimestampMs(); + taosThreadMutexUnlock(&pTask->lock); + + code = pNextTrans->pAction(pSM->pTask); + if (pNextTrans->autoInvokeEndFn) { + return streamTaskOnHandleEventSuccess(pSM, pNextTrans->event); + } else { + return code; + } + } else { + taosThreadMutexUnlock(&pTask->lock); + stDebug("s-task:%s state:%s event:%s in waiting list, req state:%s not fulfilled, put it back", pTask->id.idStr, + pSM->current.name, GET_EVT_NAME(pEvtInfo->event), + StreamTaskStatusList[pEvtInfo->status].name); + } + + return code; +} + void streamTaskRestoreStatus(SStreamTask* pTask) { SStreamTaskSM* pSM = 
pTask->status.pSM; @@ -175,7 +214,13 @@ void streamTaskRestoreStatus(SStreamTask* pTask) { pSM->prev.evt = 0; pSM->startTs = taosGetTimestampMs(); - stDebug("s-task:%s restore status, %s -> %s", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); + + if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { + stDebug("s-task:%s restore status, %s -> %s, and then handle waiting event", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); + doHandleWaitingEvent(pSM, "restore-pause/halt", pTask); + } else { + stDebug("s-task:%s restore status, %s -> %s", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); + } taosThreadMutexUnlock(&pTask->lock); } @@ -343,39 +388,7 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even pTrans->pSuccAction(pTask); if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { - int64_t el = (taosGetTimestampMs() - pSM->startTs); - stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", pTask->id.idStr, - GET_EVT_NAME(pTrans->event), el, pSM->prev.state.name, pSM->current.name); - - SAttachedEventInfo* pEvtInfo = taosArrayGet(pSM->pWaitingEventList, 0); - - // OK, let's handle the attached event, since the task has reached the required status now - if (pSM->current.state == pEvtInfo->status) { - stDebug("s-task:%s handle the event:%s in waiting list, state:%s", pTask->id.idStr, - GET_EVT_NAME(pEvtInfo->event), pSM->current.name); - - // remove it - taosArrayPop(pSM->pWaitingEventList); - - STaskStateTrans* pNextTrans = streamTaskFindTransform(pSM->current.state, pEvtInfo->event); - ASSERT(pSM->pActiveTrans == NULL && pNextTrans != NULL); - - pSM->pActiveTrans = pNextTrans; - pSM->startTs = taosGetTimestampMs(); - taosThreadMutexUnlock(&pTask->lock); - - int32_t code = pNextTrans->pAction(pSM->pTask); - if (pNextTrans->autoInvokeEndFn) { - return streamTaskOnHandleEventSuccess(pSM, pNextTrans->event); - } else { - return code; - } - } else { - taosThreadMutexUnlock(&pTask->lock); - stDebug("s-task:%s state:%s event:%s in waiting list, req state:%s not fulfilled, put it back", pTask->id.idStr, - pSM->current.name, GET_EVT_NAME(pEvtInfo->event), - StreamTaskStatusList[pEvtInfo->status].name); - } + doHandleWaitingEvent(pSM, GET_EVT_NAME(pTrans->event), pTask); } else { taosThreadMutexUnlock(&pTask->lock); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index b8740a2858..6f3b3fdf98 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1162,9 +1162,12 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) { ASSERTS(pSyncNode->pLogStore != NULL, "log store not created"); ASSERTS(pSyncNode->pLogBuf != NULL, "ring log buffer not created"); + taosThreadMutexLock(&pSyncNode->pLogBuf->mutex); SyncIndex lastVer = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); SyncIndex commitIndex = pSyncNode->pLogStore->syncLogCommitIndex(pSyncNode->pLogStore); SyncIndex endIndex = pSyncNode->pLogBuf->endIndex; + taosThreadMutexUnlock(&pSyncNode->pLogBuf->mutex); + if (lastVer != -1 && endIndex != lastVer + 1) { terrno = TSDB_CODE_WAL_LOG_INCOMPLETE; sError("vgId:%d, failed to restore sync node since %s. 
expected lastLogIndex:%" PRId64 ", lastVer:%" PRId64 "", diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index f060e9da13..2277b70c8f 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -1087,7 +1087,10 @@ static int32_t syncSnapSenderExchgSnapInfo(SSyncNode *pSyncNode, SSyncSnapshotSe // sender static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { + int32_t code = -1; SSnapshot snapshot = {0}; + + taosThreadMutexLock(&pSender->pSndBuf->mutex); pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); // prepare @@ -1103,20 +1106,24 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend // start reader if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { if (syncSnapSenderExchgSnapInfo(pSyncNode, pSender, pMsg) != 0) { - return -1; + goto _out; } } - int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); + code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); if (code != 0) { sSError(pSender, "prepare snapshot failed since %s", terrstr()); - return -1; + goto _out; } // update next index syncIndexMgrSetIndex(pSyncNode->pNextIndex, &pMsg->srcId, snapshot.lastApplyIndex + 1); - return snapshotSend(pSender); + code = snapshotSend(pSender); + +_out: + taosThreadMutexUnlock(&pSender->pSndBuf->mutex); + return code; } static int32_t snapshotSenderSignatureCmp(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index 16660a9477..4d9ad9b5bd 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -72,7 +72,7 @@ TEST(osTest, osSystem) { const int sysLen = 64; char osSysName[sysLen]; int ret = taosGetOsReleaseName(osSysName, NULL, NULL, sysLen); - printf("os systeme name:%s\n", osSysName); + printf("os system name:%s\n", osSysName); ASSERT_EQ(ret, 0); } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 184e18fc67..e113a95fcc 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -337,14 +337,10 @@ static int32_t taosOpenNewLogFile() { } void taosResetLog() { - char lastName[LOG_FILE_NAME_LEN + 20]; - sprintf(lastName, "%s.%d", tsLogObj.logName, tsLogObj.flag); - // force create a new log file tsLogObj.lines = tsNumOfLogLines + 10; taosOpenNewLogFile(); - (void)taosRemoveFile(lastName); uInfo("=================================="); uInfo(" reset log file "); diff --git a/tests/army/enterprise/multi-level/mlevel_basic.json b/tests/army/enterprise/multi-level/mlevel_basic.json new file mode 100644 index 0000000000..1c2b9274d2 --- /dev/null +++ b/tests/army/enterprise/multi-level/mlevel_basic.json @@ -0,0 +1,58 @@ +{ + "filetype": "insert", + "cfgdir": "/etc/taos", + "host": "127.0.0.1", + "port": 6030, + "user": "root", + "password": "taosdata", + "connection_pool_size": 8, + "num_of_records_per_req": 2000, + "thread_count": 2, + "create_table_thread_count": 1, + "confirm_parameter_prompt": "no", + "databases": [ + { + "dbinfo": { + "name": "db", + "drop": "yes", + "vgroups": 2, + "replica": 1, + "duration":"1d", + "keep": "3d,6d,30d" + }, + "super_tables": [ + { + "name": "stb", + "child_table_exists": "no", + "childtable_count": 4, + "insert_rows": 1000000, + "childtable_prefix": "d", + "insert_mode": "taosc", + "timestamp_step": 1000, + "start_timestamp":"now-12d", + "columns": [ + { "type": 
"bool", "name": "bc"}, + { "type": "float", "name": "fc" }, + { "type": "double", "name": "dc"}, + { "type": "tinyint", "name": "ti", "values":["1"]}, + { "type": "smallint", "name": "si" }, + { "type": "int", "name": "ic" }, + { "type": "bigint", "name": "bi" }, + { "type": "utinyint", "name": "uti"}, + { "type": "usmallint", "name": "usi"}, + { "type": "uint", "name": "ui" }, + { "type": "ubigint", "name": "ubi"}, + { "type": "binary", "name": "bin", "len": 32}, + { "type": "nchar", "name": "nch", "len": 64} + ], + "tags": [ + {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, + {"name": "location","type": "binary", "len": 16, "values": + ["San Francisco", "Los Angles", "San Diego", "San Jose", "Palo Alto", "Campbell", "Mountain View","Sunnyvale", "Santa Clara", "Cupertino"] + } + ] + } + ] + } + ] +} diff --git a/tests/army/enterprise/multi-level/mlevel_basic.py b/tests/army/enterprise/multi-level/mlevel_basic.py index e3a3f57c74..3bec2bfb72 100644 --- a/tests/army/enterprise/multi-level/mlevel_basic.py +++ b/tests/army/enterprise/multi-level/mlevel_basic.py @@ -15,26 +15,62 @@ import sys import time import taos +import frame +import frame.etool + from frame.log import * from frame.cases import * from frame.sql import * +from frame.caseBase import * +from frame import * -class TDTestCase: - # init - def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) - tdLog.debug(f"start to excute {__file__}") - tdSql.init(conn.cursor(), True) + +class TDTestCase(TBase): + + + def insertData(self): + tdLog.info(f"insert data.") + # taosBenchmark run + json = etool.curFile(__file__, "mlevel_basic.json") + etool.runBenchmark(json=json) + + tdSql.execute(f"use {self.db}") + # set insert data information + self.childtable_count = 4 + self.insert_rows = 1000000 + self.timestamp_step = 1000 + + def doAction(self): + tdLog.info(f"do action.") + self.flushDb() + self.trimDb() + self.compactDb() # run def run(self): - # check two db query result same - tdLog.info(f"hello world.") + tdLog.debug(f"start to excute {__file__}") + + # insert data + self.insertData() + + # check insert data correct + self.checkInsertCorrect() + + # save + self.snapshotAgg() + + # do action + self.doAction() + + # check save agg result correct + self.checkAggCorrect() + + # check insert correct again + self.checkInsertCorrect() - # stop - def stop(self): - tdSql.close() tdLog.success(f"{__file__} successfully executed") + + tdCases.addLinux(__file__, TDTestCase()) -tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/army/frame/caseBase.py b/tests/army/frame/caseBase.py new file mode 100644 index 0000000000..441196f050 --- /dev/null +++ b/tests/army/frame/caseBase.py @@ -0,0 +1,106 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import os +import time +import datetime + +from frame.log import * +from frame.sql import * + +# test case base +class TBase: + +# +# frame call +# + + # init + def init(self, conn, logSql, replicaVar=1): + # save param + self.replicaVar = int(replicaVar) + tdSql.init(conn.cursor(), True) + + # record server information + self.dnodeNum = 0 + self.mnodeNum = 0 + self.mLevel = 0 + self.mLevelDisk = 0 + + # test case information + self.db = "db" + self.stb = "stb" + + # variant in taosBenchmark json + self.childtable_count = 2 + self.insert_rows = 1000000 + self.timestamp_step = 1000 + + # sql + self.sqlSum = f"select sum(ic) from {self.stb}" + self.sqlMax = f"select max(ic) from {self.stb}" + self.sqlMin = f"select min(ic) from {self.stb}" + self.sqlAvg = f"select avg(ic) from {self.stb}" + + + # stop + def stop(self): + tdSql.close() + + +# +# db action +# + + def trimDb(self): + tdSql.execute(f"trim database {self.db}") + + def compactDb(self): + tdSql.execute(f"compact database {self.db}") + + def flushDb(self): + tdSql.execute(f"flush database {self.db}") + +# +# check db correct +# + + # basic + def checkInsertCorrect(self): + # check count + sql = f"select count(*) from {self.stb}" + tdSql.checkAgg(sql, self.childtable_count * self.insert_rows) + + # check child table count + sql = f" select count(*) from (select count(*) as cnt , tbname from {self.stb} group by tbname) where cnt = {self.insert_rows} " + tdSql.checkAgg(sql, self.childtable_count) + + # check step + sql = f"select count(*) from (select diff(ts) as dif from {self.stb} partition by tbname) where dif != {self.timestamp_step}" + tdSql.checkAgg(sql, 0) + + # save agg result + def snapshotAgg(self): + + self.sum = tdSql.getFirstValue(self.sqlSum) + self.avg = tdSql.getFirstValue(self.sqlAvg) + self.min = tdSql.getFirstValue(self.sqlMin) + self.max = tdSql.getFirstValue(self.sqlMax) + + # check agg + def checkAggCorrect(self): + tdSql.checkAgg(self.sqlSum, self.sum) + tdSql.checkAgg(self.sqlAvg, self.avg) + tdSql.checkAgg(self.sqlMin, self.min) + tdSql.checkAgg(self.sqlMax, self.max) diff --git a/tests/army/frame/common.py b/tests/army/frame/common.py index 93059ff078..5cdb3f9f46 100644 --- a/tests/army/frame/common.py +++ b/tests/army/frame/common.py @@ -23,7 +23,7 @@ import taos from frame.log import * from frame.sql import * from frame.cases import * -from frame.dnodes import * +from frame.server.dnodes import * from frame.common import * from frame.constant import * from dataclasses import dataclass,field diff --git a/tests/army/frame/dnodes-default.py b/tests/army/frame/dnodes-default.py deleted file mode 100644 index a1d1698b00..0000000000 --- a/tests/army/frame/dnodes-default.py +++ /dev/null @@ -1,502 +0,0 @@ -################################################################### -# Copyright (c) 2016 by TAOS Technologies, Inc. -# All rights reserved. -# -# This file is proprietary and confidential to TAOS Technologies. 
-# No part of this file may be reproduced, stored, transmitted, -# disclosed or used in any form or by any means other than as -# expressly provided by the written permission from Jianhui Tao -# -################################################################### - -# -*- coding: utf-8 -*- - -import sys -import os -import os.path -import subprocess -from frame.log import * - - -class TDSimClient: - def __init__(self): - self.testCluster = False - - self.cfgDict = { - "numOfLogLines": "100000000", - "numOfThreadsPerCore": "2.0", - "locale": "en_US.UTF-8", - "charset": "UTF-8", - "asyncLog": "0", - "minTablesPerVnode": "4", - "maxTablesPerVnode": "1000", - "tableIncStepPerVnode": "10000", - "maxVgroupsPerDb": "1000", - "sdbDebugFlag": "143", - "rpcDebugFlag": "135", - "tmrDebugFlag": "131", - "cDebugFlag": "135", - "udebugFlag": "135", - "jnidebugFlag": "135", - "qdebugFlag": "135", - "telemetryReporting": "0", - } - def init(self, path): - self.__init__() - self.path = path - - def getLogDir(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - return self.logDir - - def getCfgDir(self): - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - return self.cfgDir - - def setTestCluster(self, value): - self.testCluster = value - - def addExtraCfg(self, option, value): - self.cfgDict.update({option: value}) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - self.cfgPath = os.path.join(self.path,"sim","psim","cfg","taos.cfg") - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("logDir", self.logDir) - - for key, value in self.cfgDict.items(): - self.cfg(key, value) - - tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath)) - - -class TDDnode: - def __init__(self, index): - self.index = index - self.running = 0 - self.deployed = 0 - self.testCluster = False - self.valgrind = 0 - - def init(self, path): - self.path = path - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def getDataSize(self): - totalSize = 0 - - if (self.deployed == 1): - for dirpath, dirnames, filenames in os.walk(self.dataDir): - for f in filenames: - fp = os.path.join(dirpath, f) - - if not os.path.islink(fp): - totalSize = totalSize + os.path.getsize(fp) - - return totalSize - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") - self.dataDir = os.path.join(self.path,"sim","dnode%d" % self.index, "data") - self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") - self.cfgPath = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg","taos.cfg") - - cmd = "rm -rf " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = 
"mkdir -p " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.startIP() - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("publicIp", "192.168.0.%d" % (self.index)) - self.cfg("internalIp", "192.168.0.%d" % (self.index)) - self.cfg("privateIp", "192.168.0.%d" % (self.index)) - self.cfg("dataDir", self.dataDir) - self.cfg("logDir", self.logDir) - self.cfg("numOfLogLines", "100000000") - self.cfg("mnodeEqualVnodeNum", "0") - self.cfg("walLevel", "2") - self.cfg("fsync", "1000") - self.cfg("statusInterval", "1") - self.cfg("numOfMnodes", "3") - self.cfg("numOfThreadsPerCore", "2.0") - self.cfg("monitor", "0") - self.cfg("maxVnodeConnections", "30000") - self.cfg("maxMgmtConnections", "30000") - self.cfg("maxMeterConnections", "30000") - self.cfg("maxShellConns", "30000") - self.cfg("locale", "en_US.UTF-8") - self.cfg("charset", "UTF-8") - self.cfg("asyncLog", "0") - self.cfg("anyIp", "0") - self.cfg("dDebugFlag", "135") - self.cfg("mDebugFlag", "135") - self.cfg("sdbDebugFlag", "135") - self.cfg("rpcDebugFlag", "135") - self.cfg("tmrDebugFlag", "131") - self.cfg("cDebugFlag", "135") - self.cfg("httpDebugFlag", "135") - self.cfg("monitorDebugFlag", "135") - self.cfg("udebugFlag", "135") - self.cfg("jnidebugFlag", "135") - self.cfg("qdebugFlag", "135") - self.deployed = 1 - tdLog.debug( - "dnode:%d is deployed and configured by %s" % - (self.index, self.cfgPath)) - - def getBuildPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root)-len("/build/bin")] - break - return buildPath - - def start(self): - buildPath = self.getBuildPath() - - if (buildPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found in %s" % buildPath) - - binPath = buildPath + "/build/bin/taosd" - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes" - - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - - print(cmd) - - if os.system(cmd) != 0: - tdLog.exit(cmd) - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - - tdLog.debug("wait 5 seconds for the dnode:%d to start." 
% (self.index)) - time.sleep(5) - - def stop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -INT %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) - - def forcestop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -KILL %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index)) - - def startIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def stopIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % ( - self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def getDnodeRootDir(self, index): - dnodeRootDir = os.path.join(self.path,"sim","psim","dnode%d" % index) - return dnodeRootDir - - def getDnodesRootDir(self): - dnodesRootDir = os.path.join(self.path,"sim","psim") - return dnodesRootDir - - -class TDDnodes: - def __init__(self): - self.dnodes = [] - self.dnodes.append(TDDnode(1)) - self.dnodes.append(TDDnode(2)) - self.dnodes.append(TDDnode(3)) - self.dnodes.append(TDDnode(4)) - self.dnodes.append(TDDnode(5)) - self.dnodes.append(TDDnode(6)) - self.dnodes.append(TDDnode(7)) - self.dnodes.append(TDDnode(8)) - self.dnodes.append(TDDnode(9)) - self.dnodes.append(TDDnode(10)) - self.simDeployed = False - - def init(self, path): - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - binPath = os.path.dirname(os.path.realpath(__file__)) - binPath = binPath + "/../../../debug/" - tdLog.debug("binPath %s" % (binPath)) - binPath = os.path.realpath(binPath) - tdLog.debug("binPath real path %s" % (binPath)) - - # cmd = "sudo cp %s/build/lib/libtaos.so /usr/local/lib/taos/" % (binPath) - # tdLog.debug(cmd) - # os.system(cmd) - - # 
cmd = "sudo cp %s/build/bin/taos /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - # cmd = "sudo cp %s/build/bin/taosd /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - if path == "": - # self.path = os.path.expanduser('~') - self.path = os.path.abspath(binPath + "../../") - else: - self.path = os.path.realpath(path) - - for i in range(len(self.dnodes)): - self.dnodes[i].init(self.path) - - self.sim = TDSimClient() - self.sim.init(self.path) - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def deploy(self, index): - self.sim.setTestCluster(self.testCluster) - - if (self.simDeployed == False): - self.sim.deploy() - self.simDeployed = True - - self.check(index) - self.dnodes[index - 1].setTestCluster(self.testCluster) - self.dnodes[index - 1].setValgrind(self.valgrind) - self.dnodes[index - 1].deploy() - - def cfg(self, index, option, value): - self.check(index) - self.dnodes[index - 1].cfg(option, value) - - def start(self, index): - self.check(index) - self.dnodes[index - 1].start() - - def stop(self, index): - self.check(index) - self.dnodes[index - 1].stop() - - def getDataSize(self, index): - self.check(index) - return self.dnodes[index - 1].getDataSize() - - def forcestop(self, index): - self.check(index) - self.dnodes[index - 1].forcestop() - - def startIP(self, index): - self.check(index) - - if self.testCluster: - self.dnodes[index - 1].startIP() - - def stopIP(self, index): - self.check(index) - - if self.dnodes[index - 1].testCluster: - self.dnodes[index - 1].stopIP() - - def check(self, index): - if index < 1 or index > 10: - tdLog.exit("index:%d should on a scale of [1, 10]" % (index)) - - def stopAll(self): - tdLog.info("stop all dnodes") - for i in range(len(self.dnodes)): - self.dnodes[i].stop() - - psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - if processID: - cmd = "sudo systemctl stop taosd" - os.system(cmd) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - - def getDnodesRootDir(self): - dnodesRootDir = "%s/sim" % (self.path) - return dnodesRootDir - - def getSimCfgPath(self): - return self.sim.getCfgDir() - - def getSimLogPath(self): - return self.sim.getLogDir() - - def addSimExtraCfg(self, option, value): - self.sim.addExtraCfg(option, value) - - -tdDnodes = TDDnodes() diff --git a/tests/army/frame/dnodes-no-random-fail.py b/tests/army/frame/dnodes-no-random-fail.py deleted file mode 100644 index 3b3083396b..0000000000 --- a/tests/army/frame/dnodes-no-random-fail.py +++ /dev/null @@ -1,500 +0,0 @@ 
-################################################################### -# Copyright (c) 2016 by TAOS Technologies, Inc. -# All rights reserved. -# -# This file is proprietary and confidential to TAOS Technologies. -# No part of this file may be reproduced, stored, transmitted, -# disclosed or used in any form or by any means other than as -# expressly provided by the written permission from Jianhui Tao -# -################################################################### - -# -*- coding: utf-8 -*- - -import sys -import os -import os.path -import subprocess -from frame.log import * - - -class TDSimClient: - def __init__(self): - self.testCluster = False - - self.cfgDict = { - "numOfLogLines": "100000000", - "numOfThreadsPerCore": "2.0", - "locale": "en_US.UTF-8", - "charset": "UTF-8", - "asyncLog": "0", - "anyIp": "0", - "sdbDebugFlag": "135", - "rpcDebugFlag": "135", - "tmrDebugFlag": "131", - "cDebugFlag": "135", - "udebugFlag": "135", - "jnidebugFlag": "135", - "qdebugFlag": "135", - "telemetryReporting": "0", - } - - def init(self, path): - self.__init__() - self.path = path - - def getLogDir(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - return self.logDir - - def getCfgDir(self): - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - return self.cfgDir - - def setTestCluster(self, value): - self.testCluster = value - - def addExtraCfg(self, option, value): - self.cfgDict.update({option: value}) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - self.cfgPath = os.path.join(self.path,"sim","psim","cfg","taos.cfg") - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("logDir", self.logDir) - - for key, value in self.cfgDict.items(): - self.cfg(key, value) - - tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath)) - - -class TDDnode: - def __init__(self, index): - self.index = index - self.running = 0 - self.deployed = 0 - self.testCluster = False - self.valgrind = 0 - - def init(self, path): - self.path = path - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def getDataSize(self): - totalSize = 0 - - if (self.deployed == 1): - for dirpath, dirnames, filenames in os.walk(self.dataDir): - for f in filenames: - fp = os.path.join(dirpath, f) - - if not os.path.islink(fp): - totalSize = totalSize + os.path.getsize(fp) - - return totalSize - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") - self.dataDir = os.path.join(self.path,"sim","dnode%d" % self.index, "data") - self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") - self.cfgPath = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg","taos.cfg") - - cmd = "rm -rf " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: 
- tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.startIP() - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("publicIp", "192.168.0.%d" % (self.index)) - self.cfg("internalIp", "192.168.0.%d" % (self.index)) - self.cfg("privateIp", "192.168.0.%d" % (self.index)) - self.cfg("dataDir", self.dataDir) - self.cfg("logDir", self.logDir) - self.cfg("numOfLogLines", "100000000") - self.cfg("mnodeEqualVnodeNum", "0") - self.cfg("walLevel", "2") - self.cfg("fsync", "1000") - self.cfg("statusInterval", "1") - self.cfg("numOfMnodes", "3") - self.cfg("numOfThreadsPerCore", "2.0") - self.cfg("monitor", "0") - self.cfg("maxVnodeConnections", "30000") - self.cfg("maxMgmtConnections", "30000") - self.cfg("maxMeterConnections", "30000") - self.cfg("maxShellConns", "30000") - self.cfg("locale", "en_US.UTF-8") - self.cfg("charset", "UTF-8") - self.cfg("asyncLog", "0") - self.cfg("anyIp", "0") - self.cfg("dDebugFlag", "135") - self.cfg("mDebugFlag", "135") - self.cfg("sdbDebugFlag", "135") - self.cfg("rpcDebugFlag", "135") - self.cfg("tmrDebugFlag", "131") - self.cfg("cDebugFlag", "135") - self.cfg("httpDebugFlag", "135") - self.cfg("monitorDebugFlag", "135") - self.cfg("udebugFlag", "135") - self.cfg("jnidebugFlag", "135") - self.cfg("qdebugFlag", "135") - self.deployed = 1 - tdLog.debug( - "dnode:%d is deployed and configured by %s" % - (self.index, self.cfgPath)) - - def getBuildPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root)-len("/build/bin")] - break - return buildPath - - def start(self): - buildPath = self.getBuildPath() - - if (buildPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found in %s" % buildPath) - - binPath = buildPath + "/build/bin/taosd" - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - cmd = "nohup %s -c %s --random-file-fail-factor 0 > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes" - - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - - print(cmd) - - if os.system(cmd) != 0: - tdLog.exit(cmd) - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - - tdLog.debug("wait 5 seconds for the dnode:%d to start." 
% (self.index)) - time.sleep(5) - - def stop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -INT %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) - - def forcestop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -KILL %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index)) - - def startIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def stopIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % ( - self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def getDnodeRootDir(self, index): - dnodeRootDir = os.path.join(self.path,"sim","psim","dnode%d" % index) - return dnodeRootDir - - def getDnodesRootDir(self): - dnodesRootDir = os.path.join(self.path,"sim","psim") - return dnodesRootDir - - -class TDDnodes: - def __init__(self): - self.dnodes = [] - self.dnodes.append(TDDnode(1)) - self.dnodes.append(TDDnode(2)) - self.dnodes.append(TDDnode(3)) - self.dnodes.append(TDDnode(4)) - self.dnodes.append(TDDnode(5)) - self.dnodes.append(TDDnode(6)) - self.dnodes.append(TDDnode(7)) - self.dnodes.append(TDDnode(8)) - self.dnodes.append(TDDnode(9)) - self.dnodes.append(TDDnode(10)) - self.simDeployed = False - - def init(self, path): - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - binPath = os.path.dirname(os.path.realpath(__file__)) - binPath = binPath + "/../../../debug/" - tdLog.debug("binPath %s" % (binPath)) - binPath = os.path.realpath(binPath) - tdLog.debug("binPath real path %s" % (binPath)) - - # cmd = "sudo cp %s/build/lib/libtaos.so /usr/local/lib/taos/" % (binPath) - # tdLog.debug(cmd) - # os.system(cmd) - - # 
cmd = "sudo cp %s/build/bin/taos /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - # cmd = "sudo cp %s/build/bin/taosd /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - if path == "": - # self.path = os.path.expanduser('~') - self.path = os.path.abspath(binPath + "../../") - else: - self.path = os.path.realpath(path) - - for i in range(len(self.dnodes)): - self.dnodes[i].init(self.path) - - self.sim = TDSimClient() - self.sim.init(self.path) - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def deploy(self, index): - self.sim.setTestCluster(self.testCluster) - - if (self.simDeployed == False): - self.sim.deploy() - self.simDeployed = True - - self.check(index) - self.dnodes[index - 1].setTestCluster(self.testCluster) - self.dnodes[index - 1].setValgrind(self.valgrind) - self.dnodes[index - 1].deploy() - - def cfg(self, index, option, value): - self.check(index) - self.dnodes[index - 1].cfg(option, value) - - def start(self, index): - self.check(index) - self.dnodes[index - 1].start() - - def stop(self, index): - self.check(index) - self.dnodes[index - 1].stop() - - def getDataSize(self, index): - self.check(index) - return self.dnodes[index - 1].getDataSize() - - def forcestop(self, index): - self.check(index) - self.dnodes[index - 1].forcestop() - - def startIP(self, index): - self.check(index) - - if self.testCluster: - self.dnodes[index - 1].startIP() - - def stopIP(self, index): - self.check(index) - - if self.dnodes[index - 1].testCluster: - self.dnodes[index - 1].stopIP() - - def check(self, index): - if index < 1 or index > 10: - tdLog.exit("index:%d should on a scale of [1, 10]" % (index)) - - def stopAll(self): - tdLog.info("stop all dnodes") - for i in range(len(self.dnodes)): - self.dnodes[i].stop() - - psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - if processID: - cmd = "sudo systemctl stop taosd" - os.system(cmd) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - - def getDnodesRootDir(self): - dnodesRootDir = "%s/sim" % (self.path) - return dnodesRootDir - - def getSimCfgPath(self): - return self.sim.getCfgDir() - - def getSimLogPath(self): - return self.sim.getLogDir() - - def addSimExtraCfg(self, option, value): - self.sim.addExtraCfg(option, value) - - -tdDnodes = TDDnodes() diff --git a/tests/army/frame/dnodes-random-fail.py b/tests/army/frame/dnodes-random-fail.py deleted file mode 100644 index 794adfd7ed..0000000000 --- a/tests/army/frame/dnodes-random-fail.py +++ /dev/null @@ -1,497 +0,0 @@ 
-################################################################### -# Copyright (c) 2016 by TAOS Technologies, Inc. -# All rights reserved. -# -# This file is proprietary and confidential to TAOS Technologies. -# No part of this file may be reproduced, stored, transmitted, -# disclosed or used in any form or by any means other than as -# expressly provided by the written permission from Jianhui Tao -# -################################################################### - -# -*- coding: utf-8 -*- - -import sys -import os -import os.path -import subprocess -from frame.log import * - - -class TDSimClient: - def __init__(self): - self.testCluster = False - - self.cfgDict = { - "numOfLogLines": "100000000", - "locale": "en_US.UTF-8", - "charset": "UTF-8", - "asyncLog": "0", - "rpcDebugFlag": "135", - "tmrDebugFlag": "131", - "cDebugFlag": "135", - "udebugFlag": "135", - "jnidebugFlag": "135", - "qdebugFlag": "135", - "telemetryReporting": "0", - } - - def init(self, path): - self.__init__() - self.path = path - - def getLogDir(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - return self.logDir - - def getCfgDir(self): - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - return self.cfgDir - - def setTestCluster(self, value): - self.testCluster = value - - def addExtraCfg(self, option, value): - self.cfgDict.update({option: value}) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - self.cfgPath = os.path.join(self.path,"sim","psim","cfg","taos.cfg") - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("logDir", self.logDir) - - for key, value in self.cfgDict.items(): - self.cfg(key, value) - - tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath)) - - -class TDDnode: - def __init__(self, index): - self.index = index - self.running = 0 - self.deployed = 0 - self.testCluster = False - self.valgrind = 0 - - def init(self, path): - self.path = path - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def getDataSize(self): - totalSize = 0 - - if (self.deployed == 1): - for dirpath, dirnames, filenames in os.walk(self.dataDir): - for f in filenames: - fp = os.path.join(dirpath, f) - - if not os.path.islink(fp): - totalSize = totalSize + os.path.getsize(fp) - - return totalSize - - def deploy(self): - self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") - self.dataDir = os.path.join(self.path,"sim","dnode%d" % self.index, "data") - self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") - self.cfgPath = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg","taos.cfg") - - cmd = "rm -rf " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) 
!= 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "mkdir -p " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.startIP() - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("publicIp", "192.168.0.%d" % (self.index)) - self.cfg("internalIp", "192.168.0.%d" % (self.index)) - self.cfg("privateIp", "192.168.0.%d" % (self.index)) - self.cfg("dataDir", self.dataDir) - self.cfg("logDir", self.logDir) - self.cfg("numOfLogLines", "100000000") - self.cfg("mnodeEqualVnodeNum", "0") - self.cfg("walLevel", "2") - self.cfg("fsync", "1000") - self.cfg("statusInterval", "1") - self.cfg("numOfMnodes", "3") - self.cfg("numOfThreadsPerCore", "2.0") - self.cfg("monitor", "0") - self.cfg("maxVnodeConnections", "30000") - self.cfg("maxMgmtConnections", "30000") - self.cfg("maxMeterConnections", "30000") - self.cfg("maxShellConns", "30000") - self.cfg("locale", "en_US.UTF-8") - self.cfg("charset", "UTF-8") - self.cfg("asyncLog", "0") - self.cfg("anyIp", "0") - self.cfg("dDebugFlag", "135") - self.cfg("mDebugFlag", "135") - self.cfg("sdbDebugFlag", "135") - self.cfg("rpcDebugFlag", "135") - self.cfg("tmrDebugFlag", "131") - self.cfg("cDebugFlag", "135") - self.cfg("httpDebugFlag", "135") - self.cfg("monitorDebugFlag", "135") - self.cfg("udebugFlag", "135") - self.cfg("jnidebugFlag", "135") - self.cfg("qdebugFlag", "135") - self.deployed = 1 - tdLog.debug( - "dnode:%d is deployed and configured by %s" % - (self.index, self.cfgPath)) - - def getBuildPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root)-len("/build/bin")] - break - return buildPath - - def start(self): - buildPath = self.getBuildPath() - - if (buildPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found in %s" % buildPath) - - binPath = buildPath + "/build/bin/taosd" - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - cmd = "nohup %s -c %s --alloc-random-fail --random-file-fail-factor 5 > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes" - - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - - print(cmd) - - if os.system(cmd) != 0: - tdLog.exit(cmd) - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - - tdLog.debug("wait 5 seconds for the dnode:%d to start." 
% (self.index)) - time.sleep(5) - - def stop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -INT %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) - - def forcestop(self): - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - while(processID): - killCmd = "kill -KILL %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index)) - - def startIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def stopIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % ( - self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def getDnodeRootDir(self, index): - dnodeRootDir = os.path.join(self.path,"sim","psim","dnode%d" % index) - return dnodeRootDir - - def getDnodesRootDir(self): - dnodesRootDir = os.path.join(self.path,"sim","psim") - return dnodesRootDir - - -class TDDnodes: - def __init__(self): - self.dnodes = [] - self.dnodes.append(TDDnode(1)) - self.dnodes.append(TDDnode(2)) - self.dnodes.append(TDDnode(3)) - self.dnodes.append(TDDnode(4)) - self.dnodes.append(TDDnode(5)) - self.dnodes.append(TDDnode(6)) - self.dnodes.append(TDDnode(7)) - self.dnodes.append(TDDnode(8)) - self.dnodes.append(TDDnode(9)) - self.dnodes.append(TDDnode(10)) - self.simDeployed = False - - def init(self, path): - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - binPath = os.path.dirname(os.path.realpath(__file__)) - binPath = binPath + "/../../../debug/" - tdLog.debug("binPath %s" % (binPath)) - binPath = os.path.realpath(binPath) - tdLog.debug("binPath real path %s" % (binPath)) - - # cmd = "sudo cp %s/build/lib/libtaos.so /usr/local/lib/taos/" % (binPath) - # tdLog.debug(cmd) - # os.system(cmd) - - # 
cmd = "sudo cp %s/build/bin/taos /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - # cmd = "sudo cp %s/build/bin/taosd /usr/local/bin/taos/" % (binPath) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - # tdLog.debug("execute %s" % (cmd)) - - if path == "": - # self.path = os.path.expanduser('~') - self.path = os.path.abspath(binPath + "../../") - else: - self.path = os.path.realpath(path) - - for i in range(len(self.dnodes)): - self.dnodes[i].init(self.path) - - self.sim = TDSimClient() - self.sim.init(self.path) - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def deploy(self, index): - self.sim.setTestCluster(self.testCluster) - - if (self.simDeployed == False): - self.sim.deploy() - self.simDeployed = True - - self.check(index) - self.dnodes[index - 1].setTestCluster(self.testCluster) - self.dnodes[index - 1].setValgrind(self.valgrind) - self.dnodes[index - 1].deploy() - - def cfg(self, index, option, value): - self.check(index) - self.dnodes[index - 1].cfg(option, value) - - def start(self, index): - self.check(index) - self.dnodes[index - 1].start() - - def stop(self, index): - self.check(index) - self.dnodes[index - 1].stop() - - def getDataSize(self, index): - self.check(index) - return self.dnodes[index - 1].getDataSize() - - def forcestop(self, index): - self.check(index) - self.dnodes[index - 1].forcestop() - - def startIP(self, index): - self.check(index) - - if self.testCluster: - self.dnodes[index - 1].startIP() - - def stopIP(self, index): - self.check(index) - - if self.dnodes[index - 1].testCluster: - self.dnodes[index - 1].stopIP() - - def check(self, index): - if index < 1 or index > 10: - tdLog.exit("index:%d should on a scale of [1, 10]" % (index)) - - def stopAll(self): - tdLog.info("stop all dnodes") - for i in range(len(self.dnodes)): - self.dnodes[i].stop() - - psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - if processID: - cmd = "sudo systemctl stop taosd" - os.system(cmd) - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - psCmd = "ps -ef|grep -w taosd| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}'" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8") - while(processID): - killCmd = "kill -TERM %s > /dev/null 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8") - - # if os.system(cmd) != 0 : - # tdLog.exit(cmd) - - def getDnodesRootDir(self): - dnodesRootDir = "%s/sim" % (self.path) - return dnodesRootDir - - def getSimCfgPath(self): - return self.sim.getCfgDir() - - def getSimLogPath(self): - return self.sim.getLogDir() - - def addSimExtraCfg(self, option, value): - self.sim.addExtraCfg(option, value) - - -tdDnodes = TDDnodes() diff --git a/tests/army/frame/eos.py b/tests/army/frame/eos.py new file mode 100644 index 0000000000..dcb63ff3aa --- /dev/null +++ b/tests/army/frame/eos.py @@ -0,0 +1,36 @@ +################################################################### +# Copyright (c) 2023 by 
TAOS Technologies, Inc.
+# All rights reserved.
+#
+# This file is proprietary and confidential to TAOS Technologies.
+# No part of this file may be reproduced, stored, transmitted,
+# disclosed or used in any form or by any means other than as
+# expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+
+#
+# about system function extension
+#
+
+import sys
+import os
+import time
+import datetime
+import platform
+
+# return True if running on the Windows platform
+def isWin():
+    return platform.system().lower() == 'windows'
+
+# execute file and wait until it finishes
+def exe(file):
+    return os.system(file)
+
+# execute file and return immediately (runs in the background; assumes a POSIX shell)
+def exeNoWait(file):
+    return os.system(f"{file} > /dev/null 2>&1 &")
+
+
diff --git a/tests/army/frame/epath.py b/tests/army/frame/epath.py
new file mode 100644
index 0000000000..edff9c78a4
--- /dev/null
+++ b/tests/army/frame/epath.py
@@ -0,0 +1,54 @@
+###################################################################
+# Copyright (c) 2016 by TAOS Technologies, Inc.
+# All rights reserved.
+#
+# This file is proprietary and confidential to TAOS Technologies.
+# No part of this file may be reproduced, stored, transmitted,
+# disclosed or used in any form or by any means other than as
+# expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+
+#
+# about path function extension
+#
+
+import os
+from frame.log import *
+
+# build/bin path
+binDir = ""
+
+def binPath():
+    global binDir
+
+    if binDir != "":
+        return binDir
+
+    selfPath = os.path.dirname(os.path.realpath(__file__))
+
+    if ("community/tests" in selfPath):
+        projPath = selfPath[:selfPath.find("community/tests")]
+    else:
+        projPath = selfPath[:selfPath.find("TDengine/tests")]
+
+    buildPath = ""  # keep defined even if taosd is not found below
+    for root, dirs, files in os.walk(projPath):
+        if ("taosd" in files):
+            rootRealPath = os.path.dirname(os.path.realpath(root))
+            if ("packaging" not in rootRealPath):
+                buildPath = root[:len(root)-len("/build/bin")]
+                break
+    # check
+    if (buildPath == ""):
+        tdLog.exit("taosd not found!")
+    else:
+        tdLog.info(f"taosd found in {buildPath}")
+    # return
+    binDir = buildPath + "/build/bin/"
+    return binDir
+
+def binFile(filename):
+    return binPath() + filename
diff --git a/tests/army/frame/etime.py b/tests/army/frame/etime.py
new file mode 100644
index 0000000000..2ee223e122
--- /dev/null
+++ b/tests/army/frame/etime.py
@@ -0,0 +1,23 @@
+###################################################################
+# Copyright (c) 2023 by TAOS Technologies, Inc.
+# All rights reserved.
+#
+# This file is proprietary and confidential to TAOS Technologies.
+# No part of this file may be reproduced, stored, transmitted,
+# disclosed or used in any form or by any means other than as
+# expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+
+#
+# about time function extension
+#
+
+import sys
+import os
+import time
+import datetime
+
+
diff --git a/tests/army/frame/etool.py b/tests/army/frame/etool.py
new file mode 100644
index 0000000000..35c390dc1a
--- /dev/null
+++ b/tests/army/frame/etool.py
@@ -0,0 +1,46 @@
+###################################################################
+# Copyright (c) 2023 by TAOS Technologies, Inc.
+# All rights reserved.
+#
+# This file is proprietary and confidential to TAOS Technologies.
+# No part of this file may be reproduced, stored, transmitted,
+# disclosed or used in any form or by any means other than as
+# expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+
+#
+# about tool function extension
+#
+
+import sys
+import os
+import time
+import datetime
+import frame.epath
+import frame.eos
+from frame.log import *
+
+# run taosBenchmark in command or json file mode
+def runBenchmark(command="", json=""):
+    # get taosBenchmark path
+    bmFile = frame.epath.binFile("taosBenchmark")
+    if frame.eos.isWin():
+        bmFile += ".exe"
+
+    # run
+    if command != "":
+        frame.eos.exe(bmFile + " " + command)
+    if json != "":
+        cmd = f"{bmFile} -f {json}"
+        print(cmd)
+        status = frame.eos.exe(cmd)
+        if status != 0:
+            tdLog.exit(f"run failed {cmd} status={status}")
+
+
+# get the path of filename in the same directory as fullPath
+def curFile(fullPath, filename):
+    return os.path.dirname(fullPath) + "/" + filename
diff --git a/tests/army/frame/pathFinding.py b/tests/army/frame/pathFinding.py
deleted file mode 100644
index df03f0ed68..0000000000
--- a/tests/army/frame/pathFinding.py
+++ /dev/null
@@ -1,83 +0,0 @@
-###################################################################
-# Copyright (c) 2016 by TAOS Technologies, Inc.
-# All rights reserved.
-#
-# This file is proprietary and confidential to TAOS Technologies.
-# No part of this file may be reproduced, stored, transmitted,
-# disclosed or used in any form or by any means other than as
-# expressly provided by the written permission from Jianhui Tao
-#
-###################################################################
-
-# -*- coding: utf-8 -*-
-
-
-import os
-from frame.log import *
-
-
-
-class TDFindPath:
-    """This class is for finding path within TDengine
-    """
-    def __init__(self):
-        self.file = ""
-
-
-    def init(self, file):
-        """[summary]
-
-        Args:
-            file (str): the file location you want to start the query.
Generally using __file__ - """ - self.file = file - - def getTaosdemoPath(self): - """for finding the path of directory containing taosdemo - - Returns: - str: the path to directory containing taosdemo - """ - selfPath = os.path.dirname(os.path.realpath(self.file)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - - for root, dirs, files in os.walk(projPath): - if ("taosd" in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - buildPath = root[:len(root)-len("/build/bin")] - break - if (buildPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info(f"taosd found in {buildPath}") - return buildPath + "/build/bin/" - - def getTDenginePath(self): - """for finding the root path of TDengine - - Returns: - str: the root path of TDengine - """ - selfPath = os.path.dirname(os.path.realpath(self.file)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - print(projPath) - for root, dirs, files in os.walk(projPath): - if ("sim" in dirs): - print(root) - rootRealPath = os.path.realpath(root) - if (rootRealPath == ""): - tdLog.exit("TDengine not found!") - else: - tdLog.info(f"TDengine found in {rootRealPath}") - return rootRealPath - -tdFindPath = TDFindPath() \ No newline at end of file diff --git a/tests/army/frame/cluster.py b/tests/army/frame/server/cluster.py similarity index 91% rename from tests/army/frame/cluster.py rename to tests/army/frame/server/cluster.py index 4da53840c0..ade8ac39a2 100644 --- a/tests/army/frame/cluster.py +++ b/tests/army/frame/server/cluster.py @@ -8,7 +8,7 @@ import socket from frame.log import * from frame.sql import * from frame.cases import * -from frame.dnodes import * +from frame.server.dnodes import * from frame.common import * class ClusterDnodes(TDDnodes): @@ -35,17 +35,21 @@ class ConfigureyCluster: self.startPort = 6030 self.portStep = 100 self.mnodeNums = 0 + self.level = 0 + self.disk = 0 - def configure_cluster(self ,dnodeNums=5,mnodeNums=0,independentMnode=True,startPort=6030,portStep=100,hostname="%s"%hostname): + def configure_cluster(self ,dnodeNums=5, mnodeNums=0, independentMnode=True, startPort=6030, portStep=100, hostname="%s"%hostname, level=1, disk=1): self.startPort=int(startPort) self.portStep=int(portStep) self.hostname=hostname self.dnodeNums = int(dnodeNums) self.mnodeNums = int(mnodeNums) + self.level = int(level) + self.disk = int(disk) self.dnodes = [] startPort_sec = int(startPort+portStep) for num in range(1, (self.dnodeNums+1)): - dnode = TDDnode(num) + dnode = TDDnode(num, self.level, self.disk) dnode.addExtraCfg("firstEp", f"{hostname}:{self.startPort}") dnode.addExtraCfg("fqdn", f"{hostname}") dnode.addExtraCfg("serverPort", f"{self.startPort + (num-1)*self.portStep}") diff --git a/tests/army/frame/dnodes.py b/tests/army/frame/server/dnodes.py similarity index 96% rename from tests/army/frame/dnodes.py rename to tests/army/frame/server/dnodes.py index 5581f29a57..0d40b665dd 100644 --- a/tests/army/frame/dnodes.py +++ b/tests/army/frame/server/dnodes.py @@ -115,8 +115,11 @@ class TDSimClient: class TDDnode: - def __init__(self, index): + def __init__(self, index=1, level=1, disk=1): self.index = index + self.level = level + self.disk = disk + self.dataDir = [] self.running = 0 self.deployed = 0 self.testCluster = False @@ -209,14 +212,30 @@ class TDDnode: self.remote_conn.run("python3 
./test.py %s -d %s -e %s"%(valgrindStr,remoteCfgDictStr,execCmdStr)) def deploy(self, *updatecfgDict): + # logDir self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") - self.dataDir = os.path.join(self.path,"sim","dnode%d" % self.index, "data") + # dataDir + simPath = os.path.join(self.path, "sim", "dnode%d" % self.index) + primary = 1 + if self.level == 1 and self.disk == 1: + eDir = os.path.join(simPath, "data") + self.dataDir.append(eDir) + else: + for i in range(self.level): + for j in range(self.disk): + eDir = os.path.join(simPath, f"data{i}{j}") + self.dataDir.append(f"{eDir} {i} {primary}") + if primary == 1: + primary = 0 + + # taos.cfg self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") self.cfgPath = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg","taos.cfg") - - cmd = "rm -rf " + self.dataDir - if os.system(cmd) != 0: - tdLog.exit(cmd) + + for eDir in self.dataDir: + cmd = "rm -rf " + eDir + if os.system(cmd) != 0: + tdLog.exit(cmd) cmd = "rm -rf " + self.logDir if os.system(cmd) != 0: @@ -229,7 +248,8 @@ class TDDnode: # cmd = "mkdir -p " + self.dataDir # if os.system(cmd) != 0: # tdLog.exit(cmd) - os.makedirs(self.dataDir) + for eDir in self.dataDir: + os.makedirs(eDir.split(' ')[0]) # cmd = "mkdir -p " + self.logDir # if os.system(cmd) != 0: @@ -275,7 +295,11 @@ class TDDnode: self.addExtraCfg(key, value) if (self.remoteIP == ""): for key, value in self.cfgDict.items(): - self.cfg(key, value) + if type(value) == list: + for v in value: + self.cfg(key, v) + else: + self.cfg(key, value) else: self.remoteExec(self.cfgDict, "tdDnodes.deploy(%d,updateCfgDict)"%self.index) @@ -887,4 +911,10 @@ class TDDnodes: def getAsan(self): return self.asan -tdDnodes = TDDnodes() \ No newline at end of file + def setLevelDisk(self, level, disk): + for i in range(len(self.dnodes)): + self.dnodes[i].level = int(level) + self.dnodes[i].disk = int(disk) + + +tdDnodes = TDDnodes() diff --git a/tests/army/frame/sql.py b/tests/army/frame/sql.py index d7dce2bc3e..2e14f0c2f0 100644 --- a/tests/army/frame/sql.py +++ b/tests/army/frame/sql.py @@ -535,6 +535,16 @@ class TDSql: tdLog.info("%s(%d) failed: sql:%s, elm:%s == expect_elm:%s" % args) raise Exception + # check like select count(*) ... 
sql + def checkAgg(self, sql, expectCnt): + self.query(sql) + self.checkData(0, 0, expectCnt) + + # get first value + def getFirstValue(self, sql) : + self.query(sql) + return self.getData(0, 0) + def get_times(self, time_str, precision="ms"): caller = inspect.getframeinfo(inspect.stack()[1][0]) if time_str[-1] not in TAOS_TIME_INIT: @@ -602,6 +612,7 @@ class TDSql: if self.cursor.istype(col, "BIGINT UNSIGNED"): return "BIGINT UNSIGNED" + ''' def taosdStatus(self, state): tdLog.sleep(5) pstate = 0 @@ -630,6 +641,7 @@ class TDSql: tdLog.exit("taosd state is %d != expect:%d" %args) pass + def haveFile(self, dir, state): if os.path.exists(dir) and os.path.isdir(dir): if not os.listdir(dir): @@ -644,6 +656,7 @@ class TDSql: tdLog.exit("dir: %s is not empty, expect: empty" %dir) else: tdLog.exit("dir: %s doesn't exist" %dir) + def createDir(self, dir): if os.path.exists(dir): shrmtree(dir) @@ -651,5 +664,6 @@ class TDSql: os.makedirs( dir, 755 ) tdLog.info("dir: %s is created" %dir) pass +''' tdSql = TDSql() diff --git a/tests/army/frame/srvCtl.py b/tests/army/frame/srvCtl.py new file mode 100644 index 0000000000..e7ef08cde9 --- /dev/null +++ b/tests/army/frame/srvCtl.py @@ -0,0 +1,27 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import os +import time +import datetime + +class srvCtl: + def __init__(self): + # record server information + self.dnodeNum = 0 + self.mnodeNum = 0 + self.mLevel = 0 + self.mLevelDisk = 0 + +sc = srvCtl() \ No newline at end of file diff --git a/tests/army/test.py b/tests/army/test.py index cc114e0d16..a3e28b772d 100644 --- a/tests/army/test.py +++ b/tests/army/test.py @@ -28,9 +28,9 @@ import importlib import toml from frame.log import * -from frame.dnodes import * +from frame.server.dnodes import * +from frame.server.cluster import * from frame.cases import * -from frame.cluster import * from frame.taosadapter import * import taos @@ -111,8 +111,12 @@ if __name__ == "__main__": asan = False independentMnode = False previousCluster = False - opts, args = getopt.gnu_getopt(sys.argv[1:], 'f:p:m:l:scghrd:k:e:N:M:Q:C:RWD:n:i:aP', [ - 'file=', 'path=', 'master', 'logSql', 'stop', 'cluster', 'valgrind', 'help', 'restart', 'updateCfgDict', 'killv', 'execCmd','dnodeNums','mnodeNums','queryPolicy','createDnodeNums','restful','websocket','adaptercfgupdate','replicaVar','independentMnode','previous']) + level = 1 + disk = 1 + + opts, args = getopt.gnu_getopt(sys.argv[1:], 'f:p:m:l:scghrd:k:e:N:M:Q:C:RWU:n:i:aP:L:D:', [ + 'file=', 'path=', 'master', 'logSql', 'stop', 'cluster', 'valgrind', 'help', 'restart', 'updateCfgDict', 'killv', 'execCmd','dnodeNums','mnodeNums', + 'queryPolicy','createDnodeNums','restful','websocket','adaptercfgupdate','replicaVar','independentMnode',"asan",'previous','level','disk']) for key, value in opts: if key in ['-h', '--help']: tdLog.printNoPrefix( @@ -134,11 +138,13 @@ if __name__ == "__main__": tdLog.printNoPrefix('-C create Dnode Numbers in one cluster') tdLog.printNoPrefix('-R restful realization form') tdLog.printNoPrefix('-W websocket connection') - tdLog.printNoPrefix('-D 
taosadapter update cfg dict ') + tdLog.printNoPrefix('-U taosadapter update cfg dict ') tdLog.printNoPrefix('-n the number of replicas') tdLog.printNoPrefix('-i independentMnode Mnode') tdLog.printNoPrefix('-a address sanitizer mode') tdLog.printNoPrefix('-P run case with [P]revious cluster, do not create new cluster to run case.') + tdLog.printNoPrefix('-L set multiple level number. range 1 ~ 3') + tdLog.printNoPrefix('-D set disk number on each level. range 1 ~ 10') sys.exit(0) @@ -213,7 +219,7 @@ if __name__ == "__main__": if key in ['-a', '--asan']: asan = True - if key in ['-D', '--adaptercfgupdate']: + if key in ['-U', '--adaptercfgupdate']: try: adaptercfgupdate = eval(base64.b64decode(value.encode()).decode()) except: @@ -226,6 +232,12 @@ if __name__ == "__main__": if key in ['-P', '--previous']: previousCluster = True + if key in ['-L', '--level']: + level = value + + if key in ['-D', '--disk']: + disk = value + # # do exeCmd command # @@ -361,6 +373,7 @@ if __name__ == "__main__": tAdapter.stop(force_kill=True) if dnodeNums == 1 : + tdDnodes.setLevelDisk(level, disk) tdDnodes.deploy(1,updateCfgDict) tdDnodes.start(1) tdCases.logSql(logSql) @@ -391,7 +404,7 @@ if __name__ == "__main__": tdLog.exit(f"alter queryPolicy to {queryPolicy} failed") else : tdLog.debug("create an cluster with %s nodes and make %s dnode as independent mnode"%(dnodeNums,mnodeNums)) - dnodeslist = cluster.configure_cluster(dnodeNums=dnodeNums, mnodeNums=mnodeNums, independentMnode=independentMnode) + dnodeslist = cluster.configure_cluster(dnodeNums=dnodeNums, mnodeNums=mnodeNums, independentMnode=independentMnode, level=level, disk=disk) tdDnodes = ClusterDnodes(dnodeslist) tdDnodes.init(deployPath, masterIp) tdDnodes.setTestCluster(testCluster) @@ -498,6 +511,7 @@ if __name__ == "__main__": else: tdLog.info("not need to query") else: + # except windows tdDnodes.setKillValgrind(killValgrind) tdDnodes.init(deployPath, masterIp) tdDnodes.setTestCluster(testCluster) @@ -529,6 +543,7 @@ if __name__ == "__main__": if dnodeNums == 1 : # dnode is one + tdDnodes.setLevelDisk(level, disk) tdDnodes.deploy(1,updateCfgDict) tdDnodes.start(1) tdCases.logSql(logSql) @@ -574,7 +589,8 @@ if __name__ == "__main__": # dnode > 1 cluster tdLog.debug("create an cluster with %s nodes and make %s dnode as independent mnode"%(dnodeNums,mnodeNums)) print(independentMnode,"independentMnode valuse") - dnodeslist = cluster.configure_cluster(dnodeNums=dnodeNums, mnodeNums=mnodeNums, independentMnode=independentMnode) + # create dnode list + dnodeslist = cluster.configure_cluster(dnodeNums=dnodeNums, mnodeNums=mnodeNums, independentMnode=independentMnode, level=level, disk=disk) tdDnodes = ClusterDnodes(dnodeslist) tdDnodes.init(deployPath, masterIp) tdDnodes.setTestCluster(testCluster) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index bcdd143cfc..19a44767a7 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -6,7 +6,7 @@ ,,y,unit-test,bash test.sh #army-test -,,y,army,./pytest.sh python3 ./test.py -f empty.py +,,y,army,./pytest.sh python3 ./test.py -f enterprise/multi-level/mlevel_basic.py -N 3 -L 3 -D 2 #system test ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/stream_basic.py diff --git a/tests/pytest/util/sql.py b/tests/pytest/util/sql.py index 5ed985187a..f8a49983e1 100644 --- a/tests/pytest/util/sql.py +++ b/tests/pytest/util/sql.py @@ -128,7 +128,7 @@ class TDSql: if expectErrInfo == self.error_info: tdLog.info("sql:%s, expected expectErrInfo '%s' 
occured" % (sql, expectErrInfo)) else: - tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo '%s' occured, but not expected errno '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) + tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo '%s' occured, but not expected expectErrInfo '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) else: if expectedErrno != None: if expectedErrno in self.errno: @@ -140,7 +140,7 @@ class TDSql: if expectErrInfo in self.error_info: tdLog.info("sql:%s, expected expectErrInfo '%s' occured" % (sql, expectErrInfo)) else: - tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected errno '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) + tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected expectErrInfo '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) return self.error_info @@ -207,6 +207,7 @@ class TDSql: caller = inspect.getframeinfo(inspect.stack()[1][0]) if i < queryTimes: error_info = repr(e) + print(error_info) self.error_info = ','.join(error_info[error_info.index('(')+1:-1].split(",")[:-1]).replace("'","") self.queryRows = 0 self.queryCols = 0 @@ -217,13 +218,13 @@ class TDSql: if expectErrInfo == self.error_info: tdLog.info("sql:%s, expected expectErrInfo '%s' occured" % (sql, expectErrInfo)) else: - tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo '%s' occured, but not expected errno '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) + tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo '%s' occured, but not expected expectErrInfo '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) else: if expectErrInfo != None: if expectErrInfo in self.error_info: tdLog.info("sql:%s, expected expectErrInfo '%s' occured" % (sql, expectErrInfo)) else: - tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected errno '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) + tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected expectErrInfo '%s'" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) return self.error_info elif i == queryTimes: diff --git a/tests/script/tsim/stream/session1.sim b/tests/script/tsim/stream/session1.sim index cf42159d84..aae17053b2 100644 --- a/tests/script/tsim/stream/session1.sim +++ b/tests/script/tsim/stream/session1.sim @@ -327,4 +327,40 @@ if $rows != 1 then goto loop17 endi +sql create database test2 vgroups 4; +sql use test2; +sql create stable st(ts timestamp,a int,b int,c int) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); +sql create stream streams4 trigger at_once ignore update 0 ignore expired 0 into streamt4 as select _wstart, count(*) c1, count(a) c2 from st session(ts, 2s) ; + +sql insert into t1 values(1648791255100,1,2,3); +sql insert into t1 values(1648791255300,1,2,3); + +sleep 1000 + +sql insert into t1 values(1648791253000,1,2,3) (1648791254000,1,2,3); + +$loop_count = 0 +loop18: +sleep 1000 +sql select * from streamt4; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 1 then + print =====rows=$rows + goto loop18 +endi + +if $data01 != 4 then + print =====data01=$data01 + goto loop18 +endi + +print =====over + system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/system-test/0-others/test_hot_refresh_configurations.py 
diff --git a/tests/system-test/0-others/test_hot_refresh_configurations.py b/tests/system-test/0-others/test_hot_refresh_configurations.py
index cbde8c060e..59222281ae 100644
--- a/tests/system-test/0-others/test_hot_refresh_configurations.py
+++ b/tests/system-test/0-others/test_hot_refresh_configurations.py
@@ -215,6 +215,12 @@ class TDTestCase:
                     self.svr_check(item["name"], item["alias"], item["except_values"], True)
             else:
                 raise Exception(f"unknown key: {key}")
+        # reset log
+        path = os.sep.join([tdDnodes.getDnodesRootDir(), "dnode1", "log", "taosdlog.*"])
+        tdSql.execute("alter all dnodes 'resetlog';")
+        r = subprocess.Popen("cat {} | grep 'reset log file'".format(path), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = r.communicate()
+        assert('reset log file' in stdout.decode())
 
     def stop(self):
         tdSql.close()
diff --git a/tests/system-test/1-insert/ts-4272.py b/tests/system-test/1-insert/ts-4272.py
index bb81305eb3..f2bacd0c2b 100644
--- a/tests/system-test/1-insert/ts-4272.py
+++ b/tests/system-test/1-insert/ts-4272.py
@@ -26,12 +26,10 @@ class TDTestCase:
         self.file1 = f"{self.testcasePath}/b.csv"
         self.file2 = f"{self.testcasePath}/c.csv"
 
-        #os.system("rm -rf %s/b.csv" %self.testcasePath)
         tdLog.debug(f"start to excute {__file__}")
         tdSql.init(conn.cursor(), logSql)
 
     def check_count(self, rows, records):
-        tdSql.execute(f"use {self.db};")
         tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;")
         tdSql.checkRows(rows)
         for i in range(rows):
@@ -39,13 +37,6 @@ class TDTestCase:
 
     def reset_tb(self):
         # create database and tables
-        # os.system("taos -s 'drop database if exists d1;'")
-        # os.system("taos -s 'create database d1;use d1;create stable meters (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);'")
-        # os.system(f"taos -s 'use d1;create table d2001 using meters(groupId) tags(5);'")
-        # res = os.system(f"taos -s 'use d1;create table d2002 using meters(groupId) tags(6);'")
-        # if (0 != res):
-        #     tdLog.exit(f"create tb error")
-
         tdSql.execute(f"drop database if exists {self.db};")
         tdSql.execute(f"create database {self.db};")
         tdSql.execute(f"use {self.db};")
@@ -54,13 +45,14 @@ class TDTestCase:
         tdSql.execute(f"create table {self.tb2} {self.tag2};")
         tdSql.execute(f"create stable {self.stable1} (ts timestamp , q_int int , q_bigint bigint , q_smallint smallint , q_tinyint tinyint , q_float float , q_double double , q_bool bool , q_binary binary(100) , q_nchar nchar(100) , q_ts timestamp , q_int_null int , q_bigint_null bigint , q_smallint_null smallint , q_tinyint_null tinyint, q_float_null float , q_double_null double , q_bool_null bool , q_binary_null binary(20) , q_nchar_null nchar(20) , q_ts_null timestamp) tags(loc nchar(100) , t_int int , t_bigint bigint , t_smallint smallint , t_tinyint tinyint, t_bool bool , t_binary binary(100) , t_nchar nchar(100) ,t_float float , t_double double , t_ts timestamp);")
timestamp);") + tdSql.execute(f"create table {self.stable1}_1 using {self.stable1}(t_int) tags(1);") + tdSql.execute(f"create table {self.stable2}_1 using {self.stable2}(t_int) tags(2);") def test(self, sql): - sql = f"use {self.db};" + sql - res = os.system(f'taos -s "{sql}"') - # if (0 != res): - # tdLog.exit(f"taos sql error") - + # sql = f"use {self.db};" + sql + # os.system(f'taos -s "{sql}"') + print(f'{sql}\n') + tdSql.execute(sql, 1) def check(self): # same table, auto create + create @@ -95,13 +87,8 @@ class TDTestCase: sql = f"insert into {self.tb1} file '{self.file1}' {self.tb2} file '{self.file2}';" self.test(sql) - # bigNum = 1010000 - # self.check_count(5, [2100, 2100, bigNum, bigNum, bigNum]) + self.check_count(2, [2010000, 1000000]) - result = os.popen("taos -s 'select count(*) from %s.%s'" %(self.db, self.tb1)) - res = result.read() - if (f"OK" in res): - tdLog.info(f"check count success") def make_csv(self, filepath, once, qtime, startts): f = open(filepath, 'w') @@ -118,10 +105,8 @@ class TDTestCase: def test_mix(self): #forbid use both value and file in one insert - result = os.popen(f"insert into {self.tb1} file '{self.file2}' {self.tb2} values('2021-07-13 14:06:34.630', 10.2, 219, 0.32);") - res = result.read() - if (f"error" in res): - tdLog.info(f"forbid success") + self.make_csv(self.file2, 10, 10, self.ts) + tdSql.error(f"insert into {self.tb1} file '{self.file2}' {self.tb2} values('2021-07-13 14:06:34.630', 10.2, 219, 0.32);") def test_bigcsv(self): # prepare csv @@ -144,7 +129,6 @@ class TDTestCase: self.test(sql) print("end insert to table") - #tdSql.execute(f"use d1;") tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;") tdSql.checkRows(2) tdSql.checkData(0, 1, rowNum1) @@ -160,7 +144,7 @@ class TDTestCase: ts = startts + offset rows = [] for i in range(once): - rows.append([table_name, ts + i, offset + i, 'NULL']) + rows.append([f"\'{table_name}\'", ts + i, offset + i, 'NULL']) writer.writerows(rows) f.close() print(datetime.now(), filepath, " ready!") @@ -171,22 +155,22 @@ class TDTestCase: once = 10000 qtime1 = 101 qtime2 = 100 - # rowNum1 = qtime1 * once - # rowNum2 = qtime2 * once child_1 = f"{self.stable1}_1" child_2 = f"{self.stable2}_1" self.make_stable_csv(self.file1, once, qtime1, self.ts - 86400000, child_1) self.make_stable_csv(self.file2, once, qtime2, self.ts, child_2) print("end stable_csv data prepare") - - # insert create child table of stable + sql = f"insert into {self.db}.{self.stable1}(tbname,ts,q_int,q_binary) file '{self.file1}' {self.db}.{self.stable2}(tbname,ts,q_int,q_binary) file '{self.file2}';" self.test(sql) print("end insert to stable") - #tdSql.execute(f"insert into {self.db}.{child_1}(ts, q_int) values(now, 1);") - tdSql.query(f"select tbname,count(*) from {self.stable1} group by tbname order by tbname;") - tdSql.checkRows(0) + tdSql.query(f"select tbname,count(*) from {self.stable1} group by tbname;") + tdSql.checkRows(1) + tdSql.checkData(0, 1, qtime1 * once) + tdSql.query(f"select tbname,count(*) from {self.stable2} group by tbname;") + tdSql.checkRows(1) + tdSql.checkData(0, 1, qtime2 * once) print("check stable success") def run(self): @@ -194,8 +178,10 @@ class TDTestCase: self.reset_tb() self.test_stable_csv() self.test_bigcsv() - self.test_mix() self.check() + self.test_mix() + os.system(f"rm -rf {self.file1}") + os.system(f"rm -rf {self.file2}") tdSql.close() def stop(self): diff --git a/tests/system-test/2-query/last_cache_scan.py b/tests/system-test/2-query/last_cache_scan.py 
diff --git a/tests/system-test/2-query/last_cache_scan.py b/tests/system-test/2-query/last_cache_scan.py
index 4fb8b70c58..39271318ba 100644
--- a/tests/system-test/2-query/last_cache_scan.py
+++ b/tests/system-test/2-query/last_cache_scan.py
@@ -28,7 +28,7 @@ class TDTestCase:
     def init(self, conn, logSql, replicaVar=1):
         self.replicaVar = int(replicaVar)
         tdLog.debug(f"start to excute {__file__}")
-        tdSql.init(conn.cursor(), False)
+        tdSql.init(conn.cursor(), True)
 
     def create_database(self,tsql, dbName,dropFlag=1,vgroups=2,replica=1, duration:str='1d'):
         if dropFlag == 1:
@@ -371,49 +371,48 @@ class TDTestCase:
         tdSql.checkData(0, 10, None)
 
     def test_cache_scan_last_row_with_drop_column2(self):
-        tdSql.query('select last_row(c1) from meters')
+        tdSql.query('select last_row(c2) from meters')
         print(str(tdSql.queryResult))
         tdSql.checkCols(1)
-        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c1; alter table test.meters add column c11 int"])
+        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c2; alter table test.meters add column c1 int"])
         p.check_returncode()
-        tdSql.query('select last_row(c1) from meters', queryTimes=1)
-        print(str(tdSql.queryResult))
-        tdSql.checkCols(1)
+        tdSql.query_success_failed("select ts, last_row(c2), c12, ts, c12 from meters", queryTimes=10, expectErrInfo="Invalid column name: c2")
+        tdSql.query('select last(c1), c1, ts from meters', queryTimes=1)
+        tdSql.checkRows(1)
+        tdSql.checkCols(3)
         tdSql.checkData(0, 0, None)
+        tdSql.checkData(0, 1, None)
 
     def test_cache_scan_last_row_with_partition_by(self):
         tdSql.query('select last(c1) from meters partition by t1')
         print(str(tdSql.queryResult))
         tdSql.checkCols(1)
-        tdSql.checkRows(5)
-        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c1; alter table test.meters add column c11 int"])
+        tdSql.checkRows(2)
+        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c1; alter table test.meters add column c2 int"])
         p.check_returncode()
-        tdSql.query('select last_row(c1) from meters partition by t1', queryTimes=1)
+        tdSql.query_success_failed('select last(c1) from meters partition by t1', queryTimes=10, expectErrInfo="Invalid column name: c1")
+        tdSql.query('select last(c2), c2, ts from meters', queryTimes=1)
         print(str(tdSql.queryResult))
-        tdSql.checkCols(1)
-        tdSql.checkRows(5)
+        tdSql.checkRows(1)
+        tdSql.checkCols(3)
         tdSql.checkData(0, 0, None)
-        tdSql.checkData(1, 0, None)
-        tdSql.checkData(2, 0, None)
-        tdSql.checkData(3, 0, None)
-        tdSql.checkData(4, 0, None)
+        tdSql.checkData(0, 1, None)
+
 
     def test_cache_scan_last_row_with_partition_by_tbname(self):
-        tdSql.query('select last(c1) from meters partition by tbname', queryTimes=1)
+        tdSql.query('select last(c2) from meters partition by tbname')
         print(str(tdSql.queryResult))
         tdSql.checkCols(1)
         tdSql.checkRows(10)
-        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c1; alter table test.meters add column c11 int"])
+        p = subprocess.run(["taos", '-s', "alter table test.meters drop column c2; alter table test.meters add column c1 int"])
         p.check_returncode()
-        tdSql.query('select last_row(c1) from meters partition by tbname', queryTimes=1)
+        tdSql.query_success_failed('select last_row(c2) from meters partition by tbname', queryTimes=10, expectErrInfo="Invalid column name: c2")
+        tdSql.query('select last(c1), c1, ts from meters', queryTimes=1)
         print(str(tdSql.queryResult))
-        tdSql.checkCols(1)
-        tdSql.checkRows(10)
+        tdSql.checkRows(1)
+        tdSql.checkCols(3)
         tdSql.checkData(0, 0, None)
-        tdSql.checkData(1, 0, None)
-        tdSql.checkData(2, 0, None)
-        tdSql.checkData(3, 0, None)
-        tdSql.checkData(4, 0, None)
+        tdSql.checkData(0, 1, None)
@@ -425,9 +424,9 @@ class TDTestCase:
         self.test_cache_scan_with_drop_and_add_column2()
         #self.test_cache_scan_with_drop_column()
         #self.test_cache_scan_last_row_with_drop_column()
-        #self.test_cache_scan_last_row_with_drop_column2()
-        #self.test_cache_scan_last_row_with_partition_by()
-        #self.test_cache_scan_last_row_with_partition_by_tbname()
+        self.test_cache_scan_last_row_with_drop_column2()
+        self.test_cache_scan_last_row_with_partition_by()
+        self.test_cache_scan_last_row_with_partition_by_tbname()
 
     def stop(self):
         tdSql.close()
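The re-enabled last-row cases above all follow the same drop-column / add-column pattern: a query that still names the dropped column must fail with "Invalid column name", while last() over the freshly added column returns NULL because no rows have been written to it. A standalone, best-effort sketch of that pattern using the taospy connector; connection defaults, the database name `test`, and the `meters` schema are assumptions carried over from the cases above and may need adjusting:

# Standalone illustration only -- not part of this patch. Assumes taospy is
# installed, a taosd instance is reachable with default credentials, and the
# `test` database with the `meters` super table from the tests above exists.
import taos

conn = taos.connect()
cursor = conn.cursor()
cursor.execute("use test")

# Drop an existing column and add a brand-new one, mirroring what the tests do
# through `taos -s`.
cursor.execute("alter table meters drop column c2")
cursor.execute("alter table meters add column c1 int")

# Referencing the dropped column should now be rejected ("Invalid column name: c2").
try:
    cursor.execute("select last_row(c2) from meters")
except Exception as err:
    print("expected failure:", err)

# The re-added column holds no data yet, so the cached last() value comes back NULL.
cursor.execute("select last(c1), c1, ts from meters")
print(cursor.fetchall())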