diff --git a/docs/en/12-taos-sql/06-select.md b/docs/en/12-taos-sql/06-select.md index 2c94787440..1fc6fb7e67 100755 --- a/docs/en/12-taos-sql/06-select.md +++ b/docs/en/12-taos-sql/06-select.md @@ -91,13 +91,15 @@ The list of currently supported Hints is as follows: | :-----------: | -------------- | -------------------------- | -----------------------------------| | BATCH_SCAN | None | Batch table scan | JOIN statment for stable | | NO_BATCH_SCAN | None | Sequential table scan | JOIN statment for stable | -| SORT_FOR_GROUP| None | Use sort for partition | With normal column in partition by list | +| SORT_FOR_GROUP| None | Use sort for partition, conflict with PARTITION_FIRST | With normal column in partition by list | +| PARTITION_FIRST| None | Use Partition before aggregate, conflict with SORT_FOR_GROUP | With normal column in partition by list | For example: ```sql SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 and a.ts = b.ts; SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; +SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; ``` ## Lists diff --git a/docs/zh/12-taos-sql/06-select.md b/docs/zh/12-taos-sql/06-select.md index 3c6e4f3bcf..2a7dff6f7d 100755 --- a/docs/zh/12-taos-sql/06-select.md +++ b/docs/zh/12-taos-sql/06-select.md @@ -91,13 +91,15 @@ Hints 是用户控制单个语句查询优化的一种手段,当 Hint 不适 | :-----------: | -------------- | -------------------------- | -----------------------------| | BATCH_SCAN | 无 | 采用批量读表的方式 | 超级表 JOIN 语句 | | NO_BATCH_SCAN | 无 | 采用顺序读表的方式 | 超级表 JOIN 语句 | -| SORT_FOR_GROUP| 无 | 采用sort方式进行分组 | partition by 列表有普通列时 | +| SORT_FOR_GROUP| 无 | 采用sort方式进行分组, 与PARTITION_FIRST冲突 | partition by 列表有普通列时 | +| PARTITION_FIRST| 无 | 在聚合之前使用PARTITION计算分组, 与SORT_FOR_GROUP冲突 | partition by 列表有普通列时 | 举例: ```sql SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 and a.ts = b.ts; SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; +SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; ``` ## 列表 diff --git a/include/common/tglobal.h b/include/common/tglobal.h index f006779a48..f23bb4d51b 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -74,6 +74,7 @@ extern int64_t tsRpcQueueMemoryAllowed; extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +extern int32_t tsSnapReplMaxWaitN; // vnode extern int64_t tsVndCommitMaxIntervalMs; diff --git a/include/common/tmsgcb.h b/include/common/tmsgcb.h index 311bffb7da..dd1e54318e 100644 --- a/include/common/tmsgcb.h +++ b/include/common/tmsgcb.h @@ -53,9 +53,12 @@ typedef struct { void* mgmt; void* clientRpc; void* serverRpc; + void* statusRpc; + void* syncRpc; PutToQueueFp putToQueueFp; GetQueueSizeFp qsizeFp; SendReqFp sendReqFp; + SendReqFp sendSyncReqFp; SendRspFp sendRspFp; RegisterBrokenLinkArgFp registerBrokenLinkArgFp; ReleaseHandleFp releaseHandleFp; @@ -67,6 +70,7 @@ void tmsgSetDefault(const SMsgCb* msgcb); int32_t tmsgPutToQueue(const SMsgCb* msgcb, EQueueType qtype, SRpcMsg* pMsg); int32_t tmsgGetQueueSize(const SMsgCb* msgcb, int32_t vgId, EQueueType qtype); int32_t tmsgSendReq(const SEpSet* epSet, SRpcMsg* pMsg); +int32_t tmsgSendSyncReq(const SEpSet* epSet, SRpcMsg* pMsg); void tmsgSendRsp(SRpcMsg* pMsg); void tmsgRegisterBrokenLinkArg(SRpcMsg* pMsg); void tmsgReleaseHandle(SRpcHandleInfo* pHandle, int8_t type); diff --git a/include/common/ttime.h b/include/common/ttime.h index ed4d1a9290..4db25d2f71 100644 --- a/include/common/ttime.h +++ b/include/common/ttime.h @@ -75,6 +75,7 @@ static FORCE_INLINE int64_t taosGetTimestampToday(int32_t precision) { int64_t taosTimeAdd(int64_t t, int64_t duration, char unit, int32_t precision); int64_t taosTimeTruncate(int64_t ts, const SInterval* pInterval); +int64_t taosTimeGetIntervalEnd(int64_t ts, const SInterval* pInterval); int32_t taosTimeCountIntervalForFill(int64_t skey, int64_t ekey, int64_t interval, char unit, int32_t precision, int32_t order); int32_t parseAbsoluteDuration(const char* token, int32_t tokenlen, int64_t* ts, char* unit, int32_t timePrecision); diff --git a/include/common/ttokendef.h b/include/common/ttokendef.h index bd711002ae..2ae9a5c050 100644 --- a/include/common/ttokendef.h +++ b/include/common/ttokendef.h @@ -373,6 +373,7 @@ #define TK_BATCH_SCAN 606 #define TK_NO_BATCH_SCAN 607 #define TK_SORT_FOR_GROUP 608 +#define TK_PARTITION_FIRST 609 #define TK_NK_NIL 65535 diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 82c4102369..39e0c07406 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -18,6 +18,7 @@ // message process int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart); +int32_t tqStreamOneTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, int64_t streamId, int32_t taskId); int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored); int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessDispatchRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); @@ -27,10 +28,11 @@ int32_t tqStreamTaskProcessScanHistoryFinishRsp(SStreamMeta* pMeta, SRpcMsg* pMs int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg); -int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored); +int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sversion, char* msg, int32_t msgLen, + bool isLeader, bool restored); int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); -int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta); +int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta, int32_t* numOfTasks); int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index f78b7a3126..be11d04ff8 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -210,6 +210,7 @@ void* qExtractReaderFromStreamScanner(void* scanner); int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner); int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo); +int32_t qResetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo); int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow); int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 865f1b2295..a0b5d938e3 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -259,9 +259,13 @@ EFuncDataRequired fmFuncDynDataRequired(int32_t funcId, void* pRes, STimeWindow* int32_t fmGetFuncExecFuncs(int32_t funcId, SFuncExecFuncs* pFpSet); int32_t fmGetScalarFuncExecFuncs(int32_t funcId, SScalarFuncExecFuncs* pFpSet); int32_t fmGetUdafExecFuncs(int32_t funcId, SFuncExecFuncs* pFpSet); + +#ifdef BUILD_NO_CALL int32_t fmSetInvertFunc(int32_t funcId, SFuncExecFuncs* pFpSet); int32_t fmSetNormalFunc(int32_t funcId, SFuncExecFuncs* pFpSet); bool fmIsInvertible(int32_t funcId); +#endif + char* fmGetFuncName(int32_t funcId); #ifdef __cplusplus diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index fdf598153f..331679a3d3 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -126,6 +126,7 @@ typedef enum EHintOption { HINT_NO_BATCH_SCAN = 1, HINT_BATCH_SCAN, HINT_SORT_FOR_GROUP, + HINT_PARTITION_FIRST, } EHintOption; typedef struct SHintNode { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index a6dac7f5ba..a115783b70 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -50,10 +50,13 @@ extern "C" { (_t)->hTaskInfo.id.streamId = 0; \ } while (0) -#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) -#define STREAM_EXEC_START_ALL_TASKS_ID (-2) -#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) -#define STREAM_EXEC_STOP_ALL_TASKS_ID (-4) +#define STREAM_EXEC_T_EXTRACT_WAL_DATA (-1) +#define STREAM_EXEC_T_START_ALL_TASKS (-2) +#define STREAM_EXEC_T_START_ONE_TASK (-3) +#define STREAM_EXEC_T_RESTART_ALL_TASKS (-4) +#define STREAM_EXEC_T_STOP_ALL_TASKS (-5) +#define STREAM_EXEC_T_RESUME_TASK (-6) +#define STREAM_EXEC_T_UPDATE_TASK_EPSET (-7) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; @@ -81,14 +84,12 @@ typedef enum ETaskStatus { TASK_STATUS__HALT, // pause, but not be manipulated by user command TASK_STATUS__PAUSE, // pause TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore -// TASK_STATUS__STREAM_SCAN_HISTORY, } ETaskStatus; enum { TASK_SCHED_STATUS__INACTIVE = 1, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE, - TASK_SCHED_STATUS__FAILED, TASK_SCHED_STATUS__DROPPING, }; @@ -322,10 +323,11 @@ typedef struct SStreamStatus { int8_t taskStatus; int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set int8_t schedStatus; - int8_t keepTaskStatus; - bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it + int32_t schedIdleTime; // idle time before invoke again + int64_t lastExecTs; // last exec time stamp + int8_t statusBackup; + bool appendTranstateBlock; // has append the transfer state data block already int32_t timerActive; // timer is active - int8_t allowedAddInTimer; // allowed to add into timer int32_t inScanHistorySentinel; } SStreamStatus; @@ -366,7 +368,8 @@ typedef struct STaskQueue { typedef struct STaskSchedInfo { int8_t status; - void* pTimer; + tmr_h pDelayTimer; + tmr_h pIdleTimer; } STaskSchedInfo; typedef struct SSinkRecorder { @@ -481,6 +484,11 @@ typedef struct STaskUpdateInfo { int32_t transId; } STaskUpdateInfo; +typedef struct SScanWalInfo { + int32_t scanCounter; + tmr_h scanTimer; +} SScanWalInfo; + // meta typedef struct SStreamMeta { char* path; @@ -498,7 +506,7 @@ typedef struct SStreamMeta { bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower. STaskStartInfo startInfo; TdThreadRwlock lock; - int32_t walScanCounter; + SScanWalInfo scanInfo; void* streamBackend; int64_t streamBackendRid; SHashObj* pTaskDbUnique; @@ -541,6 +549,7 @@ typedef struct { SMsgHead head; int64_t streamId; int32_t taskId; + int32_t reqType; } SStreamTaskRunReq; struct SStreamDispatchReq { @@ -553,6 +562,7 @@ struct SStreamDispatchReq { int32_t upstreamTaskId; int32_t upstreamChildId; int32_t upstreamNodeId; + int32_t upstreamRelTaskId; int32_t blockNum; int64_t totalLen; SArray* dataLen; // SArray @@ -676,18 +686,18 @@ typedef struct STaskStatusEntry { int32_t statusLastDuration; // to record the last duration of current status int64_t stage; int32_t nodeId; - int64_t verStart; // start version in WAL, only valid for source task - int64_t verEnd; // end version in WAL, only valid for source task - int64_t processedVer; // only valid for source task + int64_t verStart; // start version in WAL, only valid for source task + int64_t verEnd; // end version in WAL, only valid for source task + int64_t processedVer; // only valid for source task int64_t activeCheckpointId; // current active checkpoint id - int32_t chkpointTransId; // checkpoint trans id - bool checkpointFailed; // denote if the checkpoint is failed or not - bool inputQChanging; // inputQ is changing or not + int32_t chkpointTransId; // checkpoint trans id + bool checkpointFailed; // denote if the checkpoint is failed or not + bool inputQChanging; // inputQ is changing or not int64_t inputQUnchangeCounter; - double inputQUsed; // in MiB + double inputQUsed; // in MiB double inputRate; - double sinkQuota; // existed quota size for sink task - double sinkDataSize; // sink to dst data size + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dst data size } STaskStatusEntry; typedef struct SStreamHbMsg { @@ -728,6 +738,11 @@ typedef struct SStreamTaskNodeUpdateMsg { int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpdateMsg* pMsg); int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg* pMsg); +typedef struct SStreamTaskState { + ETaskStatus state; + char* name; +} SStreamTaskState; + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -764,17 +779,18 @@ SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t void streamTaskInputFail(SStreamTask* pTask); int32_t streamExecTask(SStreamTask* pTask); +int32_t streamResumeTask(SStreamTask* pTask); int32_t streamSchedExec(SStreamTask* pTask); bool streamTaskShouldStop(const SStreamTask* pStatus); bool streamTaskShouldPause(const SStreamTask* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); bool streamTaskReadyToRun(const SStreamTask* pTask, char** pStatus); -char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); -ETaskStatus streamTaskGetStatus(const SStreamTask* pTask, char** pStr); -const char* streamTaskGetStatusStr(ETaskStatus status); -void streamTaskResetStatus(SStreamTask* pTask); -void streamTaskSetStatusReady(SStreamTask* pTask); +char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); +SStreamTaskState* streamTaskGetStatus(const SStreamTask* pTask); +const char* streamTaskGetStatusStr(ETaskStatus status); +void streamTaskResetStatus(SStreamTask* pTask); +void streamTaskSetStatusReady(SStreamTask* pTask); void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen); @@ -810,6 +826,7 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); // common int32_t streamRestoreParam(SStreamTask* pTask); +int32_t streamResetParamForScanHistory(SStreamTask* pTask); void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskResume(SStreamTask* pTask); int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); @@ -867,6 +884,7 @@ void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool i int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); int32_t streamMetaStartAllTasks(SStreamMeta* pMeta); int32_t streamMetaStopAllTasks(SStreamMeta* pMeta); +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); diff --git a/include/util/tdef.h b/include/util/tdef.h index 026e531928..b51eb75607 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -109,12 +109,12 @@ extern const int32_t TYPE_BYTES[21]; #define TSDB_INS_USER_STABLES_DBNAME_COLID 2 static const int64_t TICK_PER_SECOND[] = { - 1000LL, // MILLISECOND - 1000000LL, // MICROSECOND - 1000000000LL, // NANOSECOND - 0LL, // HOUR - 0LL, // MINUTE - 1LL // SECOND + 1000LL, // MILLISECOND + 1000000LL, // MICROSECOND + 1000000000LL, // NANOSECOND + 0LL, // HOUR + 0LL, // MINUTE + 1LL // SECOND }; #define TSDB_TICK_PER_SECOND(precision) \ @@ -239,8 +239,8 @@ typedef enum ELogicConditionType { #define TSDB_MAX_SQL_SHOW_LEN 1024 #define TSDB_MAX_ALLOWED_SQL_LEN (1 * 1024 * 1024u) // sql length should be less than 1mb -#define TSDB_VIEW_NAME_LEN 193 -#define TSDB_VIEW_FNAME_LEN (TSDB_DB_FNAME_LEN + TSDB_VIEW_NAME_LEN + TSDB_NAME_DELIMITER_LEN) +#define TSDB_VIEW_NAME_LEN 193 +#define TSDB_VIEW_FNAME_LEN (TSDB_DB_FNAME_LEN + TSDB_VIEW_NAME_LEN + TSDB_NAME_DELIMITER_LEN) #define TSDB_APP_NAME_LEN TSDB_UNI_LEN #define TSDB_TB_COMMENT_LEN 1025 @@ -260,7 +260,7 @@ typedef enum ELogicConditionType { #define TSDB_PASSWORD_LEN 32 #define TSDB_USET_PASSWORD_LEN 129 #define TSDB_VERSION_LEN 32 -#define TSDB_LABEL_LEN 12 +#define TSDB_LABEL_LEN 16 #define TSDB_JOB_STATUS_LEN 32 #define TSDB_CLUSTER_ID_LEN 40 @@ -290,7 +290,7 @@ typedef enum ELogicConditionType { #define TSDB_CONN_ACTIVE_KEY_LEN 255 #define TSDB_UNIQ_ACTIVE_KEY_LEN 249 -#define TSDB_DEFAULT_PKT_SIZE 65480 // same as RPC_MAX_UDP_SIZE +#define TSDB_DEFAULT_PKT_SIZE 65480 // same as RPC_MAX_UDP_SIZE #define TSDB_SNAP_DATA_PAYLOAD_SIZE (1 * 1024 * 1024) #define TSDB_PAYLOAD_SIZE TSDB_DEFAULT_PKT_SIZE @@ -399,13 +399,13 @@ typedef enum ELogicConditionType { #define TSDB_MAX_STT_TRIGGER 1 #define TSDB_DEFAULT_SST_TRIGGER 1 #endif -#define TSDB_STT_TRIGGER_ARRAY_SIZE 16 // maximum of TSDB_MAX_STT_TRIGGER of TD_ENTERPRISE and TD_COMMUNITY -#define TSDB_MIN_HASH_PREFIX (2 - TSDB_TABLE_NAME_LEN) -#define TSDB_MAX_HASH_PREFIX (TSDB_TABLE_NAME_LEN - 2) -#define TSDB_DEFAULT_HASH_PREFIX 0 -#define TSDB_MIN_HASH_SUFFIX (2 - TSDB_TABLE_NAME_LEN) -#define TSDB_MAX_HASH_SUFFIX (TSDB_TABLE_NAME_LEN - 2) -#define TSDB_DEFAULT_HASH_SUFFIX 0 +#define TSDB_STT_TRIGGER_ARRAY_SIZE 16 // maximum of TSDB_MAX_STT_TRIGGER of TD_ENTERPRISE and TD_COMMUNITY +#define TSDB_MIN_HASH_PREFIX (2 - TSDB_TABLE_NAME_LEN) +#define TSDB_MAX_HASH_PREFIX (TSDB_TABLE_NAME_LEN - 2) +#define TSDB_DEFAULT_HASH_PREFIX 0 +#define TSDB_MIN_HASH_SUFFIX (2 - TSDB_TABLE_NAME_LEN) +#define TSDB_MAX_HASH_SUFFIX (TSDB_TABLE_NAME_LEN - 2) +#define TSDB_DEFAULT_HASH_SUFFIX 0 #define TSDB_DB_MIN_WAL_RETENTION_PERIOD -1 #define TSDB_REP_DEF_DB_WAL_RET_PERIOD 3600 diff --git a/include/util/tworker.h b/include/util/tworker.h index 8508adf052..f39540d24b 100644 --- a/include/util/tworker.h +++ b/include/util/tworker.h @@ -23,7 +23,6 @@ extern "C" { #endif -typedef struct SQWorkerPool SQWorkerPool; typedef struct SWWorkerPool SWWorkerPool; typedef struct SQueueWorker { @@ -60,14 +59,14 @@ typedef struct SWWorker { SWWorkerPool *pool; } SWWorker; -typedef struct SWWorkerPool { +struct SWWorkerPool { int32_t max; // max number of workers int32_t num; int32_t nextId; // from 0 to max-1, cyclic const char *name; SWWorker *workers; TdThreadMutex mutex; -} SWWorkerPool; +}; int32_t tQWorkerInit(SQWorkerPool *pool); void tQWorkerCleanup(SQWorkerPool *pool); diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 15c8903978..28e269ee10 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1968,7 +1968,7 @@ TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { void* rspObj = NULL; int64_t startTime = taosGetTimestampMs(); - tscInfo("consumer:0x%" PRIx64 " start to poll at %" PRId64 ", timeout:%" PRId64, tmq->consumerId, startTime, + tscDebug("consumer:0x%" PRIx64 " start to poll at %" PRId64 ", timeout:%" PRId64, tmq->consumerId, startTime, timeout); // in no topic status, delayed task also need to be processed @@ -2015,7 +2015,7 @@ TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { int64_t currentTime = taosGetTimestampMs(); int64_t elapsedTime = currentTime - startTime; if (elapsedTime > timeout) { - tscInfo("consumer:0x%" PRIx64 " (epoch %d) timeout, no rsp, start time %" PRId64 ", current time %" PRId64, + tscDebug("consumer:0x%" PRIx64 " (epoch %d) timeout, no rsp, start time %" PRId64 ", current time %" PRId64, tmq->consumerId, tmq->epoch, startTime, currentTime); return NULL; } diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index e78783cf3c..e6519a436e 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -939,8 +939,8 @@ TEST(clientCase, agg_query_tables) { } taos_free_result(pRes); - int64_t st = 1685959293000; - for (int32_t i = 0; i < 10000000; ++i) { + int64_t st = 1685959293299; + for (int32_t i = 0; i < 5; ++i) { char s[256] = {0}; while (1) { @@ -954,16 +954,16 @@ TEST(clientCase, agg_query_tables) { } } - while (1) { - sprintf(s, "insert into t2 values(%ld, %d)", st + i, i); - pRes = taos_query(pConn, s); - int32_t ret = taos_errno(pRes); - - taos_free_result(pRes); - if (ret == 0) { - break; - } - } +// while (1) { +// sprintf(s, "insert into t2 values(%ld, %d)", st + i, i); +// pRes = taos_query(pConn, s); +// int32_t ret = taos_errno(pRes); +// +// taos_free_result(pRes); +// if (ret == 0) { +// break; +// } +// } } // pRes = taos_query(pConn, "show table distributed tup"); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 895ec808fc..abe7064fc0 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -170,7 +170,7 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "node_type", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 79d21955d4..a7d80fe5db 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -58,7 +58,7 @@ int32_t tsNumOfMnodeQueryThreads = 4; int32_t tsNumOfMnodeFetchThreads = 1; int32_t tsNumOfMnodeReadThreads = 1; int32_t tsNumOfVnodeQueryThreads = 4; -float tsRatioOfVnodeStreamThreads = 4.0; +float tsRatioOfVnodeStreamThreads = 1.0; int32_t tsNumOfVnodeFetchThreads = 4; int32_t tsNumOfVnodeRsmaThreads = 2; int32_t tsNumOfQnodeQueryThreads = 4; @@ -72,6 +72,7 @@ int32_t tsPQSortMemThreshold = 16; // M int32_t tsElectInterval = 25 * 1000; int32_t tsHeartbeatInterval = 1000; int32_t tsHeartbeatTimeout = 20 * 1000; +int32_t tsSnapReplMaxWaitN = 128; // mnode int64_t tsMndSdbWriteDelta = 200; @@ -95,9 +96,9 @@ int32_t tsMonitorMaxLogs = 100; bool tsMonitorComp = false; // audit -bool tsEnableAudit = true; -bool tsEnableAuditCreateTable = true; -int32_t tsAuditInterval = 5000; +bool tsEnableAudit = true; +bool tsEnableAuditCreateTable = true; +int32_t tsAuditInterval = 5000; // telem #ifdef TD_ENTERPRISE @@ -108,7 +109,7 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com"; uint16_t tsTelemPort = 80; -char * tsTelemUri = "/report"; +char *tsTelemUri = "/report"; #ifdef TD_ENTERPRISE bool tsEnableCrashReport = false; @@ -252,7 +253,7 @@ int32_t tsCompactPullupInterval = 10; int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; -int32_t tsStreamNodeCheckInterval = 15; +int32_t tsStreamNodeCheckInterval = 16; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups @@ -282,7 +283,7 @@ int32_t tsS3BlockCacheSize = 16; // number of blocks int32_t tsS3PageCacheSize = 4096; // number of pages int32_t tsS3UploadDelaySec = 60 * 60 * 24; -bool tsExperimental = true; +bool tsExperimental = true; #ifndef _STORAGE int32_t taosSetTfsCfg(SConfig *pCfg) { @@ -621,7 +622,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { 0) return -1; - if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 100, CFG_SCOPE_SERVER, + if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 10, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -673,6 +674,9 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncSnapReplMaxWaitN", tsSnapReplMaxWaitN, 16, + (TSDB_SYNC_SNAP_BUFFER_SIZE >> 2), CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) + return -1; if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -691,8 +695,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "audit", tsEnableAudit, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "auditCreateTable", tsEnableAuditCreateTable, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "auditInterval", tsAuditInterval, 500, 200000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "auditInterval", tsAuditInterval, 500, 200000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "telemetryReporting", tsEnableTelem, CFG_SCOPE_BOTH, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -711,8 +714,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) + if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, CFG_SCOPE_SERVER, + CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) @@ -1183,6 +1186,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsElectInterval = cfgGetItem(pCfg, "syncElectInterval")->i32; tsHeartbeatInterval = cfgGetItem(pCfg, "syncHeartbeatInterval")->i32; tsHeartbeatTimeout = cfgGetItem(pCfg, "syncHeartbeatTimeout")->i32; + tsSnapReplMaxWaitN = cfgGetItem(pCfg, "syncSnapReplMaxWaitN")->i32; tsMndSdbWriteDelta = cfgGetItem(pCfg, "mndSdbWriteDelta")->i64; tsMndLogRetention = cfgGetItem(pCfg, "mndLogRetention")->i64; @@ -1377,7 +1381,7 @@ void taosCleanupCfg() { typedef struct { const char *optionName; - void * optionVar; + void *optionVar; } OptionNameAndVar; static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, SConfigItem *pItem, bool isDebugflag) { @@ -1390,7 +1394,7 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, switch (pItem->dtype) { case CFG_DTYPE_BOOL: { int32_t flag = pItem->i32; - bool * pVar = pOptions[d].optionVar; + bool *pVar = pOptions[d].optionVar; uInfo("%s set from %d to %d", optName, *pVar, flag); *pVar = flag; terrno = TSDB_CODE_SUCCESS; @@ -1465,40 +1469,38 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) { {"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag}, }; - static OptionNameAndVar options[] = { - {"audit", &tsEnableAudit}, - {"asynclog", &tsAsyncLog}, - {"disableStream", &tsDisableStream}, - {"enableWhiteList", &tsEnableWhiteList}, - {"telemetryReporting", &tsEnableTelem}, - {"monitor", &tsEnableMonitor}, + static OptionNameAndVar options[] = {{"audit", &tsEnableAudit}, + {"asynclog", &tsAsyncLog}, + {"disableStream", &tsDisableStream}, + {"enableWhiteList", &tsEnableWhiteList}, + {"telemetryReporting", &tsEnableTelem}, + {"monitor", &tsEnableMonitor}, - {"mndSdbWriteDelta", &tsMndSdbWriteDelta}, - {"minDiskFreeSize", &tsMinDiskFreeSize}, + {"mndSdbWriteDelta", &tsMndSdbWriteDelta}, + {"minDiskFreeSize", &tsMinDiskFreeSize}, - {"cacheLazyLoadThreshold", &tsCacheLazyLoadThreshold}, - {"checkpointInterval", &tsStreamCheckpointInterval}, - {"keepAliveIdle", &tsKeepAliveIdle}, - {"logKeepDays", &tsLogKeepDays}, - {"maxStreamBackendCache", &tsMaxStreamBackendCache}, - {"mqRebalanceInterval", &tsMqRebalanceInterval}, - {"numOfLogLines", &tsNumOfLogLines}, - {"queryRspPolicy", &tsQueryRspPolicy}, - {"timeseriesThreshold", &tsTimeSeriesThreshold}, - {"tmqMaxTopicNum", &tmqMaxTopicNum}, - {"transPullupInterval", &tsTransPullupInterval}, - {"compactPullupInterval", &tsCompactPullupInterval}, - {"trimVDbIntervalSec", &tsTrimVDbIntervalSec}, - {"ttlBatchDropNum", &tsTtlBatchDropNum}, - {"ttlFlushThreshold", &tsTtlFlushThreshold}, - {"ttlPushInterval", &tsTtlPushIntervalSec}, - //{"s3BlockSize", &tsS3BlockSize}, - {"s3BlockCacheSize", &tsS3BlockCacheSize}, - {"s3PageCacheSize", &tsS3PageCacheSize}, - {"s3UploadDelaySec", &tsS3UploadDelaySec}, - {"supportVnodes", &tsNumOfSupportVnodes}, - {"experimental", &tsExperimental} - }; + {"cacheLazyLoadThreshold", &tsCacheLazyLoadThreshold}, + {"checkpointInterval", &tsStreamCheckpointInterval}, + {"keepAliveIdle", &tsKeepAliveIdle}, + {"logKeepDays", &tsLogKeepDays}, + {"maxStreamBackendCache", &tsMaxStreamBackendCache}, + {"mqRebalanceInterval", &tsMqRebalanceInterval}, + {"numOfLogLines", &tsNumOfLogLines}, + {"queryRspPolicy", &tsQueryRspPolicy}, + {"timeseriesThreshold", &tsTimeSeriesThreshold}, + {"tmqMaxTopicNum", &tmqMaxTopicNum}, + {"transPullupInterval", &tsTransPullupInterval}, + {"compactPullupInterval", &tsCompactPullupInterval}, + {"trimVDbIntervalSec", &tsTrimVDbIntervalSec}, + {"ttlBatchDropNum", &tsTtlBatchDropNum}, + {"ttlFlushThreshold", &tsTtlFlushThreshold}, + {"ttlPushInterval", &tsTtlPushIntervalSec}, + //{"s3BlockSize", &tsS3BlockSize}, + {"s3BlockCacheSize", &tsS3BlockCacheSize}, + {"s3PageCacheSize", &tsS3PageCacheSize}, + {"s3UploadDelaySec", &tsS3UploadDelaySec}, + {"supportVnodes", &tsNumOfSupportVnodes}, + {"experimental", &tsExperimental}}; if (taosCfgSetOption(debugOptions, tListLen(debugOptions), pItem, true) != 0) { taosCfgSetOption(options, tListLen(options), pItem, false); @@ -1692,36 +1694,34 @@ static int32_t taosCfgDynamicOptionsForClient(SConfig *pCfg, char *name) { {"cDebugFlag", &cDebugFlag}, {"dDebugFlag", &dDebugFlag}, {"fsDebugFlag", &fsDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"jniDebugFlag", &jniDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"rpcDebugFlag", &rpcDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"tmrDebugFlag", &tmrDebugFlag}, - {"uDebugFlag", &uDebugFlag}, {"simDebugFlag", &simDebugFlag}, + {"uDebugFlag", &uDebugFlag}, {"simDebugFlag", &simDebugFlag}, }; - static OptionNameAndVar options[] = { - {"asyncLog", &tsAsyncLog}, - {"assert", &tsAssert}, - {"compressMsgSize", &tsCompressMsgSize}, - {"countAlwaysReturnValue", &tsCountAlwaysReturnValue}, - {"crashReporting", &tsEnableCrashReport}, - {"enableCoreFile", &tsAsyncLog}, - {"enableQueryHb", &tsEnableQueryHb}, - {"keepColumnName", &tsKeepColumnName}, - {"keepAliveIdle", &tsKeepAliveIdle}, - {"logKeepDays", &tsLogKeepDays}, - {"maxInsertBatchRows", &tsMaxInsertBatchRows}, - {"maxRetryWaitTime", &tsMaxRetryWaitTime}, - {"minSlidingTime", &tsMinSlidingTime}, - {"minIntervalTime", &tsMinIntervalTime}, - {"numOfLogLines", &tsNumOfLogLines}, - {"querySmaOptimize", &tsQuerySmaOptimize}, - {"queryPolicy", &tsQueryPolicy}, - {"queryPlannerTrace", &tsQueryPlannerTrace}, - {"queryNodeChunkSize", &tsQueryNodeChunkSize}, - {"queryUseNodeAllocator", &tsQueryUseNodeAllocator}, - {"smlDot2Underline", &tsSmlDot2Underline}, - {"shellActivityTimer", &tsShellActivityTimer}, - {"slowLogThreshold", &tsSlowLogThreshold}, - {"useAdapter", &tsUseAdapter}, - {"experimental", &tsExperimental} - }; + static OptionNameAndVar options[] = {{"asyncLog", &tsAsyncLog}, + {"assert", &tsAssert}, + {"compressMsgSize", &tsCompressMsgSize}, + {"countAlwaysReturnValue", &tsCountAlwaysReturnValue}, + {"crashReporting", &tsEnableCrashReport}, + {"enableCoreFile", &tsAsyncLog}, + {"enableQueryHb", &tsEnableQueryHb}, + {"keepColumnName", &tsKeepColumnName}, + {"keepAliveIdle", &tsKeepAliveIdle}, + {"logKeepDays", &tsLogKeepDays}, + {"maxInsertBatchRows", &tsMaxInsertBatchRows}, + {"maxRetryWaitTime", &tsMaxRetryWaitTime}, + {"minSlidingTime", &tsMinSlidingTime}, + {"minIntervalTime", &tsMinIntervalTime}, + {"numOfLogLines", &tsNumOfLogLines}, + {"querySmaOptimize", &tsQuerySmaOptimize}, + {"queryPolicy", &tsQueryPolicy}, + {"queryPlannerTrace", &tsQueryPlannerTrace}, + {"queryNodeChunkSize", &tsQueryNodeChunkSize}, + {"queryUseNodeAllocator", &tsQueryUseNodeAllocator}, + {"smlDot2Underline", &tsSmlDot2Underline}, + {"shellActivityTimer", &tsShellActivityTimer}, + {"slowLogThreshold", &tsSlowLogThreshold}, + {"useAdapter", &tsUseAdapter}, + {"experimental", &tsExperimental}}; if (taosCfgSetOption(debugOptions, tListLen(debugOptions), pItem, true) != 0) { taosCfgSetOption(options, tListLen(options), pItem, false); @@ -1798,7 +1798,7 @@ void taosSetAllDebugFlag(int32_t flag) { taosArrayClear(noNeedToSetVars); // reset array uInfo("all debug flag are set to %d", flag); - if (terrno == TSDB_CODE_CFG_NOT_FOUND) terrno = TSDB_CODE_SUCCESS; // ignore not exist + if (terrno == TSDB_CODE_CFG_NOT_FOUND) terrno = TSDB_CODE_SUCCESS; // ignore not exist } int8_t taosGranted() { return atomic_load_8(&tsGrant); } diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 023a425df2..775cff6670 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -683,6 +683,10 @@ int32_t parseNatualDuration(const char* token, int32_t tokenLen, int64_t* durati return getDuration(*duration, *unit, duration, timePrecision); } +static bool taosIsLeapYear(int32_t year) { + return (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)); +} + int64_t taosTimeAdd(int64_t t, int64_t duration, char unit, int32_t precision) { if (duration == 0) { return t; @@ -702,7 +706,13 @@ int64_t taosTimeAdd(int64_t t, int64_t duration, char unit, int32_t precision) { int32_t mon = tm.tm_year * 12 + tm.tm_mon + (int32_t)numOfMonth; tm.tm_year = mon / 12; tm.tm_mon = mon % 12; - + int daysOfMonth[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + if (taosIsLeapYear(1900 + tm.tm_year)) { + daysOfMonth[1] = 29; + } + if (tm.tm_mday > daysOfMonth[tm.tm_mon]) { + tm.tm_mday = daysOfMonth[tm.tm_mon]; + } return (int64_t)(taosMktime(&tm) * TSDB_TICK_PER_SECOND(precision) + fraction); } @@ -872,23 +882,33 @@ int64_t taosTimeTruncate(int64_t ts, const SInterval* pInterval) { ASSERT(pInterval->offset >= 0); if (pInterval->offset > 0) { - start = taosTimeAdd(start, pInterval->offset, pInterval->offsetUnit, precision); - // try to move current window to the left-hande-side, due to the offset effect. - int64_t end = taosTimeAdd(start, pInterval->interval, pInterval->intervalUnit, precision) - 1; - - int64_t newe = end; + int64_t newe = taosTimeAdd(start, pInterval->interval, pInterval->intervalUnit, precision) - 1; + int64_t slidingStart = start; while (newe >= ts) { - end = newe; - newe = taosTimeAdd(newe, -pInterval->sliding, pInterval->slidingUnit, precision); + start = slidingStart; + slidingStart = taosTimeAdd(slidingStart, -pInterval->sliding, pInterval->slidingUnit, precision); + int64_t slidingEnd = taosTimeAdd(slidingStart, pInterval->interval, pInterval->intervalUnit, precision) - 1; + newe = taosTimeAdd(slidingEnd, pInterval->offset, pInterval->offsetUnit, precision); } - - start = taosTimeAdd(end, -pInterval->interval, pInterval->intervalUnit, precision) + 1; + start = taosTimeAdd(start, pInterval->offset, pInterval->offsetUnit, precision); } return start; } +// used together with taosTimeTruncate. when offset is great than zero, slide-start/slide-end is the anchor point +int64_t taosTimeGetIntervalEnd(int64_t intervalStart, const SInterval* pInterval) { + if (pInterval->offset > 0) { + int64_t slideStart = taosTimeAdd(intervalStart, -1 * pInterval->offset, pInterval->offsetUnit, pInterval->precision); + int64_t slideEnd = taosTimeAdd(slideStart, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + int64_t result = taosTimeAdd(slideEnd, pInterval->offset, pInterval->offsetUnit, pInterval->precision); + return result; + } else { + int64_t result = taosTimeAdd(intervalStart, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + return result; + } +} // internal function, when program is paused in debugger, // one can call this function from debugger to print a // timestamp as human readable string, for example (gdb): diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c index bcdb31508a..2caf2dee1e 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c @@ -45,7 +45,7 @@ static void dmMayShouldUpdateIpWhiteList(SDnodeMgmt *pMgmt, int64_t ver) { SRetrieveIpWhiteReq req = {.ipWhiteVer = oldVer}; int32_t contLen = tSerializeRetrieveIpWhite(NULL, 0, &req); - void *pHead = rpcMallocCont(contLen); + void * pHead = rpcMallocCont(contLen); tSerializeRetrieveIpWhite(pHead, contLen, &req); SRpcMsg rpcMsg = {.pCont = pHead, @@ -144,7 +144,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { req.ipWhiteVer = pMgmt->pData->ipWhiteVer; int32_t contLen = tSerializeSStatusReq(NULL, 0, &req); - void *pHead = rpcMallocCont(contLen); + void * pHead = rpcMallocCont(contLen); tSerializeSStatusReq(pHead, contLen, &req); tFreeSStatusReq(&req); @@ -161,7 +161,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { SEpSet epSet = {0}; int8_t epUpdated = 0; dmGetMnodeEpSet(pMgmt->pData, &epSet); - rpcSendRecvWithTimeout(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp, &epUpdated, 5000); + rpcSendRecvWithTimeout(pMgmt->msgCb.statusRpc, &epSet, &rpcMsg, &rpcRsp, &epUpdated, tsStatusInterval * 5 * 1000); if (rpcRsp.code != 0) { dmRotateMnodeEpSet(pMgmt->pData); char tbuf[512]; @@ -189,7 +189,7 @@ void dmSendNotifyReq(SDnodeMgmt *pMgmt) { req.pVloads = vinfo.pVloads; int32_t contLen = tSerializeSNotifyReq(NULL, 0, &req); - void *pHead = rpcMallocCont(contLen); + void * pHead = rpcMallocCont(contLen); tSerializeSNotifyReq(pHead, contLen, &req); tFreeSNotifyReq(&req); @@ -284,7 +284,7 @@ int32_t dmProcessServerRunStatus(SDnodeMgmt *pMgmt, SRpcMsg *pMsg) { } SSDataBlock *dmBuildVariablesBlock(void) { - SSDataBlock *pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + SSDataBlock * pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); size_t size = 0; const SSysTableMeta *pMeta = NULL; getInfosDbMeta(&pMeta, &size); diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 5917c7abbb..831d1bc10c 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -47,28 +47,29 @@ static void *dmStatusThreadFp(void *param) { } SDmNotifyHandle dmNotifyHdl = {.state = 0}; -static void *dmNotifyThreadFp(void *param) { - SDnodeMgmt *pMgmt = param; - setThreadName("dnode-notify"); - if (tsem_init(&dmNotifyHdl.sem, 0, 0) != 0) { - return NULL; +static void *dmNotifyThreadFp(void *param) { + SDnodeMgmt *pMgmt = param; + setThreadName("dnode-notify"); + + if (tsem_init(&dmNotifyHdl.sem, 0, 0) != 0) { + return NULL; } - bool wait = true; - while (1) { - if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + bool wait = true; + while (1) { + if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; if (wait) tsem_wait(&dmNotifyHdl.sem); atomic_store_8(&dmNotifyHdl.state, 1); - dmSendNotifyReq(pMgmt); - if (1 == atomic_val_compare_exchange_8(&dmNotifyHdl.state, 1, 0)) { - wait = true; - continue; + dmSendNotifyReq(pMgmt); + if (1 == atomic_val_compare_exchange_8(&dmNotifyHdl.state, 1, 0)) { + wait = true; + continue; } - wait = false; + wait = false; } - return NULL; + return NULL; } static void *dmMonitorThreadFp(void *param) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 9a792a2774..8b80527447 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -407,12 +407,8 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { if (tWWorkerInit(pFPool) != 0) return -1; SSingleWorkerCfg mgmtCfg = { - .min = 1, - .max = 1, - .name = "vnode-mgmt", - .fp = (FItem)vmProcessMgmtQueue, - .param = pMgmt, - }; + .min = 1, .max = 1, .name = "vnode-mgmt", .fp = (FItem)vmProcessMgmtQueue, .param = pMgmt}; + if (tSingleWorkerInit(&pMgmt->mgmtWorker, &mgmtCfg) != 0) return -1; dDebug("vnode workers are initialized"); diff --git a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h index 243d0bfeed..bc6a4652e7 100644 --- a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h +++ b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h @@ -48,6 +48,8 @@ typedef struct { typedef struct { void *serverRpc; void *clientRpc; + void *statusRpc; + void *syncRpc; SDnodeHandle msgHandles[TDMT_MAX]; } SDnodeTrans; @@ -115,8 +117,10 @@ int32_t dmInitServer(SDnode *pDnode); void dmCleanupServer(SDnode *pDnode); int32_t dmInitClient(SDnode *pDnode); int32_t dmInitStatusClient(SDnode *pDnode); +int32_t dmInitSyncClient(SDnode *pDnode); void dmCleanupClient(SDnode *pDnode); void dmCleanupStatusClient(SDnode *pDnode); +void dmCleanupSyncClient(SDnode *pDnode); SMsgCb dmGetMsgcb(SDnode *pDnode); int32_t dmInitMsgHandle(SDnode *pDnode); int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index f9bba19fbb..d173c83e97 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -28,9 +28,6 @@ } \ } while (0) -extern int32_t streamTimerInit(); -extern void streamTimerCleanUp(); - static SDnode globalDnode = {0}; SDnode *dmInstance() { return &globalDnode; } @@ -169,7 +166,6 @@ int32_t dmInit() { #if defined(USE_S3) if (s3Begin() != 0) return -1; #endif - if (streamTimerInit() != 0) return -1; dInfo("dnode env is initialized"); return 0; @@ -196,10 +192,10 @@ void dmCleanup() { udfStopUdfd(); taosStopCacheRefreshWorker(); dmDiskClose(); + #if defined(USE_S3) s3End(); #endif - streamTimerCleanUp(); dInfo("dnode env is cleaned up"); diff --git a/source/dnode/mgmt/node_mgmt/src/dmMgmt.c b/source/dnode/mgmt/node_mgmt/src/dmMgmt.c index 4c5deef91a..6cbf31b15f 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmMgmt.c +++ b/source/dnode/mgmt/node_mgmt/src/dmMgmt.c @@ -82,9 +82,13 @@ int32_t dmInitDnode(SDnode *pDnode) { if (dmInitModule(pDnode) != 0) { goto _OVER; } + indexInit(tsNumOfCommitThreads); streamMetaInit(); + dmInitStatusClient(pDnode); + dmInitSyncClient(pDnode); + dmReportStartup("dnode-transport", "initialized"); dDebug("dnode is created, ptr:%p", pDnode); code = 0; @@ -100,11 +104,15 @@ _OVER: } void dmCleanupDnode(SDnode *pDnode) { - if (pDnode == NULL) return; + if (pDnode == NULL) { + return; + } dmCleanupClient(pDnode); dmCleanupStatusClient(pDnode); + dmCleanupSyncClient(pDnode); dmCleanupServer(pDnode); + dmClearVars(pDnode); rpcCleanup(); streamMetaCleanup(); diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 5a61d68674..60c4ffe97b 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -293,6 +293,23 @@ static inline int32_t dmSendReq(const SEpSet *pEpSet, SRpcMsg *pMsg) { return rpcSendRequest(pDnode->trans.clientRpc, pEpSet, pMsg, NULL); } } +static inline int32_t dmSendSyncReq(const SEpSet *pEpSet, SRpcMsg *pMsg) { + SDnode *pDnode = dmInstance(); + if (pDnode->status != DND_STAT_RUNNING && pMsg->msgType < TDMT_SYNC_MSG) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + if (pDnode->status == DND_STAT_INIT) { + terrno = TSDB_CODE_APP_IS_STARTING; + } else { + terrno = TSDB_CODE_APP_IS_STOPPING; + } + dError("failed to send rpc msg:%s since %s, handle:%p", TMSG_INFO(pMsg->msgType), terrstr(), pMsg->info.handle); + return -1; + } else { + rpcSendRequest(pDnode->trans.syncRpc, pEpSet, pMsg, NULL); + return 0; + } +} static inline void dmRegisterBrokenLinkArg(SRpcMsg *pMsg) { rpcRegisterBrokenLinkArg(pMsg); } @@ -317,8 +334,8 @@ int32_t dmInitClient(SDnode *pDnode) { SDnodeTrans *pTrans = &pDnode->trans; SRpcInit rpcInit = {0}; - rpcInit.label = "DND-C"; - rpcInit.numOfThreads = tsNumOfRpcThreads; + rpcInit.label = "DNODE-CLI"; + rpcInit.numOfThreads = tsNumOfRpcThreads / 2; rpcInit.cfp = (RpcCfp)dmProcessRpcMsg; rpcInit.sessions = 1024; rpcInit.connType = TAOS_CONN_CLIENT; @@ -337,7 +354,7 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; - int32_t connLimitNum = tsNumOfRpcSessions / (tsNumOfRpcThreads * 3); + int32_t connLimitNum = tsNumOfRpcSessions / (tsNumOfRpcThreads * 3) / 2; connLimitNum = TMAX(connLimitNum, 10); connLimitNum = TMIN(connLimitNum, 500); @@ -361,7 +378,7 @@ int32_t dmInitStatusClient(SDnode *pDnode) { SDnodeTrans *pTrans = &pDnode->trans; SRpcInit rpcInit = {0}; - rpcInit.label = "DND-STATUS"; + rpcInit.label = "DNODE-STA-CLI"; rpcInit.numOfThreads = 1; rpcInit.cfp = (RpcCfp)dmProcessRpcMsg; rpcInit.sessions = 1024; @@ -392,16 +409,61 @@ int32_t dmInitStatusClient(SDnode *pDnode) { rpcInit.timeToGetConn = tsTimeToGetAvailableConn; taosVersionStrToInt(version, &(rpcInit.compatibilityVer)); - // pTrans->statusClientRpc = rpcOpen(&rpcInit); - // if (pTrans->statusClientRpc == NULL) { - // dError("failed to init dnode rpc status client"); - // return -1; - // } + pTrans->statusRpc = rpcOpen(&rpcInit); + if (pTrans->statusRpc == NULL) { + dError("failed to init dnode rpc status client"); + return -1; + } dDebug("dnode rpc status client is initialized"); return 0; } +int32_t dmInitSyncClient(SDnode *pDnode) { + SDnodeTrans *pTrans = &pDnode->trans; + + SRpcInit rpcInit = {0}; + rpcInit.label = "DNODE-SYNC-CLI"; + rpcInit.numOfThreads = tsNumOfRpcThreads / 2; + rpcInit.cfp = (RpcCfp)dmProcessRpcMsg; + rpcInit.sessions = 1024; + rpcInit.connType = TAOS_CONN_CLIENT; + rpcInit.user = TSDB_DEFAULT_USER; + rpcInit.idleTime = tsShellActivityTimer * 1000; + rpcInit.parent = pDnode; + rpcInit.rfp = rpcRfp; + rpcInit.compressSize = tsCompressMsgSize; + + rpcInit.retryMinInterval = tsRedirectPeriod; + rpcInit.retryStepFactor = tsRedirectFactor; + rpcInit.retryMaxInterval = tsRedirectMaxPeriod; + rpcInit.retryMaxTimeout = tsMaxRetryWaitTime; + + rpcInit.failFastInterval = 5000; // interval threshold(ms) + rpcInit.failFastThreshold = 3; // failed threshold + rpcInit.ffp = dmFailFastFp; + + int32_t connLimitNum = tsNumOfRpcSessions / (tsNumOfRpcThreads * 3) / 2; + connLimitNum = TMAX(connLimitNum, 10); + connLimitNum = TMIN(connLimitNum, 500); + + rpcInit.connLimitNum = connLimitNum; + rpcInit.connLimitLock = 1; + rpcInit.supportBatch = 1; + rpcInit.batchSize = 8 * 1024; + rpcInit.timeToGetConn = tsTimeToGetAvailableConn; + taosVersionStrToInt(version, &(rpcInit.compatibilityVer)); + + pTrans->syncRpc = rpcOpen(&rpcInit); + if (pTrans->syncRpc == NULL) { + dError("failed to init dnode rpc sync client"); + return -1; + } + + dDebug("dnode rpc sync client is initialized"); + return 0; +} + void dmCleanupClient(SDnode *pDnode) { SDnodeTrans *pTrans = &pDnode->trans; if (pTrans->clientRpc) { @@ -412,11 +474,19 @@ void dmCleanupClient(SDnode *pDnode) { } void dmCleanupStatusClient(SDnode *pDnode) { SDnodeTrans *pTrans = &pDnode->trans; - // if (pTrans->statusClientRpc) { - // rpcClose(pTrans->statusClientRpc); - // pTrans->statusClientRpc = NULL; - // dDebug("dnode rpc status client is closed"); - // } + if (pTrans->statusRpc) { + rpcClose(pTrans->statusRpc); + pTrans->statusRpc = NULL; + dDebug("dnode rpc status client is closed"); + } +} +void dmCleanupSyncClient(SDnode *pDnode) { + SDnodeTrans *pTrans = &pDnode->trans; + if (pTrans->syncRpc) { + rpcClose(pTrans->syncRpc); + pTrans->syncRpc = NULL; + dDebug("dnode rpc sync client is closed"); + } } int32_t dmInitServer(SDnode *pDnode) { @@ -457,7 +527,10 @@ SMsgCb dmGetMsgcb(SDnode *pDnode) { SMsgCb msgCb = { .clientRpc = pDnode->trans.clientRpc, .serverRpc = pDnode->trans.serverRpc, + .statusRpc = pDnode->trans.statusRpc, + .syncRpc = pDnode->trans.syncRpc, .sendReqFp = dmSendReq, + .sendSyncReqFp = dmSendSyncReq, .sendRspFp = dmSendRsp, .registerBrokenLinkArgFp = dmRegisterBrokenLinkArg, .releaseHandleFp = dmReleaseHandle, diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 4d06b16297..58a4c92d3e 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -71,7 +71,8 @@ int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pTransName, int64_t streamUid); int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); -bool streamTransConflictOtherTrans(SMnode *pMnode, int64_t streamUid, const char *pTransName, bool lock); +bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamUid, const char *pTransName, bool lock); +int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamUid); // for sma // TODO refactor diff --git a/source/dnode/mnode/impl/src/mndDump.c b/source/dnode/mnode/impl/src/mndDump.c index 5efebbc16e..c68b11d184 100644 --- a/source/dnode/mnode/impl/src/mndDump.c +++ b/source/dnode/mnode/impl/src/mndDump.c @@ -32,6 +32,10 @@ int32_t sendReq(const SEpSet *pEpSet, SRpcMsg *pMsg) { terrno = TSDB_CODE_INVALID_PTR; return -1; } +int32_t sendSyncReq(const SEpSet *pEpSet, SRpcMsg *pMsg) { + terrno = TSDB_CODE_INVALID_PTR; + return -1; +} char *i642str(int64_t val) { static char str[24] = {0}; @@ -568,6 +572,7 @@ void mndDumpSdb() { SMsgCb msgCb = {0}; msgCb.reportStartupFp = reportStartup; msgCb.sendReqFp = sendReq; + msgCb.sendSyncReqFp = sendSyncReq; msgCb.sendRspFp = sendRsp; msgCb.mgmt = (SMgmtWrapper *)(&msgCb); // hack tmsgSetDefault(&msgCb); @@ -590,7 +595,7 @@ void mndDumpSdb() { dumpTopic(pSdb, json); dumpConsumer(pSdb, json); dumpSubscribe(pSdb, json); -// dumpOffset(pSdb, json); + // dumpOffset(pSdb, json); dumpStream(pSdb, json); dumpAcct(pSdb, json); dumpAuth(pSdb, json); @@ -605,7 +610,7 @@ void mndDumpSdb() { char *pCont = tjsonToString(json); int32_t contLen = strlen(pCont); char file[] = "sdb.json"; - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC| TD_FILE_WRITE_THROUGH); + TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); mError("failed to write %s since %s", file, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 6d5e66ed04..84bf026153 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -145,7 +145,7 @@ static void mndCalMqRebalance(SMnode *pMnode) { void *pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } } @@ -299,14 +299,14 @@ static bool mnodeIsNotLeader(SMnode *pMnode) { } static int32_t minCronTime() { - int64_t min = INT64_MAX; + int32_t min = INT32_MAX; min = TMIN(min, tsTtlPushIntervalSec); min = TMIN(min, tsTrimVDbIntervalSec); min = TMIN(min, tsTransPullupInterval); min = TMIN(min, tsCompactPullupInterval); min = TMIN(min, tsMqRebalanceInterval); min = TMIN(min, tsStreamCheckpointInterval); - min = TMIN(min, 5); // checkpointRemain + min = TMIN(min, 6); // checkpointRemain min = TMIN(min, tsStreamNodeCheckInterval); int64_t telemInt = TMIN(60, (tsTelemInterval - 1)); @@ -386,7 +386,8 @@ static void *mndThreadFp(void *param) { int64_t minCron = minCronTime(); if (sec % minCron == 0 && mnodeIsNotLeader(pMnode)) { // not leader, do nothing - mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno)) terrno = 0; + mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno)); + terrno = 0; continue; } mndDoTimerPullupTask(pMnode, sec); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 6fc66756fd..bcd0a63c6f 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -80,7 +80,9 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); -static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); +static int32_t doKillCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); +static void killTransImpl(SMnode *pMnode, int32_t transId, const char *pDbName); + static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); static void freeCheckpointCandEntry(void *); @@ -93,9 +95,6 @@ static int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream); static int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream); static int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream); -static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); -static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); - int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { .sdbType = SDB_STREAM, @@ -1051,7 +1050,7 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre return -1; } - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_CHECKPOINT_NAME, true); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_CHECKPOINT_NAME, true); if (conflict) { mndAddtoCheckpointWaitingList(pStream, checkpointId); mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb, @@ -1348,6 +1347,7 @@ static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) { if (ps == NULL) { continue; } + mDebug("start to launch checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, pEntry->streamId); code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId); @@ -1403,7 +1403,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_DROP_NAME, true); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_DROP_NAME, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); tFreeMDropStreamReq(&dropReq); @@ -1455,6 +1455,13 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { return -1; } + // kill the related checkpoint trans + int32_t transId = mndStreamGetRelTrans(pMnode, pStream->uid); + if (transId != 0) { + mDebug("drop active related transId:%d due to stream:%s dropped", transId, pStream->name); + killTransImpl(pMnode, transId, pStream->sourceDb); + } + removeStreamTasksInBuf(pStream, &execInfo); SName name = {0}; @@ -1495,6 +1502,17 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { return -1; } #endif + + // kill the related checkpoint trans + int32_t transId = mndStreamGetRelTrans(pMnode, pStream->uid); + if (transId != 0) { + mDebug("drop active related transId:%d due to stream:%s dropped", transId, pStream->name); + killTransImpl(pMnode, transId, pStream->sourceDb); + } + + // drop the stream obj in execInfo + removeStreamTasksInBuf(pStream, &execInfo); + if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pSdb, pStream); sdbCancelFetch(pSdb, pIter); @@ -1627,10 +1645,18 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } -static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { +static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { SColumnInfoData *pColInfo; int32_t cols = 0; + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + + STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); + if (pe == NULL) { + mError("task:0x%" PRIx64 " not exists in vnode, no valid status/stage info", id.taskId); + return -1; + } + // stream name char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); @@ -1681,14 +1707,7 @@ static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDat colDataSetVal(pColInfo, numOfRows, (const char *)level, false); // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; - STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - - STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); - if (pe == NULL) { - mError("task:0x%" PRIx64 " not exists in vnode, no valid status/stage info", id.taskId); - return; - } + char status[20 + VARSTR_HEADER_SIZE] = {0}; const char *pStatus = streamTaskGetStatusStr(pe->status); STR_TO_VARSTR(status, pStatus); @@ -1712,7 +1731,7 @@ static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDat colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); // output queue - // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); +// sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); // STR_TO_VARSTR(vbuf, buf); // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); @@ -1731,6 +1750,8 @@ static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDat pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + return TSDB_CODE_SUCCESS; } static int32_t getNumOfTasks(SArray *pTaskList) { @@ -1772,8 +1793,10 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock int32_t numOfLevels = taosArrayGetSize(pLevel); for (int32_t j = 0; j < numOfLevels; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - setTaskAttrInResBlock(pStream, pTask, pBlock, numOfRows); - numOfRows++; + int32_t code = setTaskAttrInResBlock(pStream, pTask, pBlock, numOfRows); + if (code == TSDB_CODE_SUCCESS) { + numOfRows++; + } } } @@ -1842,7 +1865,7 @@ int32_t mndPauseAllStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStre } if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) { - atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); + atomic_store_8(&pTask->status.statusBackup, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); } } @@ -1898,7 +1921,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_PAUSE_NAME, true); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_PAUSE_NAME, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; @@ -2000,7 +2023,7 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode *pMnode, SStreamObj *pStr } if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__PAUSE) { - atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); + atomic_store_8(&pTask->status.taskStatus, pTask->status.statusBackup); } } } @@ -2041,7 +2064,7 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_RESUME_NAME, true); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_RESUME_NAME, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; @@ -2347,7 +2370,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange break; } - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_UPDATE_NAME, false); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_TASK_UPDATE_NAME, false); sdbRelease(pSdb, pStream); if (conflict) { @@ -2571,7 +2594,7 @@ static void killAllCheckpointTrans(SMnode *pMnode, SVgroupChangeInfo *pChangeInf size_t len = 0; void * pKey = taosHashGetKey(pDb, &len); - killActiveCheckpointTrans(pMnode, pKey, len); + doKillCheckpointTrans(pMnode, pKey, len); } } @@ -2816,7 +2839,16 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { return TSDB_CODE_ACTION_IN_PROGRESS; } -int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t len) { +void killTransImpl(SMnode* pMnode, int32_t transId, const char* pDbName) { + STrans *pTrans = mndAcquireTrans(pMnode, transId); + if (pTrans != NULL) { + mInfo("kill active transId:%d in Db:%s", transId, pDbName); + mndKillTrans(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } +} + +int32_t doKillCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t len) { // data in the hash table will be removed automatically, no need to remove it here. SStreamTransInfo *pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len); if (pTransInfo == NULL) { @@ -2829,15 +2861,9 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t le return TSDB_CODE_SUCCESS; } - STrans *pTrans = mndAcquireTrans(pMnode, pTransInfo->transId); - if (pTrans != NULL) { - char* pDupDBName = strndup(pDBName, len); - mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDupDBName); - taosMemoryFree(pDupDBName); - - mndKillTrans(pMnode, pTrans); - mndReleaseTrans(pMnode, pTrans); - } + char* pDupDBName = strndup(pDBName, len); + killTransImpl(pMnode, pTransInfo->transId, pDupDBName); + taosMemoryFree(pDupDBName); return TSDB_CODE_SUCCESS; } @@ -2859,7 +2885,7 @@ static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, in code = TSDB_CODE_STREAM_TASK_NOT_EXIST; mError("failed to acquire the streamObj:0x%" PRIx64 " to reset checkpoint, may have been dropped", pStream->uid); } else { - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_RESET_NAME, false); + bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_TASK_RESET_NAME, false); if (conflict) { mError("stream:%s other trans exists in DB:%s, dstTable:%s failed to start reset-status trans", pStream->name, pStream->sourceDb, pStream->targetSTbName); @@ -3094,19 +3120,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (p->status != TASK_STATUS__READY) { mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); - -// if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) { -// bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo); -// if (drop) { -// SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId); -// if (pStreamObj == NULL) { -// mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pStreamObj->uid); -// } else { -// mndDropRelatedFillhistoryTask(pMnode, pTaskEntry, pStreamObj); -// mndReleaseStream(pMnode, pStreamObj); -// } -// } -// } } } diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index 8f94843500..847f55a8fd 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -65,7 +65,7 @@ int32_t clearFinishedTrans(SMnode* pMnode) { return 0; } -bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char* pTransName, bool lock) { +bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamUid, const char* pTransName, bool lock) { if (lock) { taosThreadMutexLock(&execInfo.lock); } @@ -92,6 +92,7 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char if (strcmp(pTransName, MND_STREAM_DROP_NAME) != 0) { mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, tInfo.name); + terrno = TSDB_CODE_MND_TRANS_CONFLICT; return true; } else { mDebug("not conflict with checkpoint trans, name:%s, continue create trans", pTransName); @@ -100,6 +101,7 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char (strcmp(tInfo.name, MND_STREAM_TASK_RESET_NAME) == 0)) { mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid, tInfo.name); + terrno = TSDB_CODE_MND_TRANS_CONFLICT; return true; } } else { @@ -113,6 +115,30 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char return false; } +int32_t mndStreamGetRelTrans(SMnode* pMnode, int64_t streamUid) { + taosThreadMutexLock(&execInfo.lock); + int32_t num = taosHashGetSize(execInfo.transMgmt.pDBTrans); + if (num <= 0) { + taosThreadMutexUnlock(&execInfo.lock); + return 0; + } + + clearFinishedTrans(pMnode); + SStreamTransInfo* pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, &streamUid, sizeof(streamUid)); + if (pEntry != NULL) { + SStreamTransInfo tInfo = *pEntry; + taosThreadMutexUnlock(&execInfo.lock); + + if (strcmp(tInfo.name, MND_STREAM_CHECKPOINT_NAME) == 0 || strcmp(tInfo.name, MND_STREAM_TASK_UPDATE_NAME) == 0) { + return tInfo.transId; + } + } else { + taosThreadMutexUnlock(&execInfo.lock); + } + + return 0; +} + int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) { SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid)); if (pEntry == NULL) { diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 62b671a12f..0909003201 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -747,7 +747,7 @@ static int32_t mndCheckConsumer(SRpcMsg *pMsg, SHashObj* rebSubHash) { int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1); int32_t status = atomic_load_32(&pConsumer->status); - mInfo("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d", + mDebug("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d", pConsumer->consumerId, status, mndConsumerStatusName(status), pConsumer->subscribeTime, pConsumer->createTime, hbStatus); diff --git a/source/dnode/mnode/impl/test/trans/trans2.cpp b/source/dnode/mnode/impl/test/trans/trans2.cpp index 2d03631a37..4d0d53ced0 100644 --- a/source/dnode/mnode/impl/test/trans/trans2.cpp +++ b/source/dnode/mnode/impl/test/trans/trans2.cpp @@ -61,6 +61,7 @@ class MndTestTrans2 : public ::testing::Test { static SMsgCb msgCb = {0}; msgCb.reportStartupFp = reportStartup; msgCb.sendReqFp = sendReq; + msgCb.sendSyncReqFp = sendSyncReq; msgCb.sendRspFp = sendRsp; msgCb.queueFps[SYNC_QUEUE] = putToQueue; msgCb.queueFps[WRITE_QUEUE] = putToQueue; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 9177559f61..4815dfe65c 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -91,9 +91,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } - char *p = NULL; - streamTaskGetStatus(pTask, &p); - + char* p = streamTaskGetStatus(pTask)->name; if (pTask->info.fillHistory) { sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 @@ -147,7 +145,8 @@ FAIL: } int32_t sndInit(SSnode *pSnode) { - tqStreamTaskResetStatus(pSnode->pMeta); + int32_t numOfTasks = 0; + tqStreamTaskResetStatus(pSnode->pMeta, &numOfTasks); streamMetaStartAllTasks(pSnode->pMeta); return 0; } @@ -195,7 +194,7 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { case TDMT_STREAM_TASK_DEPLOY: { void * pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); - return tqStreamTaskProcessDeployReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pReq, len, true, true); + return tqStreamTaskProcessDeployReq(pSnode->pMeta, &pSnode->msgCb,pMsg->info.conn.applyIndex, pReq, len, true, true); } case TDMT_STREAM_TASK_DROP: diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 0ef29fcb3a..435b12bc06 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -107,6 +107,7 @@ struct STQ { TTB* pExecStore; TTB* pCheckStore; SStreamMeta* pStreamMeta; + void* tqTimer; }; int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 275bbffe25..a53b322753 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -19,13 +19,25 @@ // 0: not init // 1: already inited -// 2: wait to be inited or cleaup +// 2: wait to be inited or cleanup static int32_t tqInitialize(STQ* pTq); static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_EXEC; } static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_IDLE; } +static int32_t tqTimerInit(STQ* pTq) { + pTq->tqTimer = taosTmrInit(100, 100, 1000, "TQ"); + if (pTq->tqTimer == NULL) { + return -1; + } + return 0; +} + +static void tqTimerCleanUp(STQ* pTq) { + taosTmrCleanUp(pTq->tqTimer); +} + void tqDestroyTqHandle(void* data) { STqHandle* pData = (STqHandle*)data; qDestroyTask(pData->execHandle.task); @@ -106,6 +118,7 @@ int32_t tqInitialize(STQ* pTq) { return -1; } + tqTimerInit(pTq); return 0; } @@ -136,6 +149,8 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq->path); tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); + tqTimerCleanUp(pTq); + qDebug("end to close tq"); taosMemoryFree(pTq); } @@ -842,8 +857,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } - char* p = NULL; - streamTaskGetStatus(pTask, &p); + char* p = streamTaskGetStatus(pTask)->name; if (pTask->info.fillHistory) { tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 @@ -873,7 +887,8 @@ int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, sversion, msg, msgLen, vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); + return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen, + vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); } static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) { @@ -932,8 +947,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // do recovery step1 const char* id = pTask->id.idStr; - char* pStatus = NULL; - streamTaskGetStatus(pTask, &pStatus); + char* pStatus = streamTaskGetStatus(pTask)->name; // avoid multi-thread exec while (1) { @@ -949,7 +963,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // let's decide which step should be executed now if (pTask->execInfo.step1Start == 0) { int64_t ts = taosGetTimestampMs(); - pTask->execInfo.step1Start = ts; tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts); } else { @@ -990,15 +1003,15 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { if (retInfo.ret == TASK_SCANHISTORY_REXEC) { streamReExecScanHistoryFuture(pTask, retInfo.idleTime); } else { - char* p = NULL; - ETaskStatus s = streamTaskGetStatus(pTask, &p); + SStreamTaskState* p = streamTaskGetStatus(pTask); + ETaskStatus s = p->state; if (s == TASK_STATUS__PAUSE) { tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", pTask->id.idStr, el, pTask->execInfo.step1El, status); } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) { - tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", pTask->id.idStr, p, - pTask->execInfo.step1El); + tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", pTask->id.idStr, + p->name, pTask->execInfo.step1El); } } @@ -1038,15 +1051,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamMetaReleaseTask(pMeta, pStreamTask); } else { ASSERT(0); -// STimeWindow* pWindow = &pTask->dataRange.window; -// ASSERT(HAS_RELATED_FILLHISTORY_TASK(pTask) || streamTaskShouldStop(pTask)); -// -// // Not update the fill-history time window until the state transfer is completed. -// tqDebug("s-task:%s scan-history in stream time window completed, start to handle data from WAL, startVer:%" PRId64 -// ", window:%" PRId64 " - %" PRId64, -// id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey); -// -// code = streamTaskScanHistoryDataComplete(pTask); } atomic_store_32(&pTask->status.inScanHistorySentinel, 0); @@ -1066,15 +1070,16 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; - int32_t taskId = pReq->taskId; - - if (taskId == STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID) { // all tasks are extracted submit data from the wal + // extracted submit data from wal files for all tasks + if (pReq->reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) { tqScanWal(pTq); return 0; } int32_t code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); - if(code == 0 && taskId > 0){ + + // let's continue scan data in the wal files + if(code == 0 && pReq->reqType >= 0){ tqScanWalAsync(pTq, false); } @@ -1178,7 +1183,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // todo save the checkpoint failed info taosThreadMutexLock(&pTask->lock); - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", @@ -1265,14 +1270,8 @@ int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) { } taosThreadMutexLock(&pTask->lock); - ETaskStatus status = streamTaskGetStatus(pTask, NULL); -// if (status == TASK_STATUS__STREAM_SCAN_HISTORY) { -// streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); -// } - SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id); - taosThreadMutexUnlock(&pTask->lock); // clear the scheduler status diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index aba3b0945f..2a600d08ef 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -24,6 +24,7 @@ static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); static bool taskReadyForDataFromWal(SStreamTask* pTask); static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems); +static int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration); // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. int32_t tqScanWal(STQ* pTq) { @@ -31,35 +32,78 @@ int32_t tqScanWal(STQ* pTq) { SStreamMeta* pMeta = pTq->pStreamMeta; int64_t st = taosGetTimestampMs(); - while (1) { - int32_t scan = pMeta->walScanCounter; - tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, scan); + tqDebug("vgId:%d continue to check if data in wal are available, scanCounter:%d", vgId, pMeta->scanInfo.scanCounter); - // check all tasks - bool shouldIdle = true; - doScanWalForAllTasks(pTq->pStreamMeta, &shouldIdle); + // check all tasks + int32_t numOfTasks = 0; + bool shouldIdle = true; + doScanWalForAllTasks(pMeta, &shouldIdle); -// if (shouldIdle) { - streamMetaWLock(pMeta); - int32_t times = (--pMeta->walScanCounter); - ASSERT(pMeta->walScanCounter >= 0); - streamMetaWUnLock(pMeta); + streamMetaWLock(pMeta); + int32_t times = (--pMeta->scanInfo.scanCounter); + ASSERT(pMeta->scanInfo.scanCounter >= 0); - if (times <= 0) { - break; - } else { - tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); - } -// } - - taosMsleep(SCAN_WAL_IDLE_DURATION); - } + numOfTasks = taosArrayGetSize(pMeta->pTaskList); + streamMetaWUnLock(pMeta); int64_t el = (taosGetTimestampMs() - st); tqDebug("vgId:%d scan wal for stream tasks completed, elapsed time:%" PRId64 " ms", vgId, el); + + if (times > 0) { + tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); + tqScanWalInFuture(pTq, numOfTasks, SCAN_WAL_IDLE_DURATION); + } return 0; } +typedef struct SBuildScanWalMsgParam { + STQ* pTq; + int32_t numOfTasks; +} SBuildScanWalMsgParam; + +static void doStartScanWal(void* param, void* tmrId) { + SBuildScanWalMsgParam* pParam = (SBuildScanWalMsgParam*) param; + + int32_t vgId = pParam->pTq->pStreamMeta->vgId; + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + taosMemoryFree(pParam); + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); + return; + } + + tqDebug("vgId:%d create msg to start wal scan, numOfTasks:%d, vnd restored:%d", vgId, pParam->numOfTasks, + pParam->pTq->pVnode->restored); + + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = 0; + pRunReq->reqType = STREAM_EXEC_T_EXTRACT_WAL_DATA; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pParam->pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + + taosMemoryFree(pParam); +} + +int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration) { + SStreamMeta* pMeta = pTq->pStreamMeta; + + SBuildScanWalMsgParam* pParam = taosMemoryMalloc(sizeof(SBuildScanWalMsgParam)); + + pParam->pTq = pTq; + pParam->numOfTasks = numOfTasks; + if (pMeta->scanInfo.scanTimer == NULL) { + pMeta->scanInfo.scanTimer = taosTmrStart(doStartScanWal, idleDuration, pParam, pTq->tqTimer); + } else { + taosTmrReset(doStartScanWal, idleDuration, pParam, pTq->tqTimer, &pMeta->scanInfo.scanTimer); + } + + return TSDB_CODE_SUCCESS; +} + int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; @@ -80,23 +124,23 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return 0; } - pMeta->walScanCounter += 1; - if (pMeta->walScanCounter > MAX_REPEAT_SCAN_THRESHOLD) { - pMeta->walScanCounter = MAX_REPEAT_SCAN_THRESHOLD; + pMeta->scanInfo.scanCounter += 1; + if (pMeta->scanInfo.scanCounter > MAX_REPEAT_SCAN_THRESHOLD) { + pMeta->scanInfo.scanCounter = MAX_REPEAT_SCAN_THRESHOLD; } - if (pMeta->walScanCounter > 1) { - tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); + if (pMeta->scanInfo.scanCounter > 1) { + tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->scanInfo.scanCounter); streamMetaWUnLock(pMeta); return 0; } - int32_t numOfPauseTasks = pTq->pStreamMeta->numOfPausedTasks; + int32_t numOfPauseTasks = pMeta->numOfPausedTasks; if (ckPause && numOfTasks == numOfPauseTasks) { tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); // reset the counter value, since we do not launch the scan wal operation. - pMeta->walScanCounter = 0; + pMeta->scanInfo.scanCounter = 0; streamMetaWUnLock(pMeta); return 0; } @@ -114,7 +158,8 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID; + pRunReq->taskId = 0; + pRunReq->reqType = STREAM_EXEC_T_EXTRACT_WAL_DATA; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); @@ -138,7 +183,8 @@ int32_t tqStopStreamTasksAsync(STQ* pTq) { pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = STREAM_EXEC_STOP_ALL_TASKS_ID; + pRunReq->taskId = 0; + pRunReq->reqType = STREAM_EXEC_T_STOP_ALL_TASKS; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); @@ -220,16 +266,15 @@ bool taskReadyForDataFromWal(SStreamTask* pTask) { } // not in ready state, do not handle the data from wal - char* p = NULL; - int32_t status = streamTaskGetStatus(pTask, &p); - if (streamTaskGetStatus(pTask, &p) != TASK_STATUS__READY) { - tqTrace("s-task:%s not ready for submit block in wal, status:%s", pTask->id.idStr, p); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state != TASK_STATUS__READY) { + tqTrace("s-task:%s not ready for submit block in wal, status:%s", pTask->id.idStr, pState->name); return false; } // fill-history task has entered into the last phase, no need to anything if ((pTask->info.fillHistory == 1) && pTask->status.appendTranstateBlock) { - ASSERT(status == TASK_STATUS__READY); + ASSERT(pState->state == TASK_STATUS__READY); // the maximum version of data in the WAL has reached already, the step2 is done tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, pTask->dataRange.range.maxVer); @@ -342,10 +387,9 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { taosThreadMutexLock(&pTask->lock); - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); - if (status != TASK_STATUS__READY) { - tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, p); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state != TASK_STATUS__READY) { + tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pState->name); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pStreamMeta, pTask); continue; diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 399788e731..7883c858f0 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -25,7 +25,6 @@ typedef struct STaskUpdateEntry { int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart) { int32_t vgId = pMeta->vgId; - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); if (numOfTasks == 0) { tqDebug("vgId:%d no stream tasks existed to run", vgId); @@ -42,7 +41,62 @@ int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart) { tqDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = restart ? STREAM_EXEC_RESTART_ALL_TASKS_ID : STREAM_EXEC_START_ALL_TASKS_ID; + pRunReq->taskId = 0; + pRunReq->reqType = restart ? STREAM_EXEC_T_RESTART_ALL_TASKS : STREAM_EXEC_T_START_ALL_TASKS; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(cb, STREAM_QUEUE, &msg); + return 0; +} + +int32_t tqStreamOneTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, int64_t streamId, int32_t taskId) { + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + tqDebug("vgId:%d no stream tasks existed to run", vgId); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start task:0x%x, code:%s", vgId, taskId, terrstr()); + return -1; + } + + tqDebug("vgId:%d start task:0x%x async", vgId, taskId); + pRunReq->head.vgId = vgId; + pRunReq->streamId = streamId; + pRunReq->taskId = taskId; + pRunReq->reqType = STREAM_EXEC_T_START_ONE_TASK; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(cb, STREAM_QUEUE, &msg); + return 0; +} + +int32_t tqUpdateNodeEpsetAsync(SStreamMeta* pMeta, SMsgCb* cb, int64_t streamId, int32_t taskId) { + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + tqDebug("vgId:%d no stream tasks existed to run", vgId); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start task:0x%x, code:%s", vgId, taskId, terrstr()); + return -1; + } + + tqDebug("vgId:%d update s-task:0x%x nodeEpset async", vgId, taskId); + pRunReq->head.vgId = vgId; + pRunReq->streamId = streamId; + pRunReq->taskId = taskId; + pRunReq->reqType = STREAM_EXEC_T_UPDATE_TASK_EPSET; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(cb, STREAM_QUEUE, &msg); @@ -54,6 +108,7 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + int64_t st = taosGetTimestampMs(); SStreamTaskNodeUpdateMsg req = {0}; @@ -75,7 +130,7 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL || *ppTask == NULL) { - tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, + tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", vgId, req.taskId); rsp.code = TSDB_CODE_SUCCESS; streamMetaWUnLock(pMeta); @@ -85,43 +140,38 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM } SStreamTask* pTask = *ppTask; + const char* idstr = pTask->id.idStr; if (pMeta->updateInfo.transId != req.transId) { pMeta->updateInfo.transId = req.transId; - tqInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId); + tqInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", idstr, req.transId); // info needs to be kept till the new trans to update the nodeEp arrived. taosHashClear(pMeta->updateInfo.pTasks); } else { - tqDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId); + tqDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", idstr, req.transId); } STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId}; - void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry)); - if (exist != NULL) { - tqDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId, - req.transId); + + void* pReqTask = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry)); + if (pReqTask != NULL) { + tqDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", idstr, vgId, req.transId); rsp.code = TSDB_CODE_SUCCESS; streamMetaWUnLock(pMeta); taosArrayDestroy(req.pNodeList); return rsp.code; } -// streamMetaWUnLock(pMeta); - - // todo for test purpose // the following two functions should not be executed within the scope of meta lock to avoid deadlock streamTaskUpdateEpsetInfo(pTask, req.pNodeList); streamTaskResetStatus(pTask); - // continue after lock the meta again -// streamMetaWLock(pMeta); - SStreamTask** ppHTask = NULL; if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); if (ppHTask == NULL || *ppHTask == NULL) { tqError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", - pMeta->vgId, req.taskId); + vgId, req.taskId); CLEAR_RELATED_FILLHISTORY_TASK(pTask); } else { tqDebug("s-task:%s fill-history task update nodeEp along with stream task", (*ppHTask)->id.idStr); @@ -129,17 +179,23 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM } } - { + if (restored) { + tqDebug("s-task:%s vgId:%d start to save task", idstr, vgId); streamMetaSaveTask(pMeta, pTask); if (ppHTask != NULL) { streamMetaSaveTask(pMeta, *ppHTask); } +#if 0 if (streamMetaCommit(pMeta) < 0) { // persist to disk } +#endif + } else { + tqDebug("s-task:%s vgId:%d not save since restore not finish", idstr, vgId); } + tqDebug("s-task:%s vgId:%d start to stop task after save task", idstr, vgId); streamTaskStop(pTask); // keep the already handled info @@ -147,10 +203,15 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM if (ppHTask != NULL) { streamTaskStop(*ppHTask); - tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); + + int64_t now = taosGetTimestampMs(); + tqDebug("s-task:%s vgId:%d task nodeEp update completed, streamTask/fill-history closed, elapsed:%" PRId64 " ms", + idstr, vgId, now - st); taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); } else { - tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); + int64_t now = taosGetTimestampMs(); + tqDebug("s-task:%s vgId:%d, task nodeEp update completed, streamTask closed, elapsed time:%" PRId64 "ms", idstr, + vgId, now - st); } rsp.code = 0; @@ -171,11 +232,12 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM pMeta->startInfo.tasksWillRestart = 0; streamMetaWUnLock(pMeta); } else { - tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); + tqDebug("vgId:%d all %d task(s) nodeEp updated and closed, transId:%d", vgId, numOfTasks, req.transId); #if 0 // for test purpose, to trigger the leader election taosMSleep(5000); #endif + tqStreamTaskStartAsync(pMeta, cb, true); streamMetaWUnLock(pMeta); } @@ -399,11 +461,11 @@ int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage); streamMetaReleaseTask(pMeta, pTask); - char* p = NULL; - streamTaskGetStatus(pTask, &p); + SStreamTaskState* pState = streamTaskGetStatus(pTask); tqDebug("s-task:%s status:%s, stage:%" PRId64 " recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", - pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + pTask->id.idStr, pState->name, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, + rsp.status); } else { rsp.status = TASK_DOWNSTREAM_NOT_READY; tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 @@ -415,12 +477,22 @@ int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { return streamSendCheckRsp(pMeta, &req, &rsp, &pMsg->info, taskId); } +static void setParam(SStreamTask* pTask, int64_t* initTs, bool* hasHTask, STaskId* pId) { + *initTs = pTask->execInfo.init; + + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + *hasHTask = true; + pId->streamId = pTask->hTaskInfo.id.streamId; + pId->taskId = pTask->hTaskInfo.id.taskId; + } +} + int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader) { char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t vgId = pMeta->vgId; + int32_t code = TSDB_CODE_SUCCESS; - int32_t code; SStreamTaskCheckRsp rsp; SDecoder decoder; @@ -437,20 +509,62 @@ int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLe tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); + int64_t initTs = 0; + int64_t now = taosGetTimestampMs(); + STaskId id = {.streamId = rsp.streamId, .taskId = rsp.upstreamTaskId}; + STaskId fId = {0}; + bool hasHistoryTask = false; + + // todo extract method if (!isLeader) { - streamMetaUpdateTaskDownstreamStatus(pMeta, rsp.streamId, rsp.upstreamTaskId, 0, taosGetTimestampMs(), false); - tqError("vgId:%d not leader, task:0x%x not handle the check rsp, downstream:0x%x (vgId:%d)", vgId, - rsp.upstreamTaskId, rsp.downstreamTaskId, rsp.downstreamNodeId); + // this task may have been stopped, so acquire task may failed. Retrieve it directly from the task hash map. + streamMetaRLock(pMeta); + + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask != NULL) { + setParam(*ppTask, &initTs, &hasHistoryTask, &fId); + streamMetaRUnLock(pMeta); + + if (hasHistoryTask) { + streamMetaUpdateTaskDownstreamStatus(pMeta, fId.streamId, fId.taskId, initTs, now, false); + } + + tqError("vgId:%d not leader, task:0x%x not handle the check rsp, downstream:0x%x (vgId:%d)", vgId, + rsp.upstreamTaskId, rsp.downstreamTaskId, rsp.downstreamNodeId); + } else { + streamMetaRUnLock(pMeta); + + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", + rsp.streamId, rsp.upstreamTaskId, vgId); + code = terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + streamMetaUpdateTaskDownstreamStatus(pMeta, rsp.streamId, rsp.upstreamTaskId, initTs, now, false); return code; } SStreamTask* pTask = streamMetaAcquireTask(pMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - streamMetaUpdateTaskDownstreamStatus(pMeta, rsp.streamId, rsp.upstreamTaskId, 0, taosGetTimestampMs(), false); + streamMetaRLock(pMeta); + + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask != NULL) { + setParam(*ppTask, &initTs, &hasHistoryTask, &fId); + streamMetaRUnLock(pMeta); + + if (hasHistoryTask) { + streamMetaUpdateTaskDownstreamStatus(pMeta, fId.streamId, fId.taskId, initTs, now, false); + } + } else { + streamMetaRUnLock(pMeta); + } + + streamMetaUpdateTaskDownstreamStatus(pMeta, rsp.streamId, rsp.upstreamTaskId, initTs, now, false); tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", rsp.streamId, rsp.upstreamTaskId, vgId); - terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; - return -1; + + code = terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + return code; } code = streamProcessCheckRsp(pTask, &rsp); @@ -489,8 +603,8 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) return code; } -int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, - bool restored) { +int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sversion, char* msg, int32_t msgLen, + bool isLeader, bool restored) { int32_t code = 0; int32_t vgId = pMeta->vgId; @@ -542,18 +656,19 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* // only handled in the leader node if (isLeader) { tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); - SStreamTask* p = streamMetaAcquireTask(pMeta, streamId, taskId); - if (p != NULL && restored && p->info.fillHistory == 0) { - EStreamTaskEvent event = TASK_EVENT_INIT; - streamTaskHandleEvent(p->status.pSM, event); - } else if (!restored) { - tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); + if (restored) { + SStreamTask* p = streamMetaAcquireTask(pMeta, streamId, taskId); + if (p != NULL && (p->info.fillHistory == 0)) { + tqStreamOneTaskStartAsync(pMeta, cb, streamId, taskId); + } + if (p != NULL) { + streamMetaReleaseTask(pMeta, p); + } + } else { + tqWarn("s-task:0x%x not launched since vnode(vgId:%d) not ready", taskId, vgId); } - if (p != NULL) { - streamMetaReleaseTask(pMeta, p); - } } else { tqDebug("vgId:%d not leader, not launch stream task s-task:0x%x", vgId, taskId); } @@ -598,21 +713,25 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen return 0; } -int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta) { +int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta, int32_t* numOfTasks) { int32_t vgId = pMeta->vgId; - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + *numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); - if (numOfTasks == 0) { + tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, *numOfTasks); + if (*numOfTasks == 0) { return TSDB_CODE_SUCCESS; } - for (int32_t i = 0; i < numOfTasks; ++i) { + for (int32_t i = 0; i < (*numOfTasks); ++i) { SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); streamTaskResetStatus(*pTask); + +// if ((*pTask)->info.fillHistory == 1) { +// streamResetParamForScanHistory(*pTask); +// } } return 0; @@ -658,9 +777,18 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { return code; } + { + STaskStartInfo* pStartInfo = &pMeta->startInfo; + taosHashClear(pStartInfo->pReadyTaskSet); + taosHashClear(pStartInfo->pFailedTaskSet); + pStartInfo->readyTs = 0; + } + if (isLeader && !tsDisableStream) { - tqStreamTaskResetStatus(pMeta); + int32_t numOfTasks = 0; + tqStreamTaskResetStatus(pMeta, &numOfTasks); streamMetaWUnLock(pMeta); + streamMetaStartAllTasks(pMeta); } else { streamMetaResetStartInfo(&pMeta->startInfo); @@ -675,21 +803,44 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader) { SStreamTaskRunReq* pReq = pMsg->pCont; - int32_t taskId = pReq->taskId; + int32_t type = pReq->reqType; int32_t vgId = pMeta->vgId; - if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { + if (type == STREAM_EXEC_T_START_ONE_TASK) { + streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId); + return 0; + } else if (type == STREAM_EXEC_T_START_ALL_TASKS) { streamMetaStartAllTasks(pMeta); return 0; - } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { + } else if (type == STREAM_EXEC_T_RESTART_ALL_TASKS) { restartStreamTasks(pMeta, isLeader); return 0; - } else if (taskId == STREAM_EXEC_STOP_ALL_TASKS_ID) { + } else if (type == STREAM_EXEC_T_STOP_ALL_TASKS) { streamMetaStopAllTasks(pMeta); + return 0; + } else if (type == STREAM_EXEC_T_RESUME_TASK) { // task resume to run after idle for a while + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + + if (pTask != NULL) { + char* pStatus = NULL; + if (streamTaskReadyToRun(pTask, &pStatus)) { + int64_t execTs = pTask->status.lastExecTs; + int32_t idle = taosGetTimestampMs() - execTs; + tqDebug("s-task:%s task resume to run after idle for:%dms from:%" PRId64, pTask->id.idStr, idle, execTs); + + streamResumeTask(pTask); + } else { + int8_t status = streamTaskSetSchedStatusInactive(pTask); + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, + pTask->id.idStr, pStatus, status); + } + streamMetaReleaseTask(pMeta, pTask); + } + return 0; } - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, taskId); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); if (pTask != NULL) { // even in halt status, the data in inputQ must be processed char* p = NULL; if (streamTaskReadyToRun(pTask, &p)) { @@ -706,29 +857,34 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead return 0; } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. // todo add one function to handle this - tqError("vgId:%d failed to found s-task, taskId:0x%x may have been dropped", vgId, taskId); + tqError("vgId:%d failed to found s-task, taskId:0x%x may have been dropped", vgId, pReq->taskId); return -1; } } int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { STaskStartInfo* pStartInfo = &pMeta->startInfo; + int32_t vgId = pMeta->vgId; + streamMetaWLock(pMeta); + if (pStartInfo->taskStarting == 1) { + tqDebug("vgId:%d already in start tasks procedure in other thread, restartCounter:%d, do nothing", vgId, + pMeta->startInfo.restartCount); + } else { // not in starting procedure + if (pStartInfo->restartCount > 0) { + pStartInfo->restartCount -= 1; + tqDebug("vgId:%d role:%d need to restart all tasks again, restartCounter:%d", vgId, pMeta->role, + pStartInfo->restartCount); - if (pStartInfo->restartCount > 0) { - pStartInfo->restartCount -= 1; - - ASSERT(pStartInfo->taskStarting == 0); - tqDebug("vgId:%d role:%d need to restart all tasks again, restartCounter:%d", pMeta->vgId, pMeta->role, - pStartInfo->restartCount); - - streamMetaWUnLock(pMeta); - restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER)); - } else { - streamMetaWUnLock(pMeta); - tqDebug("vgId:%d start all tasks completed", pMeta->vgId); + streamMetaWUnLock(pMeta); + restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER)); + return TSDB_CODE_SUCCESS; + } else { + tqDebug("vgId:%d start all tasks completed in callbackFn", pMeta->vgId); + } } + streamMetaWUnLock(pMeta); return TSDB_CODE_SUCCESS; } @@ -745,7 +901,7 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr); // clear flag set during do checkpoint, and open inputQ for all upstream tasks - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) { + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK) { streamTaskClearCheckInfo(pTask, true); streamTaskSetStatusReady(pTask); } @@ -799,7 +955,7 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t } streamTaskResume(pTask); - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; int32_t level = pTask->info.taskLevel; if (level == TASK_LEVEL__SINK) { diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index ab504acea7..9b29329cf3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1990,9 +1990,9 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlie if (SFSNEXTROW_FILESET == state->state) { _next_fileset: - if (--state->iFileSet < 0) { - clearLastFileSet(state); + clearLastFileSet(state); + if (--state->iFileSet < 0) { *ppRow = NULL; return code; } else { @@ -2862,7 +2862,9 @@ static int32_t mergeLastCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SC taosArraySet(pColArray, iCol, &lastCol); int32_t aColIndex = taosArraySearchIdx(aColArray, &lastCol.colVal.cid, compareInt16Val, TD_EQ); - taosArrayRemove(aColArray, aColIndex); + if (aColIndex >= 0) { + taosArrayRemove(aColArray, aColIndex); + } } else if (!COL_VAL_IS_VALUE(tColVal) && !COL_VAL_IS_VALUE(pColVal) && !setNoneCol) { noneCol = iCol; setNoneCol = true; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 62be0968e3..8a7ec981e8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -73,6 +73,7 @@ static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order static void resetTableListIndex(SReaderStatus* pStatus); static void getMemTableTimeRange(STsdbReader* pReader, int64_t* pMaxKey, int64_t* pMinKey); static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo); +static int32_t buildFromPreFilesetBuffer(STsdbReader* pReader); static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } @@ -3040,6 +3041,17 @@ static ERetrieveType doReadDataFromSttFiles(STsdbReader* pReader) { return TSDB_READ_RETURN; } + if (pReader->status.bProcMemPreFileset) { + code = buildFromPreFilesetBuffer(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + if (pResBlock->info.rows > 0) { + pReader->status.processingMemPreFileSet = true; + return TSDB_READ_RETURN; + } + } + if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. return TSDB_READ_CONTINUE; } else { // all blocks in data file are checked, let's check the data in last files @@ -4297,6 +4309,7 @@ static int32_t buildFromPreFilesetBuffer(STsdbReader* pReader) { } else { tsdbDebug("finished pre-fileset %d buffer processing. %s", fid, pReader->idStr); pStatus->bProcMemPreFileset = false; + pStatus->processingMemPreFileSet = false; if (pReader->notifyFn) { STsdReaderNotifyInfo info = {0}; info.duration.filesetId = fid; @@ -4329,7 +4342,7 @@ static int32_t doTsdbNextDataBlockFilesetDelimited(STsdbReader* pReader) { pStatus->bProcMemFirstFileset, pReader->idStr); if (pStatus->bProcMemPreFileset) { if (pBlock->info.rows > 0) { - if (pReader->notifyFn) { + if (pReader->notifyFn && !pReader->status.processingMemPreFileSet) { int32_t fid = pReader->status.pCurrentFileset->fid; STsdReaderNotifyInfo info = {0}; info.duration.filesetId = fid; diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index 7a93e73a07..16ee90823b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -238,6 +238,7 @@ typedef struct SReaderStatus { int64_t prevFilesetStartKey; int64_t prevFilesetEndKey; bool bProcMemFirstFileset; + bool processingMemPreFileSet; STableUidList procMemUidList; STableBlockScanInfo** pProcMemTableIter; } SReaderStatus; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 82baaa652b..e6a80757ff 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -14,11 +14,11 @@ */ #define _DEFAULT_SOURCE -#include "tq.h" #include "sync.h" +#include "tq.h" +#include "tqCommon.h" #include "tsdb.h" #include "vnd.h" -#include "tqCommon.h" #define BATCH_ENABLE 0 @@ -411,7 +411,7 @@ static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { } static int32_t vnodeSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { - int32_t code = tmsgSendReq(pEpSet, pMsg); + int32_t code = tmsgSendSyncReq(pEpSet, pMsg); if (code != 0) { rpcFreeCont(pMsg->pCont); pMsg->pCont = NULL; @@ -477,8 +477,8 @@ static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta } static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) { - SVnode *pVnode = pFsm->data; - int32_t code = vnodeSnapReaderOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapReader **)ppReader); + SVnode *pVnode = pFsm->data; + int32_t code = vnodeSnapReaderOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapReader **)ppReader); return code; } @@ -555,7 +555,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) walApplyVer(pVnode->pWal, commitIdx); pVnode->restored = true; - SStreamMeta* pMeta = pVnode->pTq->pStreamMeta; + SStreamMeta *pMeta = pVnode->pTq->pStreamMeta; streamMetaWLock(pMeta); if (pMeta->startInfo.tasksWillRestart) { @@ -567,29 +567,30 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) if (vnodeIsRoleLeader(pVnode)) { // start to restore all stream tasks if (tsDisableStream) { - streamMetaWUnLock(pMeta); vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); - tqStreamTaskResetStatus(pVnode->pTq->pStreamMeta); + vInfo("vgId:%d sync restore finished, start to launch stream task(s)", pVnode->config.vgId); + int32_t numOfTasks = 0; + tqStreamTaskResetStatus(pMeta, &numOfTasks); - { + if (numOfTasks > 0) { if (pMeta->startInfo.taskStarting == 1) { pMeta->startInfo.restartCount += 1; tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, pMeta->startInfo.restartCount); - streamMetaWUnLock(pMeta); } else { pMeta->startInfo.taskStarting = 1; streamMetaWUnLock(pMeta); tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false); + return; } } } } else { - streamMetaWUnLock(pMeta); vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); } + + streamMetaWUnLock(pMeta); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index b713b9b112..efab09f02b 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1838,7 +1838,7 @@ static STimeWindow doCalculateTimeWindow(int64_t ts, SInterval* pInterval) { STimeWindow w = {0}; w.skey = taosTimeTruncate(ts, pInterval); - w.ekey = taosTimeAdd(w.skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + w.ekey = taosTimeGetIntervalEnd(w.skey, pInterval); return w; } @@ -1887,31 +1887,17 @@ STimeWindow getActiveTimeWindow(SDiskbasedBuf* pBuf, SResultRowInfo* pResultRowI } void getNextTimeWindow(const SInterval* pInterval, STimeWindow* tw, int32_t order) { + int64_t slidingStart = 0; + if (pInterval->offset > 0) { + slidingStart = taosTimeAdd(tw->skey, -1 * pInterval->offset, pInterval->offsetUnit, pInterval->precision); + } else { + slidingStart = tw->skey; + } int32_t factor = GET_FORWARD_DIRECTION_FACTOR(order); - if (!IS_CALENDAR_TIME_DURATION(pInterval->slidingUnit)) { - tw->skey += pInterval->sliding * factor; - tw->ekey = taosTimeAdd(tw->skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; - return; - } - - // convert key to second - int64_t key = convertTimePrecision(tw->skey, pInterval->precision, TSDB_TIME_PRECISION_MILLI) / 1000; - - int64_t duration = pInterval->sliding; - if (pInterval->slidingUnit == 'y') { - duration *= 12; - } - - struct tm tm; - time_t t = (time_t)key; - taosLocalTime(&t, &tm, NULL); - - int mon = (int)(tm.tm_year * 12 + tm.tm_mon + duration * factor); - tm.tm_year = mon / 12; - tm.tm_mon = mon % 12; - tw->skey = convertTimePrecision((int64_t)taosMktime(&tm) * 1000LL, TSDB_TIME_PRECISION_MILLI, pInterval->precision); - - tw->ekey = taosTimeAdd(tw->skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + slidingStart = taosTimeAdd(slidingStart, factor * pInterval->sliding, pInterval->slidingUnit, pInterval->precision); + tw->skey = taosTimeAdd(slidingStart, pInterval->offset, pInterval->offsetUnit, pInterval->precision); + int64_t slidingEnd = taosTimeAdd(slidingStart, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + tw->ekey = taosTimeAdd(slidingEnd, pInterval->offset, pInterval->offsetUnit, pInterval->precision); } bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo) { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index fb39de484f..d27daad550 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -922,8 +922,8 @@ int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRan pStreamInfo->fillHistoryWindow = *pWindow; pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE2; - qDebug("%s step 2. set param for stream scanner for scan-history data, verRange:%" PRId64 " - %" PRId64 - ", window:%" PRId64 " - %" PRId64, + qDebug("%s step 2. set param for stream scanner scan wal, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 + " - %" PRId64, GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey, pWindow->ekey); return 0; @@ -1023,6 +1023,57 @@ int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo) { return 0; } +int32_t qResetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + SOperatorInfo* pOperator = pTaskInfo->pRoot; + + while (1) { + int32_t type = pOperator->operatorType; + if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + pSup->calTriggerSaved = 0; + pSup->deleteMarkSaved = 0; + qInfo("reset stream param for interval: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); + + } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + pSup->calTriggerSaved = 0; + pSup->deleteMarkSaved = 0; + qInfo("reset stream param for session: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); + + } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + pSup->calTriggerSaved = 0; + pSup->deleteMarkSaved = 0; + qInfo("reset stream param for state: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); + + } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT) { + SStreamEventAggOperatorInfo* pInfo = pOperator->info; + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + pSup->calTriggerSaved = 0; + pSup->deleteMarkSaved = 0; + qInfo("save stream param for state: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); + } + + // iterate operator tree + if (pOperator->numOfDownstream != 1 || pOperator->pDownstream[0] == NULL) { + return 0; + } else { + pOperator = pOperator->pDownstream[0]; + } + } +} + int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; const char* id = GET_TASKID(pTaskInfo); @@ -1078,7 +1129,7 @@ int32_t qStreamInfoResetTimewindowFilter(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; STimeWindow* pWindow = &pTaskInfo->streamInfo.fillHistoryWindow; - qDebug("%s remove scan-history filter window:%" PRId64 "-%" PRId64 ", set new window:%" PRId64 "-%" PRId64, + qDebug("%s remove timeWindow filter:%" PRId64 "-%" PRId64 ", set new window:%" PRId64 "-%" PRId64, GET_TASKID(pTaskInfo), pWindow->skey, pWindow->ekey, INT64_MIN, INT64_MAX); pWindow->skey = INT64_MIN; diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 95d26fdd0e..ff4d3d0d27 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -449,7 +449,7 @@ STimeWindow getAlignQueryTimeWindow(const SInterval* pInterval, int64_t key) { * if the realSkey > INT64_MAX - pInterval->interval, the query duration between * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges. */ - win.ekey = taosTimeAdd(win.skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + win.ekey = taosTimeGetIntervalEnd(win.skey, pInterval); if (win.ekey < win.skey) { win.ekey = INT64_MAX; } diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index b2d0f25466..22339aa384 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -427,6 +427,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { taosMemoryFreeClear(param); } +#ifdef BUILD_NO_CALL static bool allInvertible(SqlFunctionCtx* pFCtx, int32_t numOfCols) { for (int32_t i = 0; i < numOfCols; i++) { if (fmIsUserDefinedFunc(pFCtx[i].functionId) || !fmIsInvertible(pFCtx[i].functionId)) { @@ -435,6 +436,7 @@ static bool allInvertible(SqlFunctionCtx* pFCtx, int32_t numOfCols) { } return true; } +#endif void reloadFromDownStream(SOperatorInfo* downstream, SStreamIntervalOperatorInfo* pInfo) { SStateStore* pAPI = &downstream->pTaskInfo->storageAPI.stateStore; @@ -3846,6 +3848,7 @@ _error: return NULL; } +#ifdef BUILD_NO_CALL static void setInverFunction(SqlFunctionCtx* pCtx, int32_t num, EStreamType type) { for (int i = 0; i < num; i++) { if (type == STREAM_INVERT) { @@ -3855,6 +3858,7 @@ static void setInverFunction(SqlFunctionCtx* pCtx, int32_t num, EStreamType type } } } +#endif static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; @@ -3947,9 +3951,11 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { // caller. Note that all the time window are not close till now. // the pDataBlock are always the same one, no need to call this again setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); +#ifdef BUILD_NO_CALL if (pInfo->invertible) { setInverFunction(pSup->pCtx, pOperator->exprSupp.numOfExprs, pBlock->info.type); } +#endif doStreamIntervalAggImpl(pOperator, pBlock, pBlock->info.id.groupId, pInfo->pUpdatedMap); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index afe1921d30..b86253a8d1 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -450,7 +450,7 @@ int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBl TSKEY next = primaryKeys[startPos]; if (pInterval->intervalUnit == 'n' || pInterval->intervalUnit == 'y') { pNext->skey = taosTimeTruncate(next, pInterval); - pNext->ekey = taosTimeAdd(pNext->skey, pInterval->interval, pInterval->intervalUnit, precision) - 1; + pNext->ekey = taosTimeGetIntervalEnd(pNext->skey, pInterval); } else { pNext->ekey += ((next - pNext->ekey + pInterval->sliding - 1) / pInterval->sliding) * pInterval->sliding; pNext->skey = pNext->ekey - pInterval->interval + 1; @@ -459,7 +459,7 @@ int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBl TSKEY next = primaryKeys[startPos]; if (pInterval->intervalUnit == 'n' || pInterval->intervalUnit == 'y') { pNext->skey = taosTimeTruncate(next, pInterval); - pNext->ekey = taosTimeAdd(pNext->skey, pInterval->interval, pInterval->intervalUnit, precision) - 1; + pNext->ekey = taosTimeGetIntervalEnd(pNext->skey, pInterval); } else { pNext->skey -= ((pNext->skey - next + pInterval->sliding - 1) / pInterval->sliding) * pInterval->sliding; pNext->ekey = pNext->skey + pInterval->interval - 1; @@ -1079,16 +1079,6 @@ static SSDataBlock* doBuildIntervalResult(SOperatorInfo* pOperator) { return (rows == 0) ? NULL : pBlock; } -static void setInverFunction(SqlFunctionCtx* pCtx, int32_t num, EStreamType type) { - for (int i = 0; i < num; i++) { - if (type == STREAM_INVERT) { - fmSetInvertFunc(pCtx[i].functionId, &(pCtx[i].fpSet)); - } else if (type == STREAM_NORMAL) { - fmSetNormalFunc(pCtx[i].functionId, &(pCtx[i].fpSet)); - } - } -} - static void doClearWindowImpl(SResultRowPosition* p1, SDiskbasedBuf* pResultBuf, SExprSupp* pSup, int32_t numOfOutput) { SResultRow* pResult = getResultRowByPos(pResultBuf, p1, false); if (NULL == pResult) { diff --git a/source/libs/executor/test/timewindowTest.cpp b/source/libs/executor/test/timewindowTest.cpp index 2894c66587..3639bf15e1 100644 --- a/source/libs/executor/test/timewindowTest.cpp +++ b/source/libs/executor/test/timewindowTest.cpp @@ -158,4 +158,32 @@ TEST(testCase, timewindow_gen) { } +TEST(testCase, timewindow_natural) { + osSetTimezone("CST"); + + int32_t precision = TSDB_TIME_PRECISION_MILLI; + + SInterval interval2 = createInterval(17, 17, 13392000000, 'n', 'n', 0, precision); + int64_t key = 1648970865984; + STimeWindow w0 = getAlignQueryTimeWindow(&interval2, key); + printTimeWindow(&w0, precision, key); + ASSERT_GE(w0.ekey, key); + + int64_t key1 = 1633446027072; + STimeWindow w1 = {0}; + getInitialStartTimeWindow(&interval2, key1, &w1, true); + printTimeWindow(&w1, precision, key1); + STimeWindow w3 = getAlignQueryTimeWindow(&interval2, key1); + printf("%ld win %ld, %ld\n", key1, w3.skey, w3.ekey); + + int64_t key2 = 1648758398208; + STimeWindow w2 = {0}; + getInitialStartTimeWindow(&interval2, key2, &w2, true); + printTimeWindow(&w2, precision, key2); + STimeWindow w4 = getAlignQueryTimeWindow(&interval2, key2); + printf("%ld win %ld, %ld\n", key2, w3.skey, w3.ekey); + + ASSERT_EQ(w3.skey, w4.skey); + ASSERT_EQ(w3.ekey, w4.ekey); +} #pragma GCC diagnostic pop \ No newline at end of file diff --git a/source/libs/function/inc/builtins.h b/source/libs/function/inc/builtins.h index e7fcc38818..6181a9b929 100644 --- a/source/libs/function/inc/builtins.h +++ b/source/libs/function/inc/builtins.h @@ -40,7 +40,9 @@ typedef struct SBuiltinFuncDefinition { FExecProcess processFunc; FScalarExecProcess sprocessFunc; FExecFinalize finalizeFunc; +#ifdef BUILD_NO_CALL FExecProcess invertFunc; +#endif FExecCombine combineFunc; const char* pPartialFunc; const char* pMergeFunc; diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index d2f19ed2eb..ba7bf72aea 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -59,12 +59,19 @@ int32_t combineFunction(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); EFuncDataRequired countDataRequired(SFunctionNode* pFunc, STimeWindow* pTimeWindow); bool getCountFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); int32_t countFunction(SqlFunctionCtx* pCtx); + +#ifdef BUILD_NO_CALL int32_t countInvertFunction(SqlFunctionCtx* pCtx); +#endif EFuncDataRequired statisDataRequired(SFunctionNode* pFunc, STimeWindow* pTimeWindow); bool getSumFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); int32_t sumFunction(SqlFunctionCtx* pCtx); + +#ifdef BUILD_NO_CALL int32_t sumInvertFunction(SqlFunctionCtx* pCtx); +#endif + int32_t sumCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); bool minmaxFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo); @@ -81,7 +88,11 @@ int32_t avgFunction(SqlFunctionCtx* pCtx); int32_t avgFunctionMerge(SqlFunctionCtx* pCtx); int32_t avgFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); int32_t avgPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); + +#ifdef BUILD_NO_CALL int32_t avgInvertFunction(SqlFunctionCtx* pCtx); +#endif + int32_t avgCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); int32_t getAvgInfoSize(); @@ -91,7 +102,11 @@ int32_t stddevFunction(SqlFunctionCtx* pCtx); int32_t stddevFunctionMerge(SqlFunctionCtx* pCtx); int32_t stddevFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); int32_t stddevPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); + +#ifdef BUILD_NO_CALL int32_t stddevInvertFunction(SqlFunctionCtx* pCtx); +#endif + int32_t stddevCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); int32_t getStddevInfoSize(); @@ -99,7 +114,6 @@ bool getLeastSQRFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool leastSQRFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo); int32_t leastSQRFunction(SqlFunctionCtx* pCtx); int32_t leastSQRFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); -int32_t leastSQRInvertFunction(SqlFunctionCtx* pCtx); int32_t leastSQRCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); bool getPercentileFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 98fda024fa..61149f593a 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2368,7 +2368,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = countFunction, .sprocessFunc = countScalarFunction, .finalizeFunc = functionFinalize, +#ifdef BUILD_NO_CALL .invertFunc = countInvertFunction, +#endif .combineFunc = combineFunction, .pPartialFunc = "count", .pMergeFunc = "sum" @@ -2384,7 +2386,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = sumFunction, .sprocessFunc = sumScalarFunction, .finalizeFunc = functionFinalize, +#ifdef BUILD_NO_CALL .invertFunc = sumInvertFunction, +#endif .combineFunc = sumCombine, .pPartialFunc = "sum", .pMergeFunc = "sum" @@ -2429,7 +2433,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = stddevFunction, .sprocessFunc = stddevScalarFunction, .finalizeFunc = stddevFinalize, +#ifdef BUILD_NO_CALL .invertFunc = stddevInvertFunction, +#endif .combineFunc = stddevCombine, .pPartialFunc = "_stddev_partial", .pMergeFunc = "_stddev_merge" @@ -2443,7 +2449,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = stddevFunctionSetup, .processFunc = stddevFunction, .finalizeFunc = stddevPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = stddevInvertFunction, +#endif .combineFunc = stddevCombine, }, { @@ -2455,7 +2463,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = stddevFunctionSetup, .processFunc = stddevFunctionMerge, .finalizeFunc = stddevFinalize, +#ifdef BUILD_NO_CALL .invertFunc = stddevInvertFunction, +#endif .combineFunc = stddevCombine, }, { @@ -2468,7 +2478,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = leastSQRFunction, .sprocessFunc = leastSQRScalarFunction, .finalizeFunc = leastSQRFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = leastSQRCombine, }, { @@ -2482,7 +2494,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = avgFunction, .sprocessFunc = avgScalarFunction, .finalizeFunc = avgFinalize, +#ifdef BUILD_NO_CALL .invertFunc = avgInvertFunction, +#endif .combineFunc = avgCombine, .pPartialFunc = "_avg_partial", .pMergeFunc = "_avg_merge" @@ -2497,7 +2511,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = avgFunctionSetup, .processFunc = avgFunction, .finalizeFunc = avgPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = avgInvertFunction, +#endif .combineFunc = avgCombine, }, { @@ -2509,7 +2525,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = avgFunctionSetup, .processFunc = avgFunctionMerge, .finalizeFunc = avgFinalize, +#ifdef BUILD_NO_CALL .invertFunc = avgInvertFunction, +#endif .combineFunc = avgCombine, }, { @@ -2523,7 +2541,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = percentileFunction, .sprocessFunc = percentileScalarFunction, .finalizeFunc = percentileFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = NULL, }, { @@ -2536,7 +2556,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = apercentileFunction, .sprocessFunc = apercentileScalarFunction, .finalizeFunc = apercentileFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = apercentileCombine, .pPartialFunc = "_apercentile_partial", .pMergeFunc = "_apercentile_merge", @@ -2551,7 +2573,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = apercentileFunctionSetup, .processFunc = apercentileFunction, .finalizeFunc = apercentilePartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = apercentileCombine, }, { @@ -2563,7 +2587,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = apercentileFunctionSetup, .processFunc = apercentileFunctionMerge, .finalizeFunc = apercentileFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = apercentileCombine, }, { @@ -2609,7 +2635,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = spreadFunction, .sprocessFunc = spreadScalarFunction, .finalizeFunc = spreadFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = spreadCombine, .pPartialFunc = "_spread_partial", .pMergeFunc = "_spread_merge" @@ -2624,7 +2652,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = spreadFunctionSetup, .processFunc = spreadFunction, .finalizeFunc = spreadPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = spreadCombine, }, { @@ -2637,7 +2667,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = spreadFunctionSetup, .processFunc = spreadFunctionMerge, .finalizeFunc = spreadFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = spreadCombine, }, { @@ -2651,7 +2683,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = elapsedFunctionSetup, .processFunc = elapsedFunction, .finalizeFunc = elapsedFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = elapsedCombine, }, { @@ -2664,7 +2698,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = elapsedFunctionSetup, .processFunc = elapsedFunction, .finalizeFunc = elapsedPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = elapsedCombine, }, { @@ -2677,7 +2713,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = elapsedFunctionSetup, .processFunc = elapsedFunctionMerge, .finalizeFunc = elapsedFinalize, + #ifdef BUILD_NO_CALL .invertFunc = NULL, + #endif .combineFunc = elapsedCombine, }, { @@ -2907,7 +2945,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = histogramFunction, .sprocessFunc = histogramScalarFunction, .finalizeFunc = histogramFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = histogramCombine, .pPartialFunc = "_histogram_partial", .pMergeFunc = "_histogram_merge", @@ -2921,7 +2961,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = histogramFunctionSetup, .processFunc = histogramFunctionPartial, .finalizeFunc = histogramPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = histogramCombine, }, { @@ -2933,7 +2975,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = functionSetup, .processFunc = histogramFunctionMerge, .finalizeFunc = histogramFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = histogramCombine, }, { @@ -2946,7 +2990,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = hllFunction, .sprocessFunc = hllScalarFunction, .finalizeFunc = hllFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = hllCombine, .pPartialFunc = "_hyperloglog_partial", .pMergeFunc = "_hyperloglog_merge" @@ -2960,7 +3006,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = functionSetup, .processFunc = hllFunction, .finalizeFunc = hllPartialFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = hllCombine, }, { @@ -2972,7 +3020,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = functionSetup, .processFunc = hllFunctionMerge, .finalizeFunc = hllFinalize, +#ifdef BUILD_NO_CALL .invertFunc = NULL, +#endif .combineFunc = hllCombine, }, { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 8a2e118fe7..390190e8db 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -549,6 +549,7 @@ int32_t countFunction(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } +#ifdef BUILD_NO_CALL int32_t countInvertFunction(SqlFunctionCtx* pCtx) { int64_t numOfElem = getNumOfElems(pCtx); @@ -559,6 +560,7 @@ int32_t countInvertFunction(SqlFunctionCtx* pCtx) { SET_VAL(pResInfo, *((int64_t*)buf), 1); return TSDB_CODE_SUCCESS; } +#endif int32_t combineFunction(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); @@ -642,6 +644,7 @@ _sum_over: return TSDB_CODE_SUCCESS; } +#ifdef BUILD_NO_CALL int32_t sumInvertFunction(SqlFunctionCtx* pCtx) { int32_t numOfElem = 0; @@ -699,6 +702,7 @@ int32_t sumInvertFunction(SqlFunctionCtx* pCtx) { SET_VAL(GET_RES_INFO(pCtx), numOfElem, 1); return TSDB_CODE_SUCCESS; } +#endif int32_t sumCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); @@ -1230,6 +1234,7 @@ int32_t stddevFunctionMerge(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } +#ifdef BUILD_NO_CALL int32_t stddevInvertFunction(SqlFunctionCtx* pCtx) { int32_t numOfElem = 0; @@ -1294,6 +1299,7 @@ int32_t stddevInvertFunction(SqlFunctionCtx* pCtx) { SET_VAL(GET_RES_INFO(pCtx), numOfElem, 1); return TSDB_CODE_SUCCESS; } +#endif int32_t stddevFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SInputColumnInfoData* pInput = &pCtx->input; @@ -1578,11 +1584,6 @@ int32_t leastSQRFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return pResInfo->numOfRes; } -int32_t leastSQRInvertFunction(SqlFunctionCtx* pCtx) { - // TODO - return TSDB_CODE_SUCCESS; -} - int32_t leastSQRCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); SLeastSQRInfo* pDBuf = GET_ROWCELL_INTERBUF(pDResInfo); diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index e626c937da..6bcbd1c3a7 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -724,6 +724,7 @@ int32_t avgFunctionMerge(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } +#ifdef BUILD_NO_CALL int32_t avgInvertFunction(SqlFunctionCtx* pCtx) { int32_t numOfElem = 0; @@ -786,6 +787,7 @@ int32_t avgInvertFunction(SqlFunctionCtx* pCtx) { SET_VAL(GET_RES_INFO(pCtx), numOfElem, 1); return TSDB_CODE_SUCCESS; } +#endif int32_t avgCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index 036e4238d4..7bb863839a 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -281,6 +281,7 @@ void fmFuncMgtDestroy() { } } +#ifdef BUILD_NO_CALL int32_t fmSetInvertFunc(int32_t funcId, SFuncExecFuncs* pFpSet) { if (fmIsUserDefinedFunc(funcId) || funcId < 0 || funcId >= funcMgtBuiltinsNum) { return TSDB_CODE_FAILED; @@ -314,6 +315,7 @@ bool fmIsInvertible(int32_t funcId) { } return res; } +#endif // function has same input/output type bool fmIsSameInOutType(int32_t funcId) { diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index ac2016229b..689153010a 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -371,6 +371,18 @@ SNode* createValueNode(SAstCreateContext* pCxt, int32_t dataType, const SToken* return (SNode*)val; } +static bool hasHint(SNodeList* pHintList, EHintOption hint) { + if (!pHintList) return false; + SNode* pNode; + FOREACH(pNode, pHintList) { + SHintNode* pHint = (SHintNode*)pNode; + if (pHint->option == hint) { + return true; + } + } + return false; +} + bool addHintNodeToList(SAstCreateContext* pCxt, SNodeList** ppHintList, EHintOption opt, SToken* paramList, int32_t paramNum) { void* value = NULL; @@ -384,6 +396,10 @@ bool addHintNodeToList(SAstCreateContext* pCxt, SNodeList** ppHintList, EHintOpt } case HINT_SORT_FOR_GROUP: if (paramNum > 0) return true; + if (hasHint(*ppHintList, HINT_PARTITION_FIRST)) return true; + break; + case HINT_PARTITION_FIRST: + if (paramNum > 0 || hasHint(*ppHintList, HINT_SORT_FOR_GROUP)) return true; break; default: return true; @@ -455,6 +471,14 @@ SNodeList* createHintNodeList(SAstCreateContext* pCxt, const SToken* pLiteral) { } opt = HINT_SORT_FOR_GROUP; break; + case TK_PARTITION_FIRST: + lastComma = false; + if (0 != opt || inParamList) { + quit = true; + break; + } + opt = HINT_PARTITION_FIRST; + break; case TK_NK_LP: lastComma = false; if (0 == opt || inParamList) { diff --git a/source/libs/parser/src/parTokenizer.c b/source/libs/parser/src/parTokenizer.c index 03a5317cd3..072892fe7f 100644 --- a/source/libs/parser/src/parTokenizer.c +++ b/source/libs/parser/src/parTokenizer.c @@ -170,6 +170,7 @@ static SKeyword keywordTable[] = { {"PAGES", TK_PAGES}, {"PAGESIZE", TK_PAGESIZE}, {"PARTITION", TK_PARTITION}, + {"PARTITION_FIRST", TK_PARTITION_FIRST}, {"PASS", TK_PASS}, {"PORT", TK_PORT}, {"PPS", TK_PPS}, diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index b0eaa3c4f7..66074c1d21 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3158,7 +3158,11 @@ int32_t translateTable(STranslateContext* pCxt, SNode** pTable) { ((SSelectStmt*)pTempTable->pSubquery)->isEmptyResult && isSelectStmt(pCxt->pCurrStmt)) { ((SSelectStmt*)pCxt->pCurrStmt)->isEmptyResult = true; } - + if (QUERY_NODE_SELECT_STMT == nodeType(pTempTable->pSubquery) && isSelectStmt(pCxt->pCurrStmt)) { + SSelectStmt* pSubStmt = (SSelectStmt*)pTempTable->pSubquery; + SSelectStmt* pCurrSmt = (SSelectStmt*)(pCxt->pCurrStmt); + pCurrSmt->timeLineResMode = pSubStmt->timeLineResMode; + } pTempTable->table.precision = getStmtPrecision(pTempTable->pSubquery); pTempTable->table.singleTable = stmtIsSingleTable(pTempTable->pSubquery); code = addNamespace(pCxt, pTempTable); diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index e2a4ded5a9..d4074e1373 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -47,6 +47,7 @@ int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan); bool getBatchScanOptionFromHint(SNodeList* pList); bool getSortForGroupOptHint(SNodeList* pList); +bool getOptHint(SNodeList* pList, EHintOption hint); SLogicNode* getLogicNodeRootNode(SLogicNode* pCurr); int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes); bool isPartTableAgg(SAggLogicNode* pAgg); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 035377d22e..730dcd9352 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -3997,7 +3997,8 @@ static int32_t partitionColsOpt(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub } } return code; - } else if (pNode->node.pParent && nodeType(pNode->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG) { + } else if (pNode->node.pParent && nodeType(pNode->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG && + !getOptHint(pRootNode->pHint, HINT_PARTITION_FIRST)) { // Check if we can delete partition node SAggLogicNode* pAgg = (SAggLogicNode*)pNode->node.pParent; FOREACH(node, pNode->pPartitionKeys) { diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 2da270e42d..aeef3f2487 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -432,6 +432,7 @@ bool getBatchScanOptionFromHint(SNodeList* pList) { } bool getSortForGroupOptHint(SNodeList* pList) { + if (!pList) return false; SNode* pNode; FOREACH(pNode, pList) { SHintNode* pHint = (SHintNode*)pNode; @@ -442,6 +443,18 @@ bool getSortForGroupOptHint(SNodeList* pList) { return false; } +bool getOptHint(SNodeList* pList, EHintOption hint) { + if (!pList) return false; + SNode* pNode; + FOREACH(pNode, pList) { + SHintNode* pHint = (SHintNode*)pNode; + if (pHint->option == hint) { + return true; + } + } + return false; +} + int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes) { int32_t code = TSDB_CODE_SUCCESS; SLogicNode* pCurr = (SLogicNode*)pNode; diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 9b5134d449..1d1eda74f6 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -92,6 +92,9 @@ extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; extern int32_t taskDbWrapperId; +int32_t streamTimerInit(); +void streamTimerCleanUp(); + void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration); int32_t streamDispatchStreamBlock(SStreamTask* pTask); void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); diff --git a/source/libs/stream/inc/streamsm.h b/source/libs/stream/inc/streamsm.h index ea0522bd5a..abdafc0240 100644 --- a/source/libs/stream/inc/streamsm.h +++ b/source/libs/stream/inc/streamsm.h @@ -23,11 +23,6 @@ extern "C" { #endif // moore finite state machine for stream task -typedef struct SStreamTaskState { - ETaskStatus state; - char* name; -} SStreamTaskState; - typedef int32_t (*__state_trans_fn)(SStreamTask*); typedef int32_t (*__state_trans_succ_fn)(SStreamTask*); diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 1bef42bf14..7d507a1394 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -21,13 +21,18 @@ void* streamTimer = NULL; int32_t streamTimerInit() { streamTimer = taosTmrInit(1000, 100, 10000, "STREAM"); if (streamTimer == NULL) { + stError("init stream timer failed, code:%s", tstrerror(terrno)); return -1; } + + stInfo("init stream timer, %p", streamTimer); return 0; } void streamTimerCleanUp() { + stInfo("cleanup stream timer, %p", streamTimer); taosTmrCleanUp(streamTimer); + streamTimer = NULL; } char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { @@ -49,7 +54,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { return; } - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) { + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK) { stDebug("s-task:%s in checkpoint procedure, not retrieve result, next:%dms", id, nextTrigger); } else { if (status == TASK_TRIGGER_STATUS__ACTIVE) { @@ -57,7 +62,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { if (pTrigger == NULL) { stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", nextTrigger); - taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pTimer); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); return; } @@ -68,7 +73,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", nextTrigger); - taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pTimer); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); return; } @@ -77,7 +82,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { int32_t code = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger); if (code != TSDB_CODE_SUCCESS) { - taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pTimer); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); return; } @@ -85,17 +90,17 @@ static void streamSchedByTimer(void* param, void* tmrId) { } } - taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pTimer); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); } int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); - ASSERT(ref == 2 && pTask->schedInfo.pTimer == NULL); + ASSERT(ref == 2 && pTask->schedInfo.pDelayTimer == NULL); stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); - pTask->schedInfo.pTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamTimer); + pTask->schedInfo.pDelayTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamTimer); pTask->schedInfo.status = TASK_TRIGGER_STATUS__INACTIVE; } @@ -209,8 +214,9 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, } int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp) { - int32_t status = 0; - const char* id = pTask->id.idStr; + int32_t status = 0; + SStreamMeta* pMeta = pTask->pMeta; + const char* id = pTask->id.idStr; stDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64 ", msgId:%d", id, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen, pReq->msgId); @@ -218,7 +224,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); - if (pTask->pMeta->role == NODE_ROLE_FOLLOWER) { + if (pMeta->role == NODE_ROLE_FOLLOWER) { stError("s-task:%s task on follower received dispatch msgs, dispatch msg rejected", id); status = TASK_INPUT_STATUS__REFUSED; } else { @@ -239,6 +245,22 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); stDebug("s-task:%s close inputQ for upstream:0x%x, msgId:%d", id, pReq->upstreamTaskId, pReq->msgId); + } else if (pReq->type == STREAM_INPUT__TRANS_STATE) { + atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); + streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); + + // disable the related stream task here to avoid it to receive the newly arrived data after the transfer-state + STaskId* pRelTaskId = &pTask->streamTaskId; + SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pRelTaskId->streamId, pRelTaskId->taskId); + if (pStreamTask != NULL) { + atomic_add_fetch_32(&pStreamTask->upstreamInfo.numOfClosed, 1); + streamTaskCloseUpstreamInput(pStreamTask, pReq->upstreamRelTaskId); + streamMetaReleaseTask(pMeta, pStreamTask); + } + + stDebug("s-task:%s close inputQ for upstream:0x%x since trans-state msgId:%d recv, rel stream-task:0x%" PRIx64 + " close inputQ for upstream:0x%x", + id, pReq->upstreamTaskId, pReq->msgId, pTask->streamTaskId.taskId, pReq->upstreamRelTaskId); } status = streamTaskAppendInputBlocks(pTask, pReq); @@ -247,9 +269,9 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S } // disable the data from upstream tasks - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__HALT) { - status = TASK_INPUT_STATUS__BLOCKED; - } +// if (streamTaskGetStatus(pTask)->state == TASK_STATUS__HALT) { +// status = TASK_INPUT_STATUS__BLOCKED; +// } { // do send response with the input status @@ -290,6 +312,7 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { } pTask->upstreamInfo.numOfClosed = 0; + stDebug("s-task:%s opening up inputQ from upstream tasks", pTask->id.idStr); } void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index b54adb0f96..1c9361fa61 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -187,7 +187,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc int32_t code = TSDB_CODE_SUCCESS; // set task status - if (streamTaskGetStatus(pTask, NULL) != TASK_STATUS__CK) { + if (streamTaskGetStatus(pTask)->state != TASK_STATUS__CK) { pTask->chkInfo.checkpointingId = checkpointId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { @@ -288,10 +288,11 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { } int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { - SStreamMeta* pMeta = p->pMeta; - int32_t vgId = pMeta->vgId; - const char* id = p->id.idStr; - int32_t code = 0; + SStreamMeta* pMeta = p->pMeta; + int32_t vgId = pMeta->vgId; + const char* id = p->id.idStr; + int32_t code = 0; + SCheckpointInfo* pCKInfo = &p->chkInfo; if (p->info.fillHistory == 1) { return code; @@ -303,26 +304,33 @@ int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { taosThreadMutexLock(&p->lock); - ASSERT(p->chkInfo.checkpointId <= p->chkInfo.checkpointingId && p->chkInfo.checkpointingId == checkpointId && - p->chkInfo.checkpointVer <= p->chkInfo.processedVer); - p->chkInfo.checkpointId = p->chkInfo.checkpointingId; - p->chkInfo.checkpointVer = p->chkInfo.processedVer; + SStreamTaskState* pStatus = streamTaskGetStatus(p); + if (pStatus->state == TASK_STATUS__CK) { + ASSERT(pCKInfo->checkpointId <= pCKInfo->checkpointingId && pCKInfo->checkpointingId == checkpointId && + pCKInfo->checkpointVer <= pCKInfo->processedVer); - streamTaskClearCheckInfo(p, false); - char* str = NULL; - streamTaskGetStatus(p, &str); + pCKInfo->checkpointId = pCKInfo->checkpointingId; + pCKInfo->checkpointVer = pCKInfo->processedVer; - code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); - taosThreadMutexUnlock(&p->lock); + streamTaskClearCheckInfo(p, false); + code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + taosThreadMutexUnlock(&p->lock); + } else { + stDebug("s-task:%s vgId:%d status:%s not keep the checkpoint metaInfo, checkpoint:%" PRId64 " failed", id, vgId, + pStatus->name, pCKInfo->checkpointingId); + taosThreadMutexUnlock(&p->lock); + + return TSDB_CODE_STREAM_TASK_IVLD_STATUS; + } if (code != TSDB_CODE_SUCCESS) { stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId); - return -1; + return code; } stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: normal, prev:%s", - vgId, id, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, str); + vgId, id, p->info.taskLevel, checkpointId, pCKInfo->checkpointVer, pCKInfo->nextProcessVer, pStatus->name); // save the task if not sink task if (p->info.taskLevel != TASK_LEVEL__SINK) { @@ -344,6 +352,7 @@ int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { streamMetaWUnLock(pMeta); } + return code; } @@ -475,7 +484,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { code = streamSaveTaskCheckpointInfo(pTask, ckId); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s commit taskInfo failed, checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId, - tstrerror(terrno)); + tstrerror(code)); } else { code = streamTaskUploadChkp(pTask, ckId, (char*)pTask->id.idStr); if (code != 0) { diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 00a8940b6a..e2c8704b1c 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -58,6 +58,7 @@ int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* p if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamChildId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamRelTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->blockNum) < 0) return -1; if (tEncodeI64(pEncoder, pReq->totalLen) < 0) return -1; ASSERT(taosArrayGetSize(pReq->data) == pReq->blockNum); @@ -84,6 +85,7 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamRelTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->blockNum) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->totalLen) < 0) return -1; @@ -114,6 +116,7 @@ static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTas pReq->upstreamTaskId = pTask->id.taskId; pReq->upstreamChildId = pTask->info.selfChildId; pReq->upstreamNodeId = pTask->info.nodeId; + pReq->upstreamRelTaskId = pTask->streamTaskId.taskId; pReq->blockNum = numOfBlocks; pReq->taskId = dstTaskId; pReq->type = type; @@ -696,10 +699,9 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { int32_t numOfVgs = taosArrayGetSize(vgInfo); pTask->notReadyTasks = numOfVgs; - char* p = NULL; - streamTaskGetStatus(pTask, &p); + SStreamTaskState* pState = streamTaskGetStatus(pTask); stDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", - pTask->id.idStr, numOfVgs, p); + pTask->id.idStr, numOfVgs, pState->name); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); req.downstreamTaskId = pVgInfo->taskId; @@ -819,9 +821,9 @@ int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHist initRpcMsg(&msg, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, buf, tlen + sizeof(SMsgHead)); tmsgSendReq(pEpSet, &msg); - char* p = NULL; - streamTaskGetStatus(pTask, &p); - stDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, p, + + SStreamTaskState* pState = streamTaskGetStatus(pTask); + stDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pState->name, pReq->downstreamTaskId, vgId); return 0; } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index b9839dfc0c..c69fe2a79c 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -24,12 +24,12 @@ static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); bool streamTaskShouldStop(const SStreamTask* pTask) { - ETaskStatus s = streamTaskGetStatus(pTask, NULL); - return (s == TASK_STATUS__STOP) || (s == TASK_STATUS__DROPPING); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + return (pState->state == TASK_STATUS__STOP) || (pState->state == TASK_STATUS__DROPPING); } bool streamTaskShouldPause(const SStreamTask* pTask) { - return (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__PAUSE); + return (streamTaskGetStatus(pTask)->state == TASK_STATUS__PAUSE); } static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBlock) { @@ -107,12 +107,6 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return 0; } - if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - stWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry exec task", pTask->id.idStr); - taosMsleep(1000); - continue; - } - SSDataBlock* output = NULL; uint64_t ts = 0; if ((code = qExecTask(pExecutor, &output, &ts)) < 0) { @@ -344,11 +338,14 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { streamMetaWUnLock(pMeta); return TSDB_CODE_STREAM_TASK_NOT_EXIST; } else { - stDebug("s-task:%s fill-history task end, update related stream task:%s info, transfer exec state", id, - pStreamTask->id.idStr); + double el = (taosGetTimestampMs() - pTask->execInfo.step2Start) / 1000.; + stDebug( + "s-task:%s fill-history task end, scal wal elapsed time:%.2fSec,update related stream task:%s info, transfer " + "exec state", + id, el, pStreamTask->id.idStr); } - ETaskStatus status = streamTaskGetStatus(pStreamTask, NULL); + ETaskStatus status = streamTaskGetStatus(pStreamTask)->state; STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; // It must be halted for a source stream task, since when the related scan-history-data task start scan the history @@ -374,8 +371,9 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { // In case of sink tasks, no need to halt them. // In case of source tasks and agg tasks, we should HALT them, and wait for them to be idle. And then, it's safe to // start the task state transfer procedure. - char* p = NULL; - status = streamTaskGetStatus(pStreamTask, &p); + SStreamTaskState* pState = streamTaskGetStatus(pStreamTask); + status = pState->state; + char* p = pState->name; if (status == TASK_STATUS__STOP || status == TASK_STATUS__DROPPING) { stError("s-task:%s failed to transfer state from fill-history task:%s, status:%s", id, pStreamTask->id.idStr, p); streamMetaReleaseTask(pMeta, pStreamTask); @@ -393,8 +391,12 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { } // 1. expand the query time window for stream task of WAL scanner - pTimeWindow->skey = INT64_MIN; - qStreamInfoResetTimewindowFilter(pStreamTask->exec.pExecutor); + if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { + pTimeWindow->skey = INT64_MIN; + qStreamInfoResetTimewindowFilter(pStreamTask->exec.pExecutor); + } else { + stDebug("s-task:%s non-source task no need to reset filter window", pStreamTask->id.idStr); + } // 2. transfer the ownership of executor state streamTaskReleaseState(pTask); @@ -408,10 +410,13 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { // 4. free it and remove fill-history task from disk meta-store streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); - // 5. save to disk - pStreamTask->status.taskStatus = streamTaskGetStatus(pStreamTask, NULL); + // 5. assign the status to the value that will be kept in disk + pStreamTask->status.taskStatus = streamTaskGetStatus(pStreamTask)->state; - // 6. add empty delete block + // 6. open the inputQ for all upstream tasks + streamTaskOpenAllUpstreamInput(pStreamTask); + + // 7. add empty delete block if ((pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) && taosQueueEmpty(pStreamTask->inputq.queue->pQueue)) { SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); @@ -431,6 +436,8 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; + SStreamMeta* pMeta = pTask->pMeta; + ASSERT(pTask->status.appendTranstateBlock == 1); int32_t level = pTask->info.taskLevel; @@ -440,8 +447,14 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { // do transfer task operator states. code = streamDoTransferStateToStreamTask(pTask); - } else { // drop fill-history task - streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pTask->pMeta->vgId, &pTask->id); + } else { // drop fill-history task and open inputQ of sink task + SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); + if (pStreamTask != NULL) { + streamTaskOpenAllUpstreamInput(pStreamTask); + streamMetaReleaseTask(pMeta, pStreamTask); + } + + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); } return code; @@ -497,16 +510,17 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ } } -int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { +int32_t streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { const char* id = pTask->id.idStr; int32_t code = TSDB_CODE_SUCCESS; + int32_t level = pTask->info.taskLevel; - int32_t level = pTask->info.taskLevel; if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SINK) { int32_t remain = streamAlignTransferState(pTask); + if (remain > 0) { streamFreeQitem((SStreamQueueItem*)pBlock); - stDebug("s-task:%s receive upstream transfer state msg, remain:%d", id, remain); + stDebug("s-task:%s receive upstream trans-state msg, not sent remain:%d", id, remain); return 0; } } @@ -537,7 +551,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock } } else { // non-dispatch task, do task state transfer directly streamFreeQitem((SStreamQueueItem*)pBlock); - stDebug("s-task:%s non-dispatch task, level:%d start to transfer state directly", id, pTask->info.taskLevel); + stDebug("s-task:%s non-dispatch task, level:%d start to transfer state directly", id, level); ASSERT(pTask->info.fillHistory == 1); code = streamTransferStateToStreamTask(pTask); @@ -549,6 +563,10 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock return code; } +static void setTaskSchedInfo(SStreamTask* pTask, int32_t idleTime) { pTask->status.schedIdleTime = idleTime; } +static void clearTaskSchedInfo(SStreamTask* pTask) { pTask->status.schedIdleTime = 0; } +static void setLastExecTs(SStreamTask* pTask, int64_t ts) { pTask->status.lastExecTs = ts; } + /** * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * appropriate batch of blocks should be handled in 5 to 10 sec. @@ -563,9 +581,28 @@ int32_t doStreamExecTask(SStreamTask* pTask) { int32_t blockSize = 0; int32_t numOfBlocks = 0; SStreamQueueItem* pInput = NULL; - if (streamTaskShouldStop(pTask) || (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__UNINIT)) { + + if (streamTaskShouldStop(pTask) || (streamTaskGetStatus(pTask)->state == TASK_STATUS__UNINIT)) { stDebug("s-task:%s stream task is stopped", id); - break; + return 0; + } + + if (streamQueueIsFull(pTask->outputq.queue)) { + stWarn("s-task:%s outputQ is full, idle for 500ms and retry", id); + setTaskSchedInfo(pTask, 500); + return 0; + } + + if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { + stWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", id); + setTaskSchedInfo(pTask, 1000); + return 0; + } + + if (taosGetTimestampMs() - pTask->status.lastExecTs < 50) { + stDebug("s-task:%s invoke with high frequency, idle and retry exec in 50ms", id); + setTaskSchedInfo(pTask, 50); + return 0; } /*int32_t code = */ streamTaskGetDataFromInputQ(pTask, &pInput, &numOfBlocks, &blockSize); @@ -574,16 +611,15 @@ int32_t doStreamExecTask(SStreamTask* pTask) { return 0; } - int32_t type = pInput->type; - // dispatch checkpoint msg to all downstream tasks + int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput); continue; } - if (pInput->type == STREAM_INPUT__TRANS_STATE) { - streamProcessTranstateBlock(pTask, (SStreamDataBlock*)pInput); + if (type == STREAM_INPUT__TRANS_STATE) { + streamProcessTransstateBlock(pTask, (SStreamDataBlock*)pInput); continue; } @@ -623,7 +659,7 @@ int32_t doStreamExecTask(SStreamTask* pTask) { if (ver != pInfo->processedVer) { stDebug("s-task:%s update processedVer(unsaved) from %" PRId64 " to %" PRId64 " nextProcessVer:%" PRId64 " ckpt:%" PRId64, - pTask->id.idStr, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer); + id, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer); pInfo->processedVer = ver; } @@ -634,10 +670,9 @@ int32_t doStreamExecTask(SStreamTask* pTask) { if (type == STREAM_INPUT__CHECKPOINT) { // todo add lock - char* p = NULL; - ETaskStatus s = streamTaskGetStatus(pTask, &p); - if (s == TASK_STATUS__CK) { - stDebug("s-task:%s checkpoint block received, set status:%s", pTask->id.idStr, p); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state == TASK_STATUS__CK) { + stDebug("s-task:%s checkpoint block received, set status:%s", id, pState->name); streamTaskBuildCheckpoint(pTask); } else { // todo refactor @@ -650,8 +685,8 @@ int32_t doStreamExecTask(SStreamTask* pTask) { if (code != TSDB_CODE_SUCCESS) { // todo: let's retry send rsp to upstream/mnode - stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%d, code:%s", pTask->id.idStr, - 0, tstrerror(code)); + stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%d, code:%s", id, 0, + tstrerror(code)); } } @@ -665,15 +700,117 @@ int32_t doStreamExecTask(SStreamTask* pTask) { // the task may be set dropping/stopping, while it is still in the task queue, therefore, the sched-status can not // be updated by tryExec function, therefore, the schedStatus will always be the TASK_SCHED_STATUS__WAITING. bool streamTaskIsIdle(const SStreamTask* pTask) { - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE || status == TASK_STATUS__STOP || status == TASK_STATUS__DROPPING); } bool streamTaskReadyToRun(const SStreamTask* pTask, char** pStatus) { - ETaskStatus st = streamTaskGetStatus(pTask, NULL); - return (st == TASK_STATUS__READY || st == TASK_STATUS__SCAN_HISTORY/* || st == TASK_STATUS__STREAM_SCAN_HISTORY*/ || - st == TASK_STATUS__CK); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + + ETaskStatus st = pState->state; + if (pStatus != NULL) { + *pStatus = pState->name; + } + + return (st == TASK_STATUS__READY || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__CK); +} + +static void doStreamExecTaskHelper(void* param, void* tmrId) { + SStreamTask* pTask = (SStreamTask*)param; + + SStreamTaskState* p = streamTaskGetStatus(pTask); + if (p->state == TASK_STATUS__DROPPING || p->state == TASK_STATUS__STOP) { + streamTaskSetSchedStatusInactive(pTask); + + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s status:%s not resume task, ref:%d", pTask->id.idStr, p->name, ref); + + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + // task resume running + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + /*int8_t status = */streamTaskSetSchedStatusInactive(pTask); + + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stError("failed to create msg to resume s-task:%s, reason out of memory, ref:%d", pTask->id.idStr, ref); + + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + pRunReq->head.vgId = pTask->info.nodeId; + pRunReq->streamId = pTask->id.streamId; + pRunReq->taskId = pTask->id.taskId; + pRunReq->reqType = STREAM_EXEC_T_RESUME_TASK; + + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("trigger to resume s-task:%s after being idled for %dms, ref:%d", pTask->id.idStr, pTask->status.schedIdleTime, ref); + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); + + // release the task ref count + clearTaskSchedInfo(pTask); + streamMetaReleaseTask(pTask->pMeta, pTask); +} + +static int32_t schedTaskInFuture(SStreamTask* pTask) { + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s task should idle, add into timer to retry in %dms, ref:%d", pTask->id.idStr, + pTask->status.schedIdleTime, ref); + + // add one ref count for task + // todo this may be failed, and add ref may be failed. + SStreamTask* pAddRefTask = streamMetaAcquireTask(pTask->pMeta, pTask->id.streamId, pTask->id.taskId); + + if (pTask->schedInfo.pIdleTimer == NULL) { + pTask->schedInfo.pIdleTimer = taosTmrStart(doStreamExecTaskHelper, pTask->status.schedIdleTime, pTask, streamTimer); + } else { + taosTmrReset(doStreamExecTaskHelper, pTask->status.schedIdleTime, pTask, streamTimer, &pTask->schedInfo.pIdleTimer); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamResumeTask(SStreamTask* pTask) { + ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__ACTIVE); + const char* id = pTask->id.idStr; + + while (1) { + /*int32_t code = */doStreamExecTask(pTask); + taosThreadMutexLock(&pTask->lock); + + // check if this task needs to be idle for a while + if (pTask->status.schedIdleTime > 0) { + schedTaskInFuture(pTask); + + taosThreadMutexUnlock(&pTask->lock); + setLastExecTs(pTask, taosGetTimestampMs()); + return 0; + } else { + int32_t numOfItems = streamQueueGetNumOfItems(pTask->inputq.queue); + + if ((numOfItems == 0) || streamTaskShouldStop(pTask) || streamTaskShouldPause(pTask)) { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + taosThreadMutexUnlock(&pTask->lock); + + setLastExecTs(pTask, taosGetTimestampMs()); + + char* p = streamTaskGetStatus(pTask)->name; + stDebug("s-task:%s exec completed, status:%s, sched-status:%d, lastExecTs:%" PRId64, id, p, + pTask->status.schedStatus, pTask->status.lastExecTs); + + return 0; + } + } + + taosThreadMutexUnlock(&pTask->lock); + } } int32_t streamExecTask(SStreamTask* pTask) { @@ -682,29 +819,9 @@ int32_t streamExecTask(SStreamTask* pTask) { int8_t schedStatus = streamTaskSetSchedStatusActive(pTask); if (schedStatus == TASK_SCHED_STATUS__WAITING) { - while (1) { - int32_t code = doStreamExecTask(pTask); - if (code < 0) { // todo this status should be removed - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); - return -1; - } - - taosThreadMutexLock(&pTask->lock); - if ((streamQueueGetNumOfItems(pTask->inputq.queue) == 0) || streamTaskShouldStop(pTask) || - streamTaskShouldPause(pTask)) { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - taosThreadMutexUnlock(&pTask->lock); - - char* p = NULL; - streamTaskGetStatus(pTask, &p); - stDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, p, pTask->status.schedStatus); - return 0; - } - taosThreadMutexUnlock(&pTask->lock); - } + streamResumeTask(pTask); } else { - char* p = NULL; - streamTaskGetStatus(pTask, &p); + char* p = streamTaskGetStatus(pTask)->name; stDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, p, pTask->status.schedStatus); } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 4e30dc2555..68348f462f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -61,9 +61,10 @@ static void streamMetaEnvInit() { streamMetaId = taosOpenRef(64, streamMetaCloseImpl); metaRefMgtInit(); + streamTimerInit(); } -void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } +void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit);} void streamMetaCleanup() { taosCloseRef(streamBackendId); @@ -71,6 +72,7 @@ void streamMetaCleanup() { taosCloseRef(streamMetaId); metaRefMgtCleanup(); + streamTimerCleanUp(); } void metaRefMgtInit() { @@ -359,7 +361,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } - pMeta->walScanCounter = 0; + pMeta->scanInfo.scanCounter = 0; pMeta->vgId = vgId; pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; @@ -452,7 +454,7 @@ void streamMetaClear(SStreamMeta* pMeta) { // release the ref by timer if (p->info.triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); - taosTmrStop(p->schedInfo.pTimer); + taosTmrStop(p->schedInfo.pDelayTimer); p->info.triggerParam = 0; streamMetaReleaseTask(pMeta, p); } @@ -467,6 +469,7 @@ void streamMetaClear(SStreamMeta* pMeta) { taosArrayClear(pMeta->pTaskList); taosArrayClear(pMeta->chkpSaved); taosArrayClear(pMeta->chkpInUse); + pMeta->numOfStreamTasks = 0; pMeta->numOfPausedTasks = 0; @@ -730,7 +733,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); - taosTmrStop(pTask->schedInfo.pTimer); + taosTmrStop(pTask->schedInfo.pDelayTimer); pTask->info.triggerParam = 0; streamMetaReleaseTask(pMeta, pTask); } @@ -1093,13 +1096,13 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { STaskStatusEntry entry = { .id = *pId, - .status = streamTaskGetStatus(*pTask, NULL), + .status = streamTaskGetStatus(*pTask)->state, .nodeId = hbMsg.vgId, .stage = pMeta->stage, .inputQUsed = SIZE_IN_MiB(streamQueueGetItemSize((*pTask)->inputq.queue)), }; - entry.inputRate = entry.inputQUsed * 100.0 / STREAM_TASK_QUEUE_CAPACITY_IN_SIZE; + entry.inputRate = entry.inputQUsed * 100.0 / (2*STREAM_TASK_QUEUE_CAPACITY_IN_SIZE); if ((*pTask)->info.taskLevel == TASK_LEVEL__SINK) { entry.sinkQuota = (*pTask)->outputInfo.pTokenBucket->quotaRate; entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); @@ -1262,11 +1265,11 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { // wait for the stream meta hb function stopping if (pMeta->role == NODE_ROLE_LEADER) { - pMeta->pHbInfo->stopFlag = STREAM_META_WILL_STOP; - while (pMeta->pHbInfo->stopFlag != STREAM_META_OK_TO_STOP) { - taosMsleep(100); - stDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); - } +// pMeta->pHbInfo->stopFlag = STREAM_META_WILL_STOP; +// while (pMeta->pHbInfo->stopFlag != STREAM_META_OK_TO_STOP) { +// taosMsleep(100); +// stDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); +// } } stDebug("vgId:%d start to check all tasks", vgId); @@ -1362,13 +1365,13 @@ SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta) { } taosThreadMutexLock(&pTask->lock); - char* p = NULL; - ETaskStatus s = streamTaskGetStatus(pTask, &p); - if (s == TASK_STATUS__CK) { + + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state == TASK_STATUS__CK) { streamTaskSetCheckpointFailedId(pTask); stDebug("s-task:%s mark the checkpoint:%"PRId64" failed", pTask->id.idStr, pTask->chkInfo.checkpointingId); } else { - stDebug("s-task:%s status:%s not reset the checkpoint", pTask->id.idStr, p); + stDebug("s-task:%s status:%s not reset the checkpoint", pTask->id.idStr, pState->name); } taosThreadMutexUnlock(&pTask->lock); @@ -1467,9 +1470,12 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { continue; } + // todo: may be we should find the related fill-history task and set it failed. + // fill-history task can only be launched by related stream tasks. STaskExecStatisInfo* pInfo = &pTask->execInfo; if (pTask->info.fillHistory == 1) { + stDebug("s-task:%s fill-history task wait related stream task start", pTask->id.idStr); streamMetaReleaseTask(pMeta, pTask); continue; } @@ -1486,13 +1492,16 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { continue; } - EStreamTaskEvent event = /*(HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : */TASK_EVENT_INIT; - int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); if (ret != TSDB_CODE_SUCCESS) { - stError("vgId:%d failed to handle event:%d", pMeta->vgId, event); + stError("vgId:%d failed to handle event:%d", pMeta->vgId, TASK_EVENT_INIT); code = ret; streamMetaUpdateTaskDownstreamStatus(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->init, pInfo->start, false); + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + STaskId* pId = &pTask->hTaskInfo.id; + streamMetaUpdateTaskDownstreamStatus(pMeta, pId->streamId, pId->taskId, pInfo->init, pInfo->start, false); + } } streamMetaReleaseTask(pMeta, pTask); @@ -1501,4 +1510,41 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { stInfo("vgId:%d start tasks completed", pMeta->vgId); taosArrayDestroy(pTaskList); return code; +} + +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { + int32_t vgId = pMeta->vgId; + stInfo("vgId:%d start to task:0x%x by checking downstream status", vgId, taskId); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, streamId, taskId); + if (pTask == NULL) { + stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, taskId); + streamMetaUpdateTaskDownstreamStatus(pMeta, streamId, taskId, 0, taosGetTimestampMs(), false); + return TSDB_CODE_STREAM_TASK_IVLD_STATUS; + } + + // todo: may be we should find the related fill-history task and set it failed. + + // fill-history task can only be launched by related stream tasks. + STaskExecStatisInfo* pInfo = &pTask->execInfo; + if (pTask->info.fillHistory == 1) { + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; + } + + ASSERT(pTask->status.downstreamReady == 0); + + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); + if (ret != TSDB_CODE_SUCCESS) { + stError("vgId:%d failed to handle event:%d", pMeta->vgId, TASK_EVENT_INIT); + + streamMetaUpdateTaskDownstreamStatus(pMeta, streamId, taskId, pInfo->init, pInfo->start, false); + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + STaskId* pId = &pTask->hTaskInfo.id; + streamMetaUpdateTaskDownstreamStatus(pMeta, pId->streamId, pId->taskId, pInfo->init, pInfo->start, false); + } + } + + streamMetaReleaseTask(pMeta, pTask); + return ret; } \ No newline at end of file diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index d1610362f9..90823cd937 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -17,8 +17,7 @@ #define MAX_STREAM_EXEC_BATCH_NUM 32 #define MAX_SMOOTH_BURST_RATIO 5 // 5 sec -#define WAIT_FOR_DURATION 40 -#define OUTPUT_QUEUE_FULL_WAIT_DURATION 500 // 500 ms +#define WAIT_FOR_DURATION 10 // todo refactor: // read data from input queue @@ -148,8 +147,6 @@ const char* streamQueueItemGetTypeStr(int32_t type) { int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, int32_t* blockSize) { - int32_t retryTimes = 0; - int32_t MAX_RETRY_TIMES = 5; const char* id = pTask->id.idStr; int32_t taskLevel = pTask->info.taskLevel; @@ -157,10 +154,10 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu *numOfBlocks = 0; *blockSize = 0; + // todo remove it // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); - taosMsleep(10); return TSDB_CODE_SUCCESS; } @@ -172,10 +169,6 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputq.queue); if (qItem == NULL) { - if ((taskLevel == TASK_LEVEL__SOURCE || taskLevel == TASK_LEVEL__SINK) && (++retryTimes) < MAX_RETRY_TIMES) { - taosMsleep(WAIT_FOR_DURATION); - continue; - } // restore the token to bucket if (*numOfBlocks > 0) { @@ -342,23 +335,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) // the result should be put into the outputQ in any cases, the result may be lost otherwise. int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock) { STaosQueue* pQueue = pTask->outputq.queue->pQueue; - - // wait for the output queue is available for new data to dispatch - while (streamQueueIsFull(pTask->outputq.queue)) { - if (streamTaskShouldStop(pTask)) { - stInfo("s-task:%s discard result block due to task stop", pTask->id.idStr); - return TSDB_CODE_STREAM_EXEC_CANCELLED; - } - - int32_t total = streamQueueGetNumOfItems(pTask->outputq.queue); - double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); - // let's wait for there are enough space to hold this result pBlock - stDebug("s-task:%s outputQ is full, wait for %dms and retry, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, - OUTPUT_QUEUE_FULL_WAIT_DURATION, total, size); - - taosMsleep(OUTPUT_QUEUE_FULL_WAIT_DURATION); - } - int32_t code = taosWriteQitem(pQueue, pBlock); int32_t total = streamQueueGetNumOfItems(pTask->outputq.queue); @@ -367,7 +343,14 @@ int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBloc stError("s-task:%s failed to put res into outputQ, outputQ items:%d, size:%.2fMiB code:%s, result lost", pTask->id.idStr, total + 1, size, tstrerror(code)); } else { - stDebug("s-task:%s data put into outputQ, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); + if (streamQueueIsFull(pTask->outputq.queue)) { + stWarn( + "s-task:%s outputQ is full(outputQ items:%d, size:%.2fMiB), set the output status BLOCKING, wait for 500ms " + "after handle this batch of blocks", + pTask->id.idStr, total, size); + } else { + stDebug("s-task:%s data put into outputQ, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); + } } return TSDB_CODE_SUCCESS; diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 70f83e27a5..24909a9776 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -48,15 +48,13 @@ static void tryLaunchHistoryTask(void* param, void* tmrId); static void doProcessDownstreamReadyRsp(SStreamTask* pTask); int32_t streamTaskSetReady(SStreamTask* pTask) { - char* p = NULL; - int32_t numOfDowns = streamTaskGetNumOfDownstream(pTask); - ETaskStatus status = streamTaskGetStatus(pTask, &p); + int32_t numOfDowns = streamTaskGetNumOfDownstream(pTask); + SStreamTaskState* p = streamTaskGetStatus(pTask); - if ((status == TASK_STATUS__SCAN_HISTORY/* || status == TASK_STATUS__STREAM_SCAN_HISTORY*/) && - pTask->info.taskLevel != TASK_LEVEL__SOURCE) { + if ((p->state == TASK_STATUS__SCAN_HISTORY) && pTask->info.taskLevel != TASK_LEVEL__SOURCE) { pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); stDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", - pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, p); + pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, p->name); } ASSERT(pTask->status.downstreamReady == 0); @@ -65,7 +63,7 @@ int32_t streamTaskSetReady(SStreamTask* pTask) { pTask->execInfo.start = taosGetTimestampMs(); int64_t el = (pTask->execInfo.start - pTask->execInfo.init); stDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%" PRId64 "ms, task status:%s", - pTask->id.idStr, numOfDowns, el, p); + pTask->id.idStr, numOfDowns, el, p->name); return TSDB_CODE_SUCCESS; } @@ -93,12 +91,11 @@ static void doReExecScanhistory(void* param, void* tmrId) { SStreamTask* pTask = param; pTask->schedHistoryInfo.numOfTicks -= 1; - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); - if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + SStreamTaskState* p = streamTaskGetStatus(pTask); + if (p->state == TASK_STATUS__DROPPING || p->state == TASK_STATUS__STOP) { streamMetaReleaseTask(pTask->pMeta, pTask); int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s status:%s not start scan-history again, ref:%d", pTask->id.idStr, p, ref); + stDebug("s-task:%s status:%s not start scan-history again, ref:%d", pTask->id.idStr, p->name, ref); return; } @@ -155,10 +152,9 @@ static int32_t doStartScanHistoryTask(SStreamTask* pTask) { int32_t streamTaskStartScanHistory(SStreamTask* pTask) { int32_t level = pTask->info.taskLevel; - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; - ASSERT(pTask->status.downstreamReady == 1 && - ((status == TASK_STATUS__SCAN_HISTORY)/* || (status == TASK_STATUS__STREAM_SCAN_HISTORY)*/)); + ASSERT((pTask->status.downstreamReady == 1) && (status == TASK_STATUS__SCAN_HISTORY)); if (level == TASK_LEVEL__SOURCE) { return doStartScanHistoryTask(pTask); @@ -315,7 +311,7 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_ id, upstreamTaskId, vgId, stage, pInfo->stage); // record the checkpoint failure id and sent to mnode taosThreadMutexLock(&pTask->lock); - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; if (status == TASK_STATUS__CK) { streamTaskSetCheckpointFailedId(pTask); } @@ -325,7 +321,7 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_ if (pInfo->stage != stage) { taosThreadMutexLock(&pTask->lock); - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; if (status == TASK_STATUS__CK) { streamTaskSetCheckpointFailedId(pTask); } @@ -346,20 +342,20 @@ int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { streamTaskSetReady(pTask); streamTaskSetRangeStreamCalc(pTask); - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); - ASSERT(status == TASK_STATUS__READY); + SStreamTaskState* p = streamTaskGetStatus(pTask); + ASSERT(p->state == TASK_STATUS__READY); + int8_t schedStatus = pTask->status.schedStatus; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { int64_t startVer = walReaderGetCurrentVer(pTask->exec.pWalReader); if (startVer == -1) { startVer = pTask->chkInfo.nextProcessVer; } - stDebug("s-task:%s no need to scan-history data, status:%s, sched-status:%d, ready for data from wal ver:%" PRId64, - id, p, pTask->status.schedStatus, startVer); + stDebug("s-task:%s status:%s, sched-status:%d, ready for data from wal ver:%" PRId64, id, p->name, schedStatus, + startVer); } else { - stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p, pTask->status.schedStatus); + stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p->name, schedStatus); } return TSDB_CODE_SUCCESS; @@ -372,15 +368,14 @@ int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { streamTaskSetReady(pTask); streamTaskSetRangeStreamCalc(pTask); - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); - ASSERT(status == TASK_STATUS__SCAN_HISTORY/* || status == TASK_STATUS__STREAM_SCAN_HISTORY*/); + SStreamTaskState* p = streamTaskGetStatus(pTask); + ASSERT(p->state == TASK_STATUS__SCAN_HISTORY); if (pTask->info.fillHistory == 1) { - stDebug("s-task:%s fill-history task enters into scan-history data stage, status:%s", id, p); + stDebug("s-task:%s fill-history task enters into scan-history data stage, status:%s", id, p->name); streamTaskStartScanHistory(pTask); } else { - stDebug("s-task:%s scan wal data, status:%s", id, p); + stDebug("s-task:%s scan wal data, status:%s", id, p->name); } // NOTE: there will be an deadlock if launch fill history here. @@ -393,12 +388,7 @@ int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { } void doProcessDownstreamReadyRsp(SStreamTask* pTask) { - EStreamTaskEvent event; - if (pTask->info.fillHistory == 0) { - event = /*HAS_RELATED_FILLHISTORY_TASK(pTask) ? TASK_EVENT_INIT_STREAM_SCANHIST : */TASK_EVENT_INIT; - } else { - event = TASK_EVENT_INIT_SCANHIST; - } + EStreamTaskEvent event = (pTask->info.fillHistory == 0) ? TASK_EVENT_INIT : TASK_EVENT_INIT_SCANHIST; streamTaskOnHandleEventSuccess(pTask->status.pSM, event); @@ -500,22 +490,14 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); } - streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, pTask->execInfo.init, - taosGetTimestampMs(), false); + int32_t startTs = pTask->execInfo.init; + int64_t now = taosGetTimestampMs(); + streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, startTs, now, false); // automatically set the related fill-history task to be failed. if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { STaskId* pId = &pTask->hTaskInfo.id; - int64_t current = taosGetTimestampMs(); - - SStreamTask* pHTask = streamMetaAcquireTask(pTask->pMeta, pId->streamId, pId->taskId); - if (pHTask != NULL) { - streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pId->streamId, pId->taskId, pHTask->execInfo.init, current, - false); - streamMetaReleaseTask(pTask->pMeta, pHTask); - } else { - streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pId->streamId, pId->taskId, 0, current, false); - } + streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pId->streamId, pId->taskId, startTs, now, false); } } else { // TASK_DOWNSTREAM_NOT_READY, let's retry in 100ms STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); @@ -562,6 +544,15 @@ int32_t streamSetParamForScanHistory(SStreamTask* pTask) { return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor); } +int32_t streamResetParamForScanHistory(SStreamTask* pTask) { + stDebug("s-task:%s reset operator option for scan-history data", pTask->id.idStr); + if (pTask->exec.pExecutor != NULL) { + return qResetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor); + } else { + return TSDB_CODE_SUCCESS; + } +} + int32_t streamRestoreParam(SStreamTask* pTask) { stDebug("s-task:%s restore operator param after scan-history", pTask->id.idStr); return qRestoreStreamOperatorOption(pTask->exec.pExecutor); @@ -632,12 +623,11 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK); const char* id = pTask->id.idStr; - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); + SStreamTaskState* p = streamTaskGetStatus(pTask); - if (status != TASK_STATUS__SCAN_HISTORY /*&& status != TASK_STATUS__STREAM_SCAN_HISTORY*/) { - stError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", id, p, - pReq->upstreamTaskId); + if (p->state != TASK_STATUS__SCAN_HISTORY) { + stError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", id, + p->name, pReq->upstreamTaskId); void* pBuf = NULL; int32_t len = 0; @@ -690,7 +680,7 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory } int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { - ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ETaskStatus status = streamTaskGetStatus(pTask)->state; // task restart now, not handle the scan-history finish rsp if (status == TASK_STATUS__UNINIT) { @@ -749,8 +739,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { ASSERT((*ppTask)->status.timerActive >= 1); if (streamTaskShouldStop(*ppTask)) { - char* p = NULL; - streamTaskGetStatus((*ppTask), &p); + char* p = streamTaskGetStatus(*ppTask)->name; int32_t ref = atomic_sub_fetch_32(&(*ppTask)->status.timerActive, 1); stDebug("s-task:%s status:%s should stop, quit launch fill-history task timer, retry:%d, ref:%d", (*ppTask)->id.idStr, p, (*ppTask)->hTaskInfo.retryTimes, ref); @@ -789,10 +778,9 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { // abort the timer if intend to stop task SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pHTaskInfo->id.streamId, pHTaskInfo->id.taskId); if (pHTask == NULL && (!streamTaskShouldStop(pTask))) { - char* p = NULL; + char* p = streamTaskGetStatus(pTask)->name; int32_t hTaskId = pHTaskInfo->id.taskId; - streamTaskGetStatus(pTask, &p); stDebug("s-task:%s status:%s failed to launch fill-history task:0x%x, retry launch:%dms, retryCount:%d", pTask->id.idStr, p, hTaskId, pHTaskInfo->waitInterval, pHTaskInfo->retryTimes); @@ -836,11 +824,23 @@ SLaunchHTaskInfo* createHTaskLaunchInfo(SStreamMeta* pMeta, int64_t streamId, in // an fill history task needs to be started. int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { SStreamMeta* pMeta = pTask->pMeta; - int32_t hTaskId = pTask->hTaskInfo.id.taskId; - ASSERT((hTaskId != 0) && (pTask->status.downstreamReady == 1)); - stDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, - pTask->hTaskInfo.id.streamId, hTaskId); + int64_t streamId = pTask->hTaskInfo.id.streamId; + int32_t hTaskId = pTask->hTaskInfo.id.taskId; + ASSERT(hTaskId != 0); + + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + if (pStatus->state != TASK_STATUS__READY) { + STaskExecStatisInfo* pInfo = &pTask->execInfo; + stDebug("s-task:%s not launch related fill-history task:0x%" PRIx64 "-0x%x, status:%s", pTask->id.idStr, + pTask->hTaskInfo.id.streamId, hTaskId, pStatus->name); + + streamMetaUpdateTaskDownstreamStatus(pMeta, streamId, hTaskId, pInfo->init, pInfo->start, false); + return -1;// todo set the correct error code + } else { + stDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, + pTask->hTaskInfo.id.streamId, hTaskId); + } // Set the execute conditions, including the query time window and the version range SStreamTask** pHTask = taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); @@ -886,7 +886,7 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { } int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask) { - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__DROPPING) { + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__DROPPING) { return 0; } @@ -1029,6 +1029,11 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } } else { + ASSERT(pTask->info.fillHistory == 0); + if (pTask->info.taskLevel >= TASK_LEVEL__AGG) { + return; + } + int64_t ekey = 0; if (pRange->window.ekey < INT64_MAX) { ekey = pRange->window.ekey + 1; @@ -1043,10 +1048,13 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { pRange->range.minVer = 0; pRange->range.maxVer = ver; - stDebug("s-task:%s level:%d related fill-history task exists, update stream calc time window:%" PRId64 " - %" PRId64 + stDebug("s-task:%s level:%d related fill-history task exists, set stream task timeWindow:%" PRId64 " - %" PRId64 ", verRang:%" PRId64 " - %" PRId64, - pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, - pRange->range.maxVer); + pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, ver, INT64_MAX); + + SVersionRange verRange = {.minVer = ver, .maxVer = INT64_MAX}; + STimeWindow win = pRange->window; + streamSetParamForStreamScannerStep2(pTask, &verRange, &win); } } @@ -1066,23 +1074,21 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { } void streamTaskResume(SStreamTask* pTask) { - char* p = NULL; - ETaskStatus status = streamTaskGetStatus(pTask, &p); + SStreamTaskState prevState = *streamTaskGetStatus(pTask); SStreamMeta* pMeta = pTask->pMeta; - if (status == TASK_STATUS__PAUSE || status == TASK_STATUS__HALT) { + if (prevState.state == TASK_STATUS__PAUSE || prevState.state == TASK_STATUS__HALT) { streamTaskRestoreStatus(pTask); - char* pNew = NULL; - streamTaskGetStatus(pTask, &pNew); - if (status == TASK_STATUS__PAUSE) { + char* pNew = streamTaskGetStatus(pTask)->name; + if (prevState.state == TASK_STATUS__PAUSE) { int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); - stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNew, p, num); + stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNew, prevState.name, num); } else { - stInfo("s-task:%s status:%s resume from %s", pTask->id.idStr, pNew, p); + stInfo("s-task:%s status:%s resume from %s", pTask->id.idStr, pNew, prevState.name); } } else { - stDebug("s-task:%s status:%s not in pause/halt status, no need to resume", pTask->id.idStr, p); + stDebug("s-task:%s status:%s not in pause/halt status, no need to resume", pTask->id.idStr, prevState.name); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 4d343a484d..c14d355dc8 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -309,7 +309,9 @@ void tFreeStreamTask(SStreamTask* pTask) { ETaskStatus status1 = TASK_STATUS__UNINIT; taosThreadMutexLock(&pTask->lock); if (pTask->status.pSM != NULL) { - status1 = streamTaskGetStatus(pTask, &p); + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + p = pStatus->name; + status1 = pStatus->state; } taosThreadMutexUnlock(&pTask->lock); @@ -328,9 +330,9 @@ void tFreeStreamTask(SStreamTask* pTask) { taosMsleep(100); } - if (pTask->schedInfo.pTimer != NULL) { - taosTmrStop(pTask->schedInfo.pTimer); - pTask->schedInfo.pTimer = NULL; + if (pTask->schedInfo.pDelayTimer != NULL) { + taosTmrStop(pTask->schedInfo.pDelayTimer); + pTask->schedInfo.pDelayTimer = NULL; } if (pTask->hTaskInfo.pTimer != NULL) { diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index d785932109..f0dcc75c4c 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -38,7 +38,6 @@ SStreamEventInfo StreamTaskEventList[12] = { {.event = 0, .name = ""}, // dummy event, place holder {.event = TASK_EVENT_INIT, .name = "initialize"}, {.event = TASK_EVENT_INIT_SCANHIST, .name = "scan-history-init"}, -// {.event = TASK_EVENT_INIT_STREAM_SCANHIST, .name = "stream-scan-history-init"}, {.event = TASK_EVENT_SCANHIST_DONE, .name = "scan-history-completed"}, {.event = TASK_EVENT_STOP, .name = "stopping"}, {.event = TASK_EVENT_GEN_CHECKPOINT, .name = "checkpoint"}, @@ -65,8 +64,7 @@ static STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus nex static int32_t dummyFn(SStreamTask* UNUSED_PARAM(p)) { return TSDB_CODE_SUCCESS; } static int32_t attachEvent(SStreamTask* pTask, SAttachedEventInfo* pEvtInfo) { - char* p = NULL; - streamTaskGetStatus(pTask, &p); + char* p = streamTaskGetStatus(pTask)->name; stDebug("s-task:%s status:%s attach event:%s required status:%s, since not allowed to handle it", pTask->id.idStr, p, GET_EVT_NAME(pEvtInfo->event), StreamTaskStatusList[pEvtInfo->status].name); @@ -110,12 +108,12 @@ int32_t streamTaskKeepCurrentVerInWal(SStreamTask* pTask) { // todo check rsp code for handle Event:TASK_EVENT_SCANHIST_DONE static bool isInvalidStateTransfer(ETaskStatus state, const EStreamTaskEvent event) { - if (/*event == TASK_EVENT_INIT_STREAM_SCANHIST || */event == TASK_EVENT_INIT || event == TASK_EVENT_INIT_SCANHIST) { + if (event == TASK_EVENT_INIT || event == TASK_EVENT_INIT_SCANHIST) { return (state != TASK_STATUS__UNINIT); } if (event == TASK_EVENT_SCANHIST_DONE) { - return (state != TASK_STATUS__SCAN_HISTORY/* && state != TASK_STATUS__STREAM_SCAN_HISTORY*/); + return (state != TASK_STATUS__SCAN_HISTORY); } if (event == TASK_EVENT_GEN_CHECKPOINT) { @@ -275,7 +273,7 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt while (1) { // wait for the task to be here taosThreadMutexLock(&pTask->lock); - ETaskStatus s = streamTaskGetStatus(pTask, NULL); + ETaskStatus s = streamTaskGetStatus(pTask)->state; taosThreadMutexUnlock(&pTask->lock); if ((s == pTrans->next.state) && (pSM->prev.evt == pTrans->event)) {// this event has been handled already @@ -400,12 +398,8 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even return TSDB_CODE_SUCCESS; } -ETaskStatus streamTaskGetStatus(const SStreamTask* pTask, char** pStr) { - SStreamTaskState s = pTask->status.pSM->current; // copy one obj in case of multi-thread environment - if (pStr != NULL) { - *pStr = s.name; - } - return s.state; +SStreamTaskState* streamTaskGetStatus(const SStreamTask* pTask) { + return &pTask->status.pSM->current; // copy one obj in case of multi-thread environment } const char* streamTaskGetStatusStr(ETaskStatus status) { @@ -482,14 +476,10 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); -// trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, false, false); -// taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); -// trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); -// taosArrayPush(streamTaskSMTrans, &trans); // halt stream task, from other task status trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); @@ -499,8 +489,6 @@ void doInitStateTransferTable(void) { SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; -// trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); -// taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); @@ -519,8 +507,6 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); info = (SAttachedEventInfo){.status = TASK_STATUS__READY, .event = TASK_EVENT_PAUSE}; -// trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); -// taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); @@ -554,8 +540,6 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); -// trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); -// taosArrayPush(streamTaskSMTrans, &trans); // dropping related event trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); @@ -574,7 +558,5 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, streamTaskSendTransSuccessMsg, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); -// trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); -// taosArrayPush(streamTaskSMTrans, &trans); } //clang-format on \ No newline at end of file diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index d31e4cbfcd..aac78cf03b 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -26,6 +26,7 @@ #define FLUSH_NUM 4 #define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024) #define MIN_NUM_OF_ROW_BUFF 10240 +#define MIN_NUM_OF_RECOVER_ROW_BUFF 128 #define TASK_KEY "streamFileState" #define STREAM_STATE_INFO_NAME "StreamStateCheckPoint" @@ -660,7 +661,7 @@ int32_t recoverSesssion(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } - int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); + int32_t recoverNum = TMIN(MIN_NUM_OF_RECOVER_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { break; @@ -694,7 +695,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } - int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); + int32_t recoverNum = TMIN(MIN_NUM_OF_RECOVER_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { break; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index f0e457ef8d..578e6798e0 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -22,6 +22,7 @@ #include "syncRaftStore.h" #include "syncReplication.h" #include "syncUtil.h" +#include "tglobal.h" static SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths); @@ -1186,7 +1187,7 @@ static int32_t syncSnapBufferSend(SSyncSnapshotSender *pSender, SyncSnapshotRsp pSndBuf->start = ack + 1; } - while (pSender->seq != SYNC_SNAPSHOT_SEQ_END && pSender->seq - pSndBuf->start < (pSndBuf->size >> 2)) { + while (pSender->seq != SYNC_SNAPSHOT_SEQ_END && pSender->seq - pSndBuf->start < tsSnapReplMaxWaitN) { if (snapshotSend(pSender) != 0) { code = terrno; goto _out; diff --git a/source/libs/transport/src/tmsgcb.c b/source/libs/transport/src/tmsgcb.c index 1b1fa8cc1c..e44328c683 100644 --- a/source/libs/transport/src/tmsgcb.c +++ b/source/libs/transport/src/tmsgcb.c @@ -48,6 +48,14 @@ int32_t tmsgSendReq(const SEpSet* epSet, SRpcMsg* pMsg) { } return code; } +int32_t tmsgSendSyncReq(const SEpSet* epSet, SRpcMsg* pMsg) { + int32_t code = (*defaultMsgCb.sendSyncReqFp)(epSet, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; +} void tmsgSendRsp(SRpcMsg* pMsg) { #if 1 diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 4b3000b47e..246605694b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1907,7 +1907,12 @@ bool cliRecvReleaseReq(SCliConn* conn, STransMsgHead* pHead) { static void* cliWorkThread(void* arg) { SCliThrd* pThrd = (SCliThrd*)arg; pThrd->pid = taosGetSelfPthreadId(); - setThreadName("trans-cli-work"); + + char threadName[TSDB_LABEL_LEN] = {0}; + STrans* pInst = pThrd->pTransInst; + strtolower(threadName, pInst->label); + setThreadName(threadName); + uv_run(pThrd->loop, UV_RUN_DEFAULT); tDebug("thread quit-thread:%08" PRId64, pThrd->pid); @@ -2701,6 +2706,7 @@ int transSendRecvWithTimeout(void* shandle, SEpSet* pEpSet, STransMsg* pReq, STr ret = TSDB_CODE_TIMEOUT_ERROR; } else { memcpy(pRsp, pSyncMsg->pRsp, sizeof(STransMsg)); + pSyncMsg->pRsp->pCont = NULL; if (pSyncMsg->hasEpSet == 1) { epsetAssign(pEpSet, &pSyncMsg->epSet); *epUpdated = 1; diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 759a4d79db..b1fb9a2450 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -667,7 +667,7 @@ void transDestroySyncMsg(void* msg) { STransSyncMsg* pSyncMsg = msg; tsem_destroy(pSyncMsg->pSem); taosMemoryFree(pSyncMsg->pSem); - + transFreeMsg(pSyncMsg->pRsp->pCont); taosMemoryFree(pSyncMsg->pRsp); taosMemoryFree(pSyncMsg); } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 1660a0ecf0..6748d161ae 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -305,8 +305,8 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver) { } int32_t walSkipFetchBody(SWalReader *pRead) { - wDebug("vgId:%d, skip fetch body %" PRId64 ", first ver:%" PRId64 ", commit ver:%" PRId64 ", last ver:%" PRId64 - ", applied ver:%" PRId64", 0x%"PRIx64, + wDebug("vgId:%d, skip fetch body:%" PRId64 ", first:%" PRId64 ", commit:%" PRId64 ", last:%" PRId64 + ", applied:%" PRId64 ", 0x%" PRIx64, pRead->pWal->cfg.vgId, pRead->pHead->head.version, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, pRead->pWal->vers.lastVer, pRead->pWal->vers.appliedVer, pRead->readerId); @@ -325,11 +325,11 @@ int32_t walFetchBody(SWalReader *pRead) { int64_t ver = pReadHead->version; int32_t vgId = pRead->pWal->cfg.vgId; int64_t id = pRead->readerId; + SWalVer *pVer = &pRead->pWal->vers; - wDebug("vgId:%d, fetch body %" PRId64 ", first ver:%" PRId64 ", commit ver:%" PRId64 ", last ver:%" PRId64 - ", applied ver:%" PRId64 ", 0x%" PRIx64, - vgId, ver, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, pRead->pWal->vers.lastVer, - pRead->pWal->vers.appliedVer, id); + wDebug("vgId:%d, fetch body:%" PRId64 ", first:%" PRId64 ", commit:%" PRId64 ", last:%" PRId64 ", applied:%" PRId64 + ", 0x%" PRIx64, + vgId, ver, pVer->firstVer, pVer->commitVer, pVer->lastVer, pVer->appliedVer, id); if (pRead->capacity < pReadHead->bodyLen) { SWalCkHead *ptr = (SWalCkHead *)taosMemoryRealloc(pRead->pHead, sizeof(SWalCkHead) + pReadHead->bodyLen); diff --git a/tests/army/community/cluster/snapshot.py b/tests/army/community/cluster/snapshot.py index b177ba3b29..92ecc00726 100644 --- a/tests/army/community/cluster/snapshot.py +++ b/tests/army/community/cluster/snapshot.py @@ -34,7 +34,7 @@ class TDTestCase(TBase): tdLog.info(f"insert data.") # taosBenchmark run jfile = etool.curFile(__file__, "snapshot.json") - etool.runBenchmark(json=jfile) + etool.benchMark(json=jfile) tdSql.execute(f"use {self.db}") # set insert data information diff --git a/tests/army/community/cmdline/fullopt.py b/tests/army/community/cmdline/fullopt.py new file mode 100644 index 0000000000..22667149ae --- /dev/null +++ b/tests/army/community/cmdline/fullopt.py @@ -0,0 +1,144 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import time +import random + +import taos +import frame.etool + +from frame.log import * +from frame.sql import * +from frame.cases import * +from frame.caseBase import * +from frame.srvCtl import * +from frame import * + + +class TDTestCase(TBase): + def insertData(self): + tdLog.info(f"insert data.") + + # set insert data information + self.childtable_count = 10 + self.insert_rows = 10000 + self.timestamp_step = 1000 + + # taosBenchmark run + etool.benchMark(command = f"-d {self.db} -t {self.childtable_count} -n {self.insert_rows} -v 2 -y") + + def doTaos(self): + tdLog.info(f"check taos command options...") + # help + rets = etool.runBinFile("taos", "--help") + self.checkListNotEmpty(rets) + # b r w s + sql = f"select * from {self.db}.{self.stb} limit 10" + rets = etool.runBinFile("taos", f'-B -r -w 100 -s "{sql}" ') + self.checkListNotEmpty(rets) + # -C + rets = etool.runBinFile("taos", "-C") + self.checkListNotEmpty(rets) + # -t + rets = etool.runBinFile("taos", "-t") + self.checkListNotEmpty(rets) + # -v + rets = etool.runBinFile("taos", "-V") + self.checkListNotEmpty(rets) + # -? + rets = etool.runBinFile("taos", "-?") + self.checkListNotEmpty(rets) + + # TSDB_FQDN_LEN = 128 + lname = "testhostnamelength" + lname.rjust(130, 'a') + + # except test + sql = f"show vgroups;" + etool.exeBinFile("taos", f'-h {lname} -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-u {lname} -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-d {lname} -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-a {lname} -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-p{lname} -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-w -s "{sql}" ', wait=False) + + # others + etool.exeBinFile("taos", f'-N 200 -l 2048 -s "{sql}" ', wait=False) + etool.exeBinFile("taos", f'-n server', wait=False) + + + def doTaosd(self): + tdLog.info(f"check taosd command options...") + idx = 1 # dnode1 + cfg = sc.dnodeCfgPath(idx) + + # -s + sdb = "./sdb.json" + eos.delFile(sdb) + etool.runBinFile("taosd", f"-s -c {cfg}") + self.checkFileExist(sdb) + + # -C + etool.runBinFile("taosd", "-C") + # -k + rets = etool.runBinFile("taosd", "-C") + self.checkListNotEmpty(rets) + # -V + rets = etool.runBinFile("taosd", "-V") + self.checkListNotEmpty(rets) + # --help + rets = etool.runBinFile("taosd", "--help") + self.checkListNotEmpty(rets) + + # except input + etool.exeBinFile("taosd", "-c") + etool.exeBinFile("taosd", "-e") + + # stop taosd + sc.dnodeStop(idx) + # other + etool.exeBinFile("taosd", f"-dm -c {cfg}", False) + sc.dnodeStop(idx) + etool.exeBinFile("taosd", "-a http://192.168.1.10") + + #exe + etool.exeBinFile("taosd", f"-E abc -c {cfg}", False) + sc.dnodeStop(idx) + etool.exeBinFile("taosd", f"-e def -c {cfg}", False) + + # stop taosd test taos as server + sc.dnodeStop(idx) + etool.exeBinFile("taos", f'-n server', wait=False) + + # run + def run(self): + tdLog.debug(f"start to excute {__file__}") + + # insert data + self.insertData() + + # do taos + self.doTaos() + + # do action + self.doTaosd() + + + + + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/army/enterprise/multi-level/mlevel_basic.py b/tests/army/enterprise/multi-level/mlevel_basic.py index 3bec2bfb72..081a1bb9bd 100644 --- a/tests/army/enterprise/multi-level/mlevel_basic.py +++ b/tests/army/enterprise/multi-level/mlevel_basic.py @@ -32,7 +32,7 @@ class TDTestCase(TBase): tdLog.info(f"insert data.") # taosBenchmark run json = etool.curFile(__file__, "mlevel_basic.json") - etool.runBenchmark(json=json) + etool.benchMark(json=json) tdSql.execute(f"use {self.db}") # set insert data information diff --git a/tests/army/enterprise/s3/s3_basic.py b/tests/army/enterprise/s3/s3_basic.py index 8e7c1f2d35..976ad85747 100644 --- a/tests/army/enterprise/s3/s3_basic.py +++ b/tests/army/enterprise/s3/s3_basic.py @@ -54,16 +54,21 @@ class TDTestCase(TBase): tdLog.info(f"insert data.") # taosBenchmark run json = etool.curFile(__file__, "s3_basic.json") - etool.runBenchmark(json=json) + etool.benchMark(json=json) tdSql.execute(f"use {self.db}") - # set insert data information + # come from s3_basic.json self.childtable_count = 4 self.insert_rows = 1000000 self.timestamp_step = 1000 + def createStream(self, sname): + sql = f"create stream {sname} fill_history 1 into stm1 as select count(*) from {self.db}.{self.stb} interval(10s);" + tdSql.execute(sql) + def doAction(self): tdLog.info(f"do action.") + self.flushDb() self.compactDb() @@ -80,16 +85,33 @@ class TDTestCase(TBase): time.sleep(5) self.trimDb(True) loop += 1 + tdLog.info(f"loop={loop} wait 5s...") + + def checkStreamCorrect(self): + sql = f"select count(*) from {self.db}.stm1" + count = 0 + for i in range(120): + tdSql.query(sql) + count = tdSql.getData(0, 0) + if count == 100000 or count == 100001: + return True + time.sleep(1) + + tdLog.exit(f"stream count is not expect . expect = 100000 or 100001 real={count} . sql={sql}") # run def run(self): tdLog.debug(f"start to excute {__file__}") + self.sname = "stream1" if eos.isArm64Cpu(): tdLog.success(f"{__file__} arm64 ignore executed") else: # insert data self.insertData() + # creat stream + self.createStream(self.sname) + # check insert data correct self.checkInsertCorrect() @@ -105,6 +127,12 @@ class TDTestCase(TBase): # check insert correct again self.checkInsertCorrect() + # check stream correct and drop stream + self.checkStreamCorrect() + + # drop stream + self.dropStream(self.sname) + # drop database and free s3 file self.dropDb() diff --git a/tests/army/frame/caseBase.py b/tests/army/frame/caseBase.py index 8d2c1e6d18..ec6b36aa1b 100644 --- a/tests/army/frame/caseBase.py +++ b/tests/army/frame/caseBase.py @@ -72,6 +72,9 @@ class TBase: def dropDb(self, show = False): tdSql.execute(f"drop database {self.db}", show = show) + def dropStream(self, sname, show = False): + tdSql.execute(f"drop stream {sname}", show = show) + def splitVGroups(self): vgids = self.getVGroup(self.db) selid = random.choice(vgids) @@ -188,3 +191,14 @@ class TBase: time.sleep(interval) return False + + # check file exist + def checkFileExist(self, pathFile): + if os.path.exists(pathFile) == False: + tdLog.exit(f"file not exist {pathFile}") + + # check list not exist + def checkListNotEmpty(self, lists, tips=""): + if len(lists) == 0: + tdLog.exit(f"list is empty {tips}") + diff --git a/tests/army/frame/eos.py b/tests/army/frame/eos.py index 1c00f94380..802b62e052 100644 --- a/tests/army/frame/eos.py +++ b/tests/army/frame/eos.py @@ -57,13 +57,16 @@ def exe(file): # execute file and return immediately def exeNoWait(file): - print("exe no wait") - + if isWin(): + cmd = f"mintty -h never {file}" + else: + cmd = f"nohup {file} > /dev/null 2>&1 & " + return exe(cmd) # run return output and error -def run(command): +def run(command, timeout = 10): process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - process.wait(3) + process.wait(timeout) output = process.stdout.read().decode(encoding="gbk") error = process.stderr.read().decode(encoding="gbk") @@ -72,7 +75,13 @@ def run(command): # return list after run -def runRetList(command): - lines = [] - output,error = run(command) +def runRetList(command, timeout=10): + output,error = run(command, timeout) return output.splitlines() + +# +# file +# + +def delFile(file): + return exe(f"rm -rf {file}") \ No newline at end of file diff --git a/tests/army/frame/etool.py b/tests/army/frame/etool.py index 35c390dc1a..0650d4a0e2 100644 --- a/tests/army/frame/etool.py +++ b/tests/army/frame/etool.py @@ -24,7 +24,7 @@ import frame.eos from frame.log import * # run taosBenchmark with command or json file mode -def runBenchmark(command = "", json = "") : +def benchMark(command = "", json = "") : # get taosBenchmark path bmFile = frame.epath.binFile("taosBenchmark") if frame.eos.isWin(): @@ -44,3 +44,31 @@ def runBenchmark(command = "", json = "") : # get current directory file name def curFile(fullPath, filename): return os.path.dirname(fullPath) + "/" + filename + + +# run build/bin file +def runBinFile(fname, command, show=True): + binFile = frame.epath.binFile(fname) + if frame.eos.isWin(): + binFile += ".exe" + + cmd = f"{binFile} {command}" + if show: + tdLog.info(cmd) + return frame.eos.runRetList(cmd) + +# exe build/bin file +def exeBinFile(fname, command, wait=True, show=True): + binFile = frame.epath.binFile(fname) + if frame.eos.isWin(): + binFile += ".exe" + + cmd = f"{binFile} {command}" + if wait: + if show: + tdLog.info("wait exe:" + cmd) + return frame.eos.exe(f"{binFile} {command}") + else: + if show: + tdLog.info("no wait exe:" + cmd) + return frame.eos.exeNoWait(cmd) \ No newline at end of file diff --git a/tests/army/frame/server/dnode.py b/tests/army/frame/server/dnode.py new file mode 100644 index 0000000000..6331939636 --- /dev/null +++ b/tests/army/frame/server/dnode.py @@ -0,0 +1,591 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import os +import os.path +import platform +import distro +import subprocess +from time import sleep +import base64 +import json +import copy +from fabric2 import Connection +from shutil import which + +# self +from frame.log import * + + +class TDDnode: + def __init__(self, index=1, level=1, disk=1): + self.index = index + self.level = level + self.disk = disk + self.dataDir = [] + self.running = 0 + self.deployed = 0 + self.testCluster = False + self.valgrind = 0 + self.asan = False + self.remoteIP = "" + self.cfgDict = { + "fqdn": "localhost", + "monitor": "0", + "maxShellConns": "30000", + "locale": "en_US.UTF-8", + "charset": "UTF-8", + "asyncLog": "0", + "mDebugFlag": "143", + "dDebugFlag": "143", + "vDebugFlag": "143", + "tqDebugFlag": "143", + "cDebugFlag": "143", + "stDebugFlag": "143", + "smaDebugFlag": "143", + "jniDebugFlag": "143", + "qDebugFlag": "143", + "rpcDebugFlag": "143", + "tmrDebugFlag": "131", + "uDebugFlag": "135", + "sDebugFlag": "135", + "wDebugFlag": "135", + "numOfLogLines": "100000000", + "statusInterval": "1", + "enableQueryHb": "1", + "supportVnodes": "1024", + "telemetryReporting": "0" + } + + def init(self, path, remoteIP = ""): + self.path = path + self.remoteIP = remoteIP + if (not self.remoteIP == ""): + try: + self.config = eval(self.remoteIP) + self.remote_conn = Connection(host=self.config["host"], port=self.config["port"], user=self.config["user"], connect_kwargs={'password':self.config["password"]}) + except Exception as r: + print(r) + + def setTestCluster(self, value): + self.testCluster = value + + def setValgrind(self, value): + self.valgrind = value + + def setAsan(self, value): + self.asan = value + if value: + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + self.execPath = os.path.abspath(self.path + "/community/tests/script/sh/exec.sh") + else: + self.execPath = os.path.abspath(self.path + "/tests/script/sh/exec.sh") + + def getDataSize(self): + totalSize = 0 + + if (self.deployed == 1): + for dirpath, dirnames, filenames in os.walk(self.dataDir): + for f in filenames: + fp = os.path.join(dirpath, f) + + if not os.path.islink(fp): + totalSize = totalSize + os.path.getsize(fp) + + return totalSize + + def addExtraCfg(self, option, value): + self.cfgDict.update({option: value}) + + def remoteExec(self, updateCfgDict, execCmd): + valgrindStr = '' + if (self.valgrind==1): + valgrindStr = '-g' + remoteCfgDict = copy.deepcopy(updateCfgDict) + if ("logDir" in remoteCfgDict): + del remoteCfgDict["logDir"] + if ("dataDir" in remoteCfgDict): + del remoteCfgDict["dataDir"] + if ("cfgDir" in remoteCfgDict): + del remoteCfgDict["cfgDir"] + remoteCfgDictStr = base64.b64encode(json.dumps(remoteCfgDict).encode()).decode() + execCmdStr = base64.b64encode(execCmd.encode()).decode() + with self.remote_conn.cd((self.config["path"]+sys.path[0].replace(self.path, '')).replace('\\','/')): + self.remote_conn.run("python3 ./test.py %s -d %s -e %s"%(valgrindStr,remoteCfgDictStr,execCmdStr)) + + def deploy(self, *updatecfgDict): + # logDir + self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") + # dataDir + simPath = os.path.join(self.path, "sim", "dnode%d" % self.index) + primary = 1 + if self.level == 1 and self.disk == 1: + eDir = os.path.join(simPath, "data") + self.dataDir.append(eDir) + else: + for i in range(self.level): + for j in range(self.disk): + eDir = os.path.join(simPath, f"data{i}{j}") + self.dataDir.append(f"{eDir} {i} {primary}") + if primary == 1: + primary = 0 + + # taos.cfg + self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") + self.cfgPath = os.path.join(self.cfgDir, "taos.cfg") + + for eDir in self.dataDir: + cmd = "rm -rf " + eDir + if os.system(cmd) != 0: + tdLog.exit(cmd) + + cmd = "rm -rf " + self.logDir + if os.system(cmd) != 0: + tdLog.exit(cmd) + + cmd = "rm -rf " + self.cfgDir + if os.system(cmd) != 0: + tdLog.exit(cmd) + + # cmd = "mkdir -p " + self.dataDir + # if os.system(cmd) != 0: + # tdLog.exit(cmd) + for eDir in self.dataDir: + os.makedirs(eDir.split(' ')[0]) + + # cmd = "mkdir -p " + self.logDir + # if os.system(cmd) != 0: + # tdLog.exit(cmd) + os.makedirs(self.logDir) + + # cmd = "mkdir -p " + self.cfgDir + # if os.system(cmd) != 0: + # tdLog.exit(cmd) + os.makedirs(self.cfgDir) + + cmd = "touch " + self.cfgPath + if os.system(cmd) != 0: + tdLog.exit(cmd) + + if self.testCluster: + self.startIP() + + if self.testCluster: + self.cfg("masterIp", "192.168.0.1") + self.cfg("secondIp", "192.168.0.2") + self.cfg("publicIp", "192.168.0.%d" % (self.index)) + self.cfg("internalIp", "192.168.0.%d" % (self.index)) + self.cfg("privateIp", "192.168.0.%d" % (self.index)) + self.cfgDict["dataDir"] = self.dataDir + self.cfgDict["logDir"] = self.logDir + # self.cfg("dataDir",self.dataDir) + # self.cfg("logDir",self.logDir) + # print(updatecfgDict) + isFirstDir = 1 + if bool(updatecfgDict) and updatecfgDict[0] and updatecfgDict[0][0]: + for key, value in updatecfgDict[0][0].items(): + if key == "clientCfg" and self.remoteIP == "" and not platform.system().lower() == 'windows': + continue + if value == 'dataDir': + if isFirstDir: + self.cfgDict.pop('dataDir') + self.cfg(value, key) + isFirstDir = 0 + else: + self.cfg(value, key) + else: + self.addExtraCfg(key, value) + if (self.remoteIP == ""): + for key, value in self.cfgDict.items(): + if type(value) == list: + for v in value: + self.cfg(key, v) + else: + self.cfg(key, value) + else: + self.remoteExec(self.cfgDict, "tdDnodes.deploy(%d,updateCfgDict)"%self.index) + + self.deployed = 1 + tdLog.debug( + "dnode:%d is deployed and configured by %s" % + (self.index, self.cfgPath)) + + def getPath(self, tool="taosd"): + selfPath = os.path.dirname(os.path.realpath(__file__)) + + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + paths = [] + for root, dirs, files in os.walk(projPath): + if ((tool) in files or ("%s.exe"%tool) in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + paths.append(os.path.join(root, tool)) + break + if (len(paths) == 0): + return "" + return paths[0] + + def starttaosd(self): + binPath = self.getPath() + + if (binPath == ""): + tdLog.exit("taosd not found!") + else: + tdLog.info("taosd found: %s" % binPath) + + if self.deployed == 0: + tdLog.exit("dnode:%d is not deployed" % (self.index)) + + if self.valgrind == 0: + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s -c %s" % ( + binPath, self.cfgDir) + else: + if self.asan: + asanDir = "%s/sim/asan/dnode%d.asan" % ( + self.path, self.index) + cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( + binPath, self.cfgDir, asanDir) + else: + cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( + binPath, self.cfgDir) + else: + valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir + + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s %s -c %s" % ( + valgrindCmdline, binPath, self.cfgDir) + else: + cmd = "nohup %s %s -c %s 2>&1 & " % ( + valgrindCmdline, binPath, self.cfgDir) + + print(cmd) + + if (not self.remoteIP == ""): + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.start(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) + self.running = 1 + else: + if os.system(cmd) != 0: + tdLog.exit(cmd) + self.running = 1 + tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) + if self.valgrind == 0: + time.sleep(0.1) + key1 = 'from offline to online' + bkey1 = bytes(key1, encoding="utf8") + key2= 'TDengine initialized successfully' + bkey2 = bytes(key2, encoding="utf8") + logFile = self.logDir + "/taosdlog.0" + i = 0 + # while not os.path.exists(logFile): + # sleep(0.1) + # i += 1 + # if i > 10: + # break + # tailCmdStr = 'tail -f ' + # if platform.system().lower() == 'windows': + # tailCmdStr = 'tail -n +0 -f ' + # popen = subprocess.Popen( + # tailCmdStr + logFile, + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE, + # shell=True) + # pid = popen.pid + # # print('Popen.pid:' + str(pid)) + # timeout = time.time() + 60 * 2 + # while True: + # line = popen.stdout.readline().strip() + # print(line) + # if bkey1 in line: + # popen.kill() + # break + # elif bkey2 in line: + # popen.kill() + # break + # if time.time() > timeout: + # print(time.time(),timeout) + # tdLog.exit('wait too long for taosd start') + tdLog.debug("the dnode:%d has been started." % (self.index)) + else: + tdLog.debug( + "wait 10 seconds for the dnode:%d to start." % + (self.index)) + time.sleep(10) + + def start(self): + binPath = self.getPath() + + if (binPath == ""): + tdLog.exit("taosd not found!") + else: + tdLog.info("taosd found: %s" % binPath) + + if self.deployed == 0: + tdLog.exit("dnode:%d is not deployed" % (self.index)) + + if self.valgrind == 0: + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s -c %s" % ( + binPath, self.cfgDir) + else: + if self.asan: + asanDir = "%s/sim/asan/dnode%d.asan" % ( + self.path, self.index) + cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( + binPath, self.cfgDir, asanDir) + else: + cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( + binPath, self.cfgDir) + else: + valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir + + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s %s -c %s" % ( + valgrindCmdline, binPath, self.cfgDir) + else: + cmd = "nohup %s %s -c %s 2>&1 & " % ( + valgrindCmdline, binPath, self.cfgDir) + + print(cmd) + + if (not self.remoteIP == ""): + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.start(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) + self.running = 1 + else: + os.system("rm -rf %s/taosdlog.0"%self.logDir) + if os.system(cmd) != 0: + tdLog.exit(cmd) + self.running = 1 + tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) + if self.valgrind == 0: + time.sleep(0.1) + key = 'from offline to online' + bkey = bytes(key, encoding="utf8") + logFile = self.logDir + "/taosdlog.0" + i = 0 + while not os.path.exists(logFile): + sleep(0.1) + i += 1 + if i > 50: + break + with open(logFile) as f: + timeout = time.time() + 10 * 2 + while True: + line = f.readline().encode('utf-8') + if bkey in line: + break + if time.time() > timeout: + tdLog.exit('wait too long for taosd start') + tdLog.debug("the dnode:%d has been started." % (self.index)) + else: + tdLog.debug( + "wait 10 seconds for the dnode:%d to start." % + (self.index)) + time.sleep(10) + + def startWithoutSleep(self): + binPath = self.getPath() + + if (binPath == ""): + tdLog.exit("taosd not found!") + else: + tdLog.info("taosd found: %s" % binPath) + + if self.deployed == 0: + tdLog.exit("dnode:%d is not deployed" % (self.index)) + + if self.valgrind == 0: + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s -c %s" % (binPath, self.cfgDir) + else: + if self.asan: + asanDir = "%s/sim/asan/dnode%d.asan" % ( + self.path, self.index) + cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( + binPath, self.cfgDir, asanDir) + else: + cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( + binPath, self.cfgDir) + else: + valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir + if platform.system().lower() == 'windows': + cmd = "mintty -h never %s %s -c %s" % ( + valgrindCmdline, binPath, self.cfgDir) + else: + cmd = "nohup %s %s -c %s 2>&1 & " % ( + valgrindCmdline, binPath, self.cfgDir) + print(cmd) + + if (self.remoteIP == ""): + if os.system(cmd) != 0: + tdLog.exit(cmd) + else: + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.startWithoutSleep(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) + + self.running = 1 + tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) + + def stop(self): + if self.asan: + stopCmd = "%s -s stop -n dnode%d" % (self.execPath, self.index) + tdLog.info("execute script: " + stopCmd) + os.system(stopCmd) + return + + if (not self.remoteIP == ""): + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].stop()"%(self.index-1,self.index-1)) + tdLog.info("stop dnode%d"%self.index) + return + if self.valgrind == 0: + toBeKilled = "taosd" + else: + toBeKilled = "valgrind.bin" + + if self.running != 0: + psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs" % toBeKilled + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + + onlyKillOnceWindows = 0 + while(processID): + if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): + killCmd = "kill -INT %s > /dev/null 2>&1" % processID + if platform.system().lower() == 'windows': + killCmd = "kill -INT %s > nul 2>&1" % processID + os.system(killCmd) + onlyKillOnceWindows = 1 + time.sleep(1) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + if not platform.system().lower() == 'windows': + for port in range(6030, 6041): + fuserCmd = "fuser -k -n tcp %d > /dev/null" % port + os.system(fuserCmd) + if self.valgrind: + time.sleep(2) + + self.running = 0 + tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) + + + def stoptaosd(self): + tdLog.debug("start to stop taosd on dnode: %d "% (self.index)) + # print(self.asan,self.running,self.remoteIP,self.valgrind) + if self.asan: + stopCmd = "%s -s stop -n dnode%d" % (self.execPath, self.index) + tdLog.info("execute script: " + stopCmd) + os.system(stopCmd) + return + + if (not self.remoteIP == ""): + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].stop()"%(self.index-1,self.index-1)) + tdLog.info("stop dnode%d"%self.index) + return + if self.valgrind == 0: + toBeKilled = "taosd" + else: + toBeKilled = "valgrind.bin" + + if self.running != 0: + if platform.system().lower() == 'windows': + psCmd = "for /f %%a in ('wmic process where \"name='taosd.exe' and CommandLine like '%%dnode%d%%'\" get processId ^| xargs echo ^| awk ^'{print $2}^' ^&^& echo aa') do @(ps | grep %%a | awk '{print $1}' | xargs)" % (self.index) + else: + psCmd = "ps -ef|grep -w %s| grep dnode%d|grep -v grep | awk '{print $2}' | xargs" % (toBeKilled,self.index) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + + onlyKillOnceWindows = 0 + while(processID): + if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): + killCmd = "kill -INT %s > /dev/null 2>&1" % processID + if platform.system().lower() == 'windows': + killCmd = "kill -INT %s > nul 2>&1" % processID + os.system(killCmd) + onlyKillOnceWindows = 1 + time.sleep(1) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + if self.valgrind: + time.sleep(2) + + self.running = 0 + tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) + + def forcestop(self): + if self.asan: + stopCmd = "%s -s stop -n dnode%d -x SIGKILL" + \ + (self.execPath, self.index) + tdLog.info("execute script: " + stopCmd) + os.system(stopCmd) + return + + if (not self.remoteIP == ""): + self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].forcestop()"%(self.index-1,self.index-1)) + return + if self.valgrind == 0: + toBeKilled = "taosd" + else: + toBeKilled = "valgrind.bin" + + if self.running != 0: + psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs" % toBeKilled + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + + onlyKillOnceWindows = 0 + while(processID): + if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): + killCmd = "kill -KILL %s > /dev/null 2>&1" % processID + os.system(killCmd) + onlyKillOnceWindows = 1 + time.sleep(1) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8").strip() + for port in range(6030, 6041): + fuserCmd = "fuser -k -n tcp %d" % port + os.system(fuserCmd) + if self.valgrind: + time.sleep(2) + + self.running = 0 + tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index)) + + def startIP(self): + cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index) + if os.system(cmd) != 0: + tdLog.exit(cmd) + + def stopIP(self): + cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % ( + self.index, self.index) + if os.system(cmd) != 0: + tdLog.exit(cmd) + + def cfg(self, option, value): + cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) + if os.system(cmd) != 0: + tdLog.exit(cmd) + + def getDnodeRootDir(self, index): + dnodeRootDir = os.path.join(self.path,"sim","psim","dnode%d" % index) + return dnodeRootDir + + def getDnodesRootDir(self): + dnodesRootDir = os.path.join(self.path,"sim","psim") + return dnodesRootDir \ No newline at end of file diff --git a/tests/army/frame/server/dnodes.py b/tests/army/frame/server/dnodes.py index 0d40b665dd..a22e1cbff7 100644 --- a/tests/army/frame/server/dnodes.py +++ b/tests/army/frame/server/dnodes.py @@ -22,660 +22,12 @@ import base64 import json import copy from fabric2 import Connection -from frame.log import * from shutil import which - -class TDSimClient: - def __init__(self, path): - self.testCluster = False - self.path = path - self.cfgDict = { - "fqdn": "localhost", - "numOfLogLines": "100000000", - "locale": "en_US.UTF-8", - "charset": "UTF-8", - "asyncLog": "0", - "rpcDebugFlag": "135", - "tmrDebugFlag": "131", - "cDebugFlag": "135", - "uDebugFlag": "135", - "jniDebugFlag": "135", - "qDebugFlag": "135", - "supportVnodes": "1024", - "enableQueryHb": "1", - "telemetryReporting": "0", - "tqDebugflag": "135", - "wDebugflag":"135", - } - - def getLogDir(self): - self.logDir = os.path.join(self.path,"sim","psim","log") - return self.logDir - - def getCfgDir(self): - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - return self.cfgDir - - def setTestCluster(self, value): - self.testCluster = value - - def addExtraCfg(self, option, value): - self.cfgDict.update({option: value}) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def deploy(self, *updatecfgDict): - self.logDir = os.path.join(self.path,"sim","psim","log") - self.cfgDir = os.path.join(self.path,"sim","psim","cfg") - self.cfgPath = os.path.join(self.path,"sim","psim","cfg","taos.cfg") - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - # cmd = "mkdir -p " + self.logDir - # if os.system(cmd) != 0: - # tdLog.exit(cmd) - os.makedirs(self.logDir) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - # cmd = "mkdir -p " + self.cfgDir - # if os.system(cmd) != 0: - # tdLog.exit(cmd) - os.makedirs(self.cfgDir) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("logDir", self.logDir) - - for key, value in self.cfgDict.items(): - self.cfg(key, value) - - try: - if bool(updatecfgDict) and updatecfgDict[0] and updatecfgDict[0][0]: - clientCfg = dict (updatecfgDict[0][0].get('clientCfg')) - for key, value in clientCfg.items(): - self.cfg(key, value) - except Exception: - pass - - tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath)) - - -class TDDnode: - def __init__(self, index=1, level=1, disk=1): - self.index = index - self.level = level - self.disk = disk - self.dataDir = [] - self.running = 0 - self.deployed = 0 - self.testCluster = False - self.valgrind = 0 - self.asan = False - self.remoteIP = "" - self.cfgDict = { - "fqdn": "localhost", - "monitor": "0", - "maxShellConns": "30000", - "locale": "en_US.UTF-8", - "charset": "UTF-8", - "asyncLog": "0", - "mDebugFlag": "143", - "dDebugFlag": "143", - "vDebugFlag": "143", - "tqDebugFlag": "143", - "cDebugFlag": "143", - "stDebugFlag": "143", - "smaDebugFlag": "143", - "jniDebugFlag": "143", - "qDebugFlag": "143", - "rpcDebugFlag": "143", - "tmrDebugFlag": "131", - "uDebugFlag": "135", - "sDebugFlag": "135", - "wDebugFlag": "135", - "numOfLogLines": "100000000", - "statusInterval": "1", - "enableQueryHb": "1", - "supportVnodes": "1024", - "telemetryReporting": "0" - } - - def init(self, path, remoteIP = ""): - self.path = path - self.remoteIP = remoteIP - if (not self.remoteIP == ""): - try: - self.config = eval(self.remoteIP) - self.remote_conn = Connection(host=self.config["host"], port=self.config["port"], user=self.config["user"], connect_kwargs={'password':self.config["password"]}) - except Exception as r: - print(r) - - def setTestCluster(self, value): - self.testCluster = value - - def setValgrind(self, value): - self.valgrind = value - - def setAsan(self, value): - self.asan = value - if value: - selfPath = os.path.dirname(os.path.realpath(__file__)) - if ("community" in selfPath): - self.execPath = os.path.abspath(self.path + "/community/tests/script/sh/exec.sh") - else: - self.execPath = os.path.abspath(self.path + "/tests/script/sh/exec.sh") - - def getDataSize(self): - totalSize = 0 - - if (self.deployed == 1): - for dirpath, dirnames, filenames in os.walk(self.dataDir): - for f in filenames: - fp = os.path.join(dirpath, f) - - if not os.path.islink(fp): - totalSize = totalSize + os.path.getsize(fp) - - return totalSize - - def addExtraCfg(self, option, value): - self.cfgDict.update({option: value}) - - def remoteExec(self, updateCfgDict, execCmd): - valgrindStr = '' - if (self.valgrind==1): - valgrindStr = '-g' - remoteCfgDict = copy.deepcopy(updateCfgDict) - if ("logDir" in remoteCfgDict): - del remoteCfgDict["logDir"] - if ("dataDir" in remoteCfgDict): - del remoteCfgDict["dataDir"] - if ("cfgDir" in remoteCfgDict): - del remoteCfgDict["cfgDir"] - remoteCfgDictStr = base64.b64encode(json.dumps(remoteCfgDict).encode()).decode() - execCmdStr = base64.b64encode(execCmd.encode()).decode() - with self.remote_conn.cd((self.config["path"]+sys.path[0].replace(self.path, '')).replace('\\','/')): - self.remote_conn.run("python3 ./test.py %s -d %s -e %s"%(valgrindStr,remoteCfgDictStr,execCmdStr)) - - def deploy(self, *updatecfgDict): - # logDir - self.logDir = os.path.join(self.path,"sim","dnode%d" % self.index, "log") - # dataDir - simPath = os.path.join(self.path, "sim", "dnode%d" % self.index) - primary = 1 - if self.level == 1 and self.disk == 1: - eDir = os.path.join(simPath, "data") - self.dataDir.append(eDir) - else: - for i in range(self.level): - for j in range(self.disk): - eDir = os.path.join(simPath, f"data{i}{j}") - self.dataDir.append(f"{eDir} {i} {primary}") - if primary == 1: - primary = 0 - - # taos.cfg - self.cfgDir = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg") - self.cfgPath = os.path.join(self.path,"sim","dnode%d" % self.index, "cfg","taos.cfg") - - for eDir in self.dataDir: - cmd = "rm -rf " + eDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.logDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - cmd = "rm -rf " + self.cfgDir - if os.system(cmd) != 0: - tdLog.exit(cmd) - - # cmd = "mkdir -p " + self.dataDir - # if os.system(cmd) != 0: - # tdLog.exit(cmd) - for eDir in self.dataDir: - os.makedirs(eDir.split(' ')[0]) - - # cmd = "mkdir -p " + self.logDir - # if os.system(cmd) != 0: - # tdLog.exit(cmd) - os.makedirs(self.logDir) - - # cmd = "mkdir -p " + self.cfgDir - # if os.system(cmd) != 0: - # tdLog.exit(cmd) - os.makedirs(self.cfgDir) - - cmd = "touch " + self.cfgPath - if os.system(cmd) != 0: - tdLog.exit(cmd) - - if self.testCluster: - self.startIP() - - if self.testCluster: - self.cfg("masterIp", "192.168.0.1") - self.cfg("secondIp", "192.168.0.2") - self.cfg("publicIp", "192.168.0.%d" % (self.index)) - self.cfg("internalIp", "192.168.0.%d" % (self.index)) - self.cfg("privateIp", "192.168.0.%d" % (self.index)) - self.cfgDict["dataDir"] = self.dataDir - self.cfgDict["logDir"] = self.logDir - # self.cfg("dataDir",self.dataDir) - # self.cfg("logDir",self.logDir) - # print(updatecfgDict) - isFirstDir = 1 - if bool(updatecfgDict) and updatecfgDict[0] and updatecfgDict[0][0]: - for key, value in updatecfgDict[0][0].items(): - if key == "clientCfg" and self.remoteIP == "" and not platform.system().lower() == 'windows': - continue - if value == 'dataDir': - if isFirstDir: - self.cfgDict.pop('dataDir') - self.cfg(value, key) - isFirstDir = 0 - else: - self.cfg(value, key) - else: - self.addExtraCfg(key, value) - if (self.remoteIP == ""): - for key, value in self.cfgDict.items(): - if type(value) == list: - for v in value: - self.cfg(key, v) - else: - self.cfg(key, value) - else: - self.remoteExec(self.cfgDict, "tdDnodes.deploy(%d,updateCfgDict)"%self.index) - - self.deployed = 1 - tdLog.debug( - "dnode:%d is deployed and configured by %s" % - (self.index, self.cfgPath)) - - def getPath(self, tool="taosd"): - selfPath = os.path.dirname(os.path.realpath(__file__)) - - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] - else: - projPath = selfPath[:selfPath.find("tests")] - - paths = [] - for root, dirs, files in os.walk(projPath): - if ((tool) in files or ("%s.exe"%tool) in files): - rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): - paths.append(os.path.join(root, tool)) - break - if (len(paths) == 0): - return "" - return paths[0] - - def starttaosd(self): - binPath = self.getPath() - - if (binPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found: %s" % binPath) - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s -c %s" % ( - binPath, self.cfgDir) - else: - if self.asan: - asanDir = "%s/sim/asan/dnode%d.asan" % ( - self.path, self.index) - cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( - binPath, self.cfgDir, asanDir) - else: - cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir - - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s %s -c %s" % ( - valgrindCmdline, binPath, self.cfgDir) - else: - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - - print(cmd) - - if (not self.remoteIP == ""): - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.start(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) - self.running = 1 - else: - if os.system(cmd) != 0: - tdLog.exit(cmd) - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - if self.valgrind == 0: - time.sleep(0.1) - key1 = 'from offline to online' - bkey1 = bytes(key1, encoding="utf8") - key2= 'TDengine initialized successfully' - bkey2 = bytes(key2, encoding="utf8") - logFile = self.logDir + "/taosdlog.0" - i = 0 - # while not os.path.exists(logFile): - # sleep(0.1) - # i += 1 - # if i > 10: - # break - # tailCmdStr = 'tail -f ' - # if platform.system().lower() == 'windows': - # tailCmdStr = 'tail -n +0 -f ' - # popen = subprocess.Popen( - # tailCmdStr + logFile, - # stdout=subprocess.PIPE, - # stderr=subprocess.PIPE, - # shell=True) - # pid = popen.pid - # # print('Popen.pid:' + str(pid)) - # timeout = time.time() + 60 * 2 - # while True: - # line = popen.stdout.readline().strip() - # print(line) - # if bkey1 in line: - # popen.kill() - # break - # elif bkey2 in line: - # popen.kill() - # break - # if time.time() > timeout: - # print(time.time(),timeout) - # tdLog.exit('wait too long for taosd start') - tdLog.debug("the dnode:%d has been started." % (self.index)) - else: - tdLog.debug( - "wait 10 seconds for the dnode:%d to start." % - (self.index)) - time.sleep(10) - - def start(self): - binPath = self.getPath() - - if (binPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found: %s" % binPath) - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s -c %s" % ( - binPath, self.cfgDir) - else: - if self.asan: - asanDir = "%s/sim/asan/dnode%d.asan" % ( - self.path, self.index) - cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( - binPath, self.cfgDir, asanDir) - else: - cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir - - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s %s -c %s" % ( - valgrindCmdline, binPath, self.cfgDir) - else: - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - - print(cmd) - - if (not self.remoteIP == ""): - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.start(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) - self.running = 1 - else: - os.system("rm -rf %s/taosdlog.0"%self.logDir) - if os.system(cmd) != 0: - tdLog.exit(cmd) - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - if self.valgrind == 0: - time.sleep(0.1) - key = 'from offline to online' - bkey = bytes(key, encoding="utf8") - logFile = self.logDir + "/taosdlog.0" - i = 0 - while not os.path.exists(logFile): - sleep(0.1) - i += 1 - if i > 50: - break - with open(logFile) as f: - timeout = time.time() + 10 * 2 - while True: - line = f.readline().encode('utf-8') - if bkey in line: - break - if time.time() > timeout: - tdLog.exit('wait too long for taosd start') - tdLog.debug("the dnode:%d has been started." % (self.index)) - else: - tdLog.debug( - "wait 10 seconds for the dnode:%d to start." % - (self.index)) - time.sleep(10) - - def startWithoutSleep(self): - binPath = self.getPath() - - if (binPath == ""): - tdLog.exit("taosd not found!") - else: - tdLog.info("taosd found: %s" % binPath) - - if self.deployed == 0: - tdLog.exit("dnode:%d is not deployed" % (self.index)) - - if self.valgrind == 0: - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s -c %s" % (binPath, self.cfgDir) - else: - if self.asan: - asanDir = "%s/sim/asan/dnode%d.asan" % ( - self.path, self.index) - cmd = "nohup %s -c %s > /dev/null 2> %s & " % ( - binPath, self.cfgDir, asanDir) - else: - cmd = "nohup %s -c %s > /dev/null 2>&1 & " % ( - binPath, self.cfgDir) - else: - valgrindCmdline = "valgrind --log-file=\"%s/../log/valgrind.log\" --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes"%self.cfgDir - if platform.system().lower() == 'windows': - cmd = "mintty -h never %s %s -c %s" % ( - valgrindCmdline, binPath, self.cfgDir) - else: - cmd = "nohup %s %s -c %s 2>&1 & " % ( - valgrindCmdline, binPath, self.cfgDir) - print(cmd) - - if (self.remoteIP == ""): - if os.system(cmd) != 0: - tdLog.exit(cmd) - else: - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].deployed=1\ntdDnodes.dnodes[%d].logDir=\"%%s/sim/dnode%%d/log\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.dnodes[%d].cfgDir=\"%%s/sim/dnode%%d/cfg\"%%(tdDnodes.dnodes[%d].path,%d)\ntdDnodes.startWithoutSleep(%d)"%(self.index-1,self.index-1,self.index-1,self.index,self.index-1,self.index-1,self.index,self.index)) - - self.running = 1 - tdLog.debug("dnode:%d is running with %s " % (self.index, cmd)) - - def stop(self): - if self.asan: - stopCmd = "%s -s stop -n dnode%d" % (self.execPath, self.index) - tdLog.info("execute script: " + stopCmd) - os.system(stopCmd) - return - - if (not self.remoteIP == ""): - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].stop()"%(self.index-1,self.index-1)) - tdLog.info("stop dnode%d"%self.index) - return - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - - onlyKillOnceWindows = 0 - while(processID): - if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): - killCmd = "kill -INT %s > /dev/null 2>&1" % processID - if platform.system().lower() == 'windows': - killCmd = "kill -INT %s > nul 2>&1" % processID - os.system(killCmd) - onlyKillOnceWindows = 1 - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - if not platform.system().lower() == 'windows': - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d > /dev/null" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) - - - def stoptaosd(self): - tdLog.debug("start to stop taosd on dnode: %d "% (self.index)) - # print(self.asan,self.running,self.remoteIP,self.valgrind) - if self.asan: - stopCmd = "%s -s stop -n dnode%d" % (self.execPath, self.index) - tdLog.info("execute script: " + stopCmd) - os.system(stopCmd) - return - - if (not self.remoteIP == ""): - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].stop()"%(self.index-1,self.index-1)) - tdLog.info("stop dnode%d"%self.index) - return - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - if platform.system().lower() == 'windows': - psCmd = "for /f %%a in ('wmic process where \"name='taosd.exe' and CommandLine like '%%dnode%d%%'\" get processId ^| xargs echo ^| awk ^'{print $2}^' ^&^& echo aa') do @(ps | grep %%a | awk '{print $1}' | xargs)" % (self.index) - else: - psCmd = "ps -ef|grep -w %s| grep dnode%d|grep -v grep | awk '{print $2}' | xargs" % (toBeKilled,self.index) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - - onlyKillOnceWindows = 0 - while(processID): - if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): - killCmd = "kill -INT %s > /dev/null 2>&1" % processID - if platform.system().lower() == 'windows': - killCmd = "kill -INT %s > nul 2>&1" % processID - os.system(killCmd) - onlyKillOnceWindows = 1 - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -INT" % (self.index)) - - def forcestop(self): - if self.asan: - stopCmd = "%s -s stop -n dnode%d -x SIGKILL" + \ - (self.execPath, self.index) - tdLog.info("execute script: " + stopCmd) - os.system(stopCmd) - return - - if (not self.remoteIP == ""): - self.remoteExec(self.cfgDict, "tdDnodes.dnodes[%d].running=1\ntdDnodes.dnodes[%d].forcestop()"%(self.index-1,self.index-1)) - return - if self.valgrind == 0: - toBeKilled = "taosd" - else: - toBeKilled = "valgrind.bin" - - if self.running != 0: - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs" % toBeKilled - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - - onlyKillOnceWindows = 0 - while(processID): - if not platform.system().lower() == 'windows' or (onlyKillOnceWindows == 0 and platform.system().lower() == 'windows'): - killCmd = "kill -KILL %s > /dev/null 2>&1" % processID - os.system(killCmd) - onlyKillOnceWindows = 1 - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - for port in range(6030, 6041): - fuserCmd = "fuser -k -n tcp %d" % port - os.system(fuserCmd) - if self.valgrind: - time.sleep(2) - - self.running = 0 - tdLog.debug("dnode:%d is stopped by kill -KILL" % (self.index)) - - def startIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d up" % (self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def stopIP(self): - cmd = "sudo ifconfig lo:%d 192.168.0.%d down" % ( - self.index, self.index) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def cfg(self, option, value): - cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) - if os.system(cmd) != 0: - tdLog.exit(cmd) - - def getDnodeRootDir(self, index): - dnodeRootDir = os.path.join(self.path,"sim","psim","dnode%d" % index) - return dnodeRootDir - - def getDnodesRootDir(self): - dnodesRootDir = os.path.join(self.path,"sim","psim") - return dnodesRootDir - +# self +from frame.log import * +from frame.server.dnode import * +from frame.server.simClient import * class TDDnodes: def __init__(self): diff --git a/tests/army/frame/server/simClient.py b/tests/army/frame/server/simClient.py new file mode 100644 index 0000000000..a4b719f798 --- /dev/null +++ b/tests/army/frame/server/simClient.py @@ -0,0 +1,116 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import os +import os.path +import platform +import distro +import subprocess +from time import sleep +import base64 +import json +import copy +from fabric2 import Connection +from shutil import which + +# self +from frame.log import * + + +class TDSimClient: + def __init__(self, path): + self.testCluster = False + self.path = path + self.cfgDict = { + "fqdn": "localhost", + "numOfLogLines": "100000000", + "locale": "en_US.UTF-8", + "charset": "UTF-8", + "asyncLog": "0", + "rpcDebugFlag": "135", + "tmrDebugFlag": "131", + "cDebugFlag": "135", + "uDebugFlag": "135", + "jniDebugFlag": "135", + "qDebugFlag": "135", + "supportVnodes": "1024", + "enableQueryHb": "1", + "telemetryReporting": "0", + "tqDebugflag": "135", + "wDebugflag":"135", + } + + def getLogDir(self): + self.logDir = os.path.join(self.path,"sim","psim","log") + return self.logDir + + def getCfgDir(self): + self.cfgDir = os.path.join(self.path,"sim","psim","cfg") + return self.cfgDir + + def setTestCluster(self, value): + self.testCluster = value + + def addExtraCfg(self, option, value): + self.cfgDict.update({option: value}) + + def cfg(self, option, value): + cmd = "echo %s %s >> %s" % (option, value, self.cfgPath) + if os.system(cmd) != 0: + tdLog.exit(cmd) + + def deploy(self, *updatecfgDict): + self.logDir = os.path.join(self.path,"sim","psim","log") + self.cfgDir = os.path.join(self.path,"sim","psim","cfg") + self.cfgPath = os.path.join(self.path,"sim","psim","cfg","taos.cfg") + + cmd = "rm -rf " + self.logDir + if os.system(cmd) != 0: + tdLog.exit(cmd) + + # cmd = "mkdir -p " + self.logDir + # if os.system(cmd) != 0: + # tdLog.exit(cmd) + os.makedirs(self.logDir) + + cmd = "rm -rf " + self.cfgDir + if os.system(cmd) != 0: + tdLog.exit(cmd) + + # cmd = "mkdir -p " + self.cfgDir + # if os.system(cmd) != 0: + # tdLog.exit(cmd) + os.makedirs(self.cfgDir) + + cmd = "touch " + self.cfgPath + if os.system(cmd) != 0: + tdLog.exit(cmd) + + if self.testCluster: + self.cfg("masterIp", "192.168.0.1") + self.cfg("secondIp", "192.168.0.2") + self.cfg("logDir", self.logDir) + + for key, value in self.cfgDict.items(): + self.cfg(key, value) + + try: + if bool(updatecfgDict) and updatecfgDict[0] and updatecfgDict[0][0]: + clientCfg = dict (updatecfgDict[0][0].get('clientCfg')) + for key, value in clientCfg.items(): + self.cfg(key, value) + except Exception: + pass + + tdLog.debug("psim is deployed and configured by %s" % (self.cfgPath)) \ No newline at end of file diff --git a/tests/army/frame/sql.py b/tests/army/frame/sql.py index 2e14f0c2f0..eafae9be2d 100644 --- a/tests/army/frame/sql.py +++ b/tests/army/frame/sql.py @@ -482,6 +482,11 @@ class TDSql: time.sleep(1) pass + # execute many sql + def executes(self, sqls, queryTimes=30, show=False): + for sql in sqls: + self.execute(sql, queryTimes, show) + def checkAffectedRows(self, expectAffectedRows): if self.affectedRows != expectAffectedRows: caller = inspect.getframeinfo(inspect.stack()[1][0]) diff --git a/tests/army/frame/srvCtl.py b/tests/army/frame/srvCtl.py index 155c83094d..c01ea33578 100644 --- a/tests/army/frame/srvCtl.py +++ b/tests/army/frame/srvCtl.py @@ -16,8 +16,10 @@ import os import time import datetime +from frame.server.dnode import * from frame.server.dnodes import * + class srvCtl: def __init__(self): # record server information @@ -26,6 +28,19 @@ class srvCtl: self.mLevel = 0 self.mLevelDisk = 0 + # + # control server + # + + # start + def dnodeStart(self, idx): + return tdDnodes.starttaosd(idx) + + # stop + def dnodeStop(self, idx): + return tdDnodes.stoptaosd(idx) + + # # about path # @@ -38,7 +53,14 @@ class srvCtl: def dnodeDataFiles(self, idx): files = [] return files + + # + # get dnodes information + # + # taos.cfg position + def dnodeCfgPath(self, idx): + return tdDnodes.dnodes[idx-1].cfgPath sc = srvCtl() \ No newline at end of file diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 336f58afa3..63745f75ab 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -11,6 +11,7 @@ ,,y,army,./pytest.sh python3 ./test.py -f enterprise/multi-level/mlevel_basic.py -N 3 -L 3 -D 2 ,,y,army,./pytest.sh python3 ./test.py -f enterprise/s3/s3_basic.py -L 3 -D 1 ,,y,army,./pytest.sh python3 ./test.py -f community/cluster/snapshot.py -N 3 -L 3 -D 2 +,,n,army,python3 ./test.py -f community/cmdline/fullopt.py # @@ -36,6 +37,7 @@ #,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/count_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/compact-col.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 2 diff --git a/tests/script/coverage_test.sh b/tests/script/coverage_test.sh index 1b9e9753d3..c5e0c31f83 100644 --- a/tests/script/coverage_test.sh +++ b/tests/script/coverage_test.sh @@ -9,8 +9,14 @@ else exit 1 fi +# work main path +TDENGINE_DIR=/root/TDinternal/community +if [ x$2 != x ];then + TDENGINE_DIR=$2 +fi + +echo "TDENGINE_DIR=$TDENGINE_DIR" today=`date +"%Y%m%d"` -TDENGINE_DIR=/root/TDengine JDBC_DIR=/root/taos-connector-jdbc TDENGINE_COVERAGE_REPORT=$TDENGINE_DIR/tests/coverage-report-$today.log @@ -23,7 +29,16 @@ NC='\033[0m' function buildTDengine() { echo "check if TDengine need build" + + # pull parent code + cd "$TDENGINE_DIR/../" + echo "git pull parent code..." + git remote prune origin > /dev/null + git remote update > /dev/null + + # pull tdengine code cd $TDENGINE_DIR + echo "git pull tdengine code..." git remote prune origin > /dev/null git remote update > /dev/null REMOTE_COMMIT=`git rev-parse --short remotes/origin/$branch` @@ -54,14 +69,11 @@ function buildTDengine() { LOCAL_COMMIT=`git rev-parse --short @` rm -rf * - if [ "$branch" == "3.0" ]; then - echo "3.0 =============" - cmake -DCOVER=true -DBUILD_TEST=true -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_GEOS=true -DBUILD_CONTRIB=true .. - else - cmake -DCOVER=true -DBUILD_TOOLS=true -DBUILD_HTTP=false .. > /dev/null - fi - make -j - make install + makecmd="cmake -DCOVER=true -DBUILD_TEST=true -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_GEOS=true -DBUILD_CONTRIB=true ../../" + echo "$makecmd" + $makecmd + + make -j 8 install } function runCasesOneByOne () { @@ -71,18 +83,18 @@ function runCasesOneByOne () { if [[ "$2" == "sim" ]] && [[ $line == *"script"* ]]; then case=`echo $cmd | cut -d' ' -f 3` start_time=`date +%s` - date +%F\ %T | tee -a $TDENGINE_COVERAGE_REPORT && $cmd > /dev/null 2>&1 && \ + date +%F\ %T | tee -a $TDENGINE_COVERAGE_REPORT && timeout 20m $cmd > /dev/null 2>&1 && \ echo -e "${GREEN}$case success${NC}" | tee -a $TDENGINE_COVERAGE_REPORT \ || echo -e "${RED}$case failed${NC}" | tee -a $TDENGINE_COVERAGE_REPORT end_time=`date +%s` echo execution time of $case was `expr $end_time - $start_time`s. | tee -a $TDENGINE_COVERAGE_REPORT - elif [$line == *"$2"* ]; then + elif [[ "$line" == *"$2"* ]]; then if [[ "$cmd" == *"pytest.sh"* ]]; then cmd=`echo $cmd | cut -d' ' -f 2-20` fi case=`echo $cmd | cut -d' ' -f 4-20` start_time=`date +%s` - date +%F\ %T | tee -a $TDENGINE_COVERAGE_REPORT && $cmd > /dev/null 2>&1 && \ + date +%F\ %T | tee -a $TDENGINE_COVERAGE_REPORT && timeout 20m $cmd > /dev/null 2>&1 && \ echo -e "${GREEN}$case success${NC}" | tee -a $TDENGINE_COVERAGE_REPORT || \ echo -e "${RED}$case failed${NC}" | tee -a $TDENGINE_COVERAGE_REPORT end_time=`date +%s` @@ -178,7 +190,7 @@ function runTest() { runPythonCases runJDBCCases - stopTaosd + stopTaosd cd $TDENGINE_DIR/tests/script find . -name '*.sql' | xargs rm -f @@ -191,7 +203,7 @@ function lcovFunc { cd $TDENGINE_DIR # collect data - lcov --ignore-errors negative --ignore-errors mismatch -d . --capture --rc lcov_branch_coverage=1 --rc branch_coverage=1 --no-external -b $TDENGINE_DIR -o coverage.info + lcov --capture -d $TDENGINE_DIR -b $TDENGINE_DIR -o coverage.info --ignore-errors negative,mismatch,inconsistent,source --branch-coverage --function-coverage --no-external # remove exclude paths lcov --remove coverage.info \ @@ -202,23 +214,23 @@ function lcovFunc { '*/clientJniConnector.c' '*/clientTmqConnector.c' '*/version.c'\ '*/tthread.c' '*/tversion.c' '*/ctgDbg.c' '*/schDbg.c' '*/qwDbg.c' '*/tencode.h' \ '*/shellAuto.c' '*/shellTire.c' '*/shellCommand.c'\ - --rc branch_coverage=1 -o coverage.info + '*/sql.c' '*/sql.y'\ + --branch-coverage --function-coverage -o coverage.info # generate result echo "generate result" - lcov -l --rc branch_coverage=1 coverage.info | tee -a $TDENGINE_COVERAGE_REPORT + lcov -l --branch-coverage --function-coverage coverage.info | tee -a $TDENGINE_COVERAGE_REPORT sed -i 's/\/root\/TDengine\/sql.c/\/root\/TDengine\/source\/libs\/parser\/inc\/sql.c/g' coverage.info sed -i 's/\/root\/TDengine\/sql.y/\/root\/TDengine\/source\/libs\/parser\/inc\/sql.y/g' coverage.info # push result to coveralls.io echo "push result to coveralls.io" - /usr/local/bin/coveralls-lcov coverage.info -t o7uY02qEAgKyJHrkxLGiCOTfL3IGQR2zm | tee -a $TDENGINE_COVERAGE_REPORT + /usr/local/bin/coveralls-lcov coverage.info -b $branch -t o7uY02qEAgKyJHrkxLGiCOTfL3IGQR2zm | tee -a $TDENGINE_COVERAGE_REPORT #/root/pxiao/checkCoverageFile.sh -s $TDENGINE_DIR/source -f $TDENGINE_COVERAGE_REPORT #cat /root/pxiao/fileListNoCoverage.log | tee -a $TDENGINE_COVERAGE_REPORT cat $TDENGINE_COVERAGE_REPORT | grep "| 0.0%" | awk -F "%" '{print $1}' | awk -F "|" '{if($2==0.0)print $1}' | tee -a $TDENGINE_COVERAGE_REPORT - } function sendReport { diff --git a/tests/system-test/1-insert/insert_timestamp.py b/tests/system-test/1-insert/insert_timestamp.py index bc3b1c0275..310a27dae0 100644 --- a/tests/system-test/1-insert/insert_timestamp.py +++ b/tests/system-test/1-insert/insert_timestamp.py @@ -13,25 +13,13 @@ class TDTestCase: tdSql.init(conn.cursor(), True) def run(self): - """ - timestamp输入插入规则: - 对于插入的字段类型为timestamp类型的字段,只允许这么几种情况: - timestamp - timestamp +/- interval - interval + timestamp - timestamp可以是字符串譬如:"2023-12-05 00:00:00.000", 也可以是int型, 譬如:1701619200000 - interval支持:b, u, a, s, m, h, d, w 不支持n, y,譬如:1h, 2d - - 仅支持2元表达式,譬如:timestamp + 2h, 不支持2元以上表达,譬如timestamp + 2h + 1d - """ - tdSql.execute("create database test_insert_timestamp PRECISION 'ns';") tdSql.execute("use test_insert_timestamp;") tdSql.execute("create stable st(ts timestamp, c1 int) tags(id int);") tdSql.execute("create table test_t using st tags(1);") expectErrInfo = "syntax error" - # 异常场景:timestamp + timestamp + # abnormal scenario: timestamp + timestamp tdSql.error("insert into test_t values(now + today(), 1 );", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(now - today(), 1 );", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(today() + now(), 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) @@ -40,24 +28,24 @@ class TDTestCase: tdSql.error("insert into test_t values('2023-11-28 00:00:00.000' + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(1701111500000 + 1701111600000, 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:timestamp + interval + interval + # abnormal scenario: timestamp + interval + interval tdSql.error("insert into test_t values(today() + 1d + 1s, 1);", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:interval - timestamp + # abnormal scenario: interval - timestamp tdSql.error("insert into test_t values(2h - now(), 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(2h - today(), 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:interval + interval + # abnormal scenario: interval + interval tdSql.error("insert into test_t values(2h - 1h, 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(2h + 1h, 1 ); ", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:非法interval类型n + # abnormal scenario: non-support datatype - n tdSql.error("insert into test_t values(today() + 2n, 7); ", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:非法interval类型y + # abnormal scenario: non-support datatype - y tdSql.error("insert into test_t values(today() - 2y, 8);", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:数据类型不对 + # abnormal scenario: non-support datatype tdSql.error("insert into test_t values('a1701619200000', 8);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values('ss2023-12-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(123456, 1);", expectErrInfo="Timestamp data out of range") @@ -66,31 +54,31 @@ class TDTestCase: tdSql.error("insert into test_t values(None, 1);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(null, 1);", expectErrInfo=expectErrInfo, fullMatched=False) - # 异常场景:格式不对 + # abnormal scenario: incorrect format tdSql.error("insert into test_t values('2023-122-05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values('2023-12--05 00:00:00.000' + '1701619200000', 1);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values('12/12/2023' + 10a, 1);", expectErrInfo=expectErrInfo, fullMatched=False) tdSql.error("insert into test_t values(1701619200000111, 1);", expectErrInfo="Timestamp data out of range", fullMatched=False) - # 正常场景:timestamp + interval + # normal scenario:timestamp + interval tdSql.execute("insert into test_t values(today() + 2b, 1);") tdSql.execute("insert into test_t values(1701619200000000000 + 2u, 2);") tdSql.execute("insert into test_t values(today + 2a, 3);") tdSql.execute("insert into test_t values('2023-12-05 23:59:59.999' + 2a, 4);") tdSql.execute("insert into test_t values(1701921599000000000 + 3a, 5);") - # 正常场景:timestamp - interval + # normal scenario:timestamp - interval tdSql.execute("insert into test_t values(today() - 2s, 6);") tdSql.execute("insert into test_t values(now() - 2m, 7);") tdSql.execute("insert into test_t values(today - 2h, 8);") tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000000000' - 2a, 9);") tdSql.execute("insert into test_t values(1701669000000000000 - 2a, 10);") - # 正常场景:interval + timestamp + # normal scenario:interval + timestamp tdSql.execute("insert into test_t values(2d + now, 11);") tdSql.execute("insert into test_t values(2w + today, 12);") - # 正常场景:timestamp + # normal scenario:timestamp tdSql.execute("insert into test_t values('2023-12-05 00:00:00.000', 13);") tdSql.execute("insert into test_t values(1701629100000000000, 14);") tdSql.execute("insert into test_t values(now() + 2s, 15);") @@ -102,7 +90,7 @@ class TDTestCase: tdSql.execute("insert into test_t values(1701619200000000000, -5);") tdSql.execute("insert into test_t values('2023-12-05 12:12:12' + 10a, 19);") - # 验证数据 + # data verification tdSql.query(f'select ts,c1 from test_t order by c1;') tdSql.checkRows(22) tdSql.checkEqual(tdSql.queryResult[0][0], 1699977600000000000) # c1=-15 @@ -133,12 +121,10 @@ class TDTestCase: tdSql.execute("drop database if exists test_insert_timestamp;") def __convert_ts_to_date(self, ts: int) -> str: - # 创建datetime对象并进行转换 dt_object = datetime.datetime.fromtimestamp(ts / 1e9) - # 格式化日期字符串 formatted_date = dt_object.strftime('%Y-%m-%d') - # print("转换后的日期为:", formatted_date) + return formatted_date def __get_today_ts(self) -> int: diff --git a/tests/system-test/2-query/count_interval.py b/tests/system-test/2-query/count_interval.py new file mode 100644 index 0000000000..b37cc1db22 --- /dev/null +++ b/tests/system-test/2-query/count_interval.py @@ -0,0 +1,60 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import taos +from util.log import tdLog +from util.cases import tdCases +from util.sql import tdSql +from util.dnodes import tdDnodes +import random + + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use d") + + def run(self): + tdSql.execute("drop database if exists d"); + tdSql.execute("create database d"); + tdSql.execute("use d"); + tdSql.execute("create table st(ts timestamp, f int) tags (t int)") + + for i in range(-2048, 2047): + ts = 1626624000000 + i; + tdSql.execute(f"insert into ct1 using st tags(1) values({ts}, {i})") + + tdSql.execute("flush database d") + for i in range(1638): + ts = 1648758398208 + i + tdSql.execute(f"insert into ct1 using st tags(1) values({ts}, {i})") + tdSql.execute("insert into ct1 using st tags(1) values(1648970742528, 1638)") + tdSql.execute("flush database d") + + tdSql.query("select count(ts) from ct1 interval(17n, 5n)") + self.restartTaosd() + tdSql.query("select count(ts) from ct1 interval(17n, 5n)") + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/2-query/partition_by_col.py b/tests/system-test/2-query/partition_by_col.py index feb8693e3e..549e2738be 100644 --- a/tests/system-test/2-query/partition_by_col.py +++ b/tests/system-test/2-query/partition_by_col.py @@ -169,6 +169,16 @@ class TDTestCase: self.check_explain_res_has_row("Partition on", self.explain_sql(sql)) self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint)) + sql = 'select count(*), c1 from meters partition by c1' + sql_hint = 'select /*+ sort_for_group() partition_first()*/ count(*), c1 from meters partition by c1' + self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint)) + sql_hint = 'select /*+ partition_first()*/ count(*), c1 from meters partition by c1' + self.check_explain_res_has_row("Partition on", self.explain_sql(sql_hint)) + sql_hint = 'select /*+ partition_first() sort_for_group()*/ count(*), c1 from meters partition by c1' + self.check_explain_res_has_row("Partition on", self.explain_sql(sql_hint)) + sql_hint = 'select /*+ sort_for_group() partition_first()*/ count(*), c1 from meters partition by c1' + self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint)) + def add_order_by(self, sql: str, order_by: str, select_list: str = "*") -> str: return "select %s from (%s)t order by %s" % (select_list, sql, order_by) diff --git a/tests/system-test/8-stream/at_once_state_window.py b/tests/system-test/8-stream/at_once_state_window.py index fa9f4ddd78..60a4f4e890 100644 --- a/tests/system-test/8-stream/at_once_state_window.py +++ b/tests/system-test/8-stream/at_once_state_window.py @@ -59,6 +59,9 @@ class TDTestCase: self.tdCom.create_stream(stream_name=f'{self.tb_name}{self.tdCom.stream_suffix}', des_table=self.tb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.tb_source_select_str} from {self.tb_name} partition by {partition} {partition_elm_alias} state_window({stream_state_window})', trigger_mode="at_once", subtable_value=tb_subtable_value, fill_history_value=fill_history_value) range_times = self.tdCom.range_count state_window_max = self.tdCom.dataDict['state_window_max'] + + time.sleep(2) + for i in range(range_times): state_window_value = random.randint(int((i)*state_window_max/range_times), int((i+1)*state_window_max/range_times)) for i in range(2, range_times+3):