From b28437aaba2bbc13c0030e129c71858866cc687b Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 10 Jul 2024 18:42:20 +0800 Subject: [PATCH 01/17] feat:[TS-4592] clear lost status for consumer --- include/common/tmsg.h | 21 ++- include/util/tdef.h | 2 + source/client/src/clientTmq.c | 73 +++++++- source/common/src/systable.c | 8 +- source/common/src/tmsg.c | 4 + source/dnode/mnode/impl/inc/mndDef.h | 5 +- source/dnode/mnode/impl/src/mndConsumer.c | 200 +++++++++++---------- source/dnode/mnode/impl/src/mndDef.c | 13 ++ source/dnode/mnode/impl/src/mndSubscribe.c | 33 ++-- 9 files changed, 228 insertions(+), 131 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index a83aa4da44..c812138282 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2772,6 +2772,9 @@ enum { TOPIC_SUB_TYPE__COLUMN, }; +#define DEFAULT_MAX_POLL_INTERVAL 3000000 +#define DEFAULT_SESSION_TIMEOUT 10000 + typedef struct { char name[TSDB_TOPIC_FNAME_LEN]; // accout.topic int8_t igExists; @@ -2794,7 +2797,7 @@ typedef struct { typedef struct { int64_t consumerId; char cgroup[TSDB_CGROUP_LEN]; - char clientId[256]; + char clientId[TSDB_CLIENT_ID_LEN]; SArray* topicNames; // SArray int8_t withTbName; @@ -2803,6 +2806,8 @@ typedef struct { int8_t resetOffsetCfg; int8_t enableReplay; int8_t enableBatchMeta; + int32_t sessionTimeoutMs; + int32_t maxPollIntervalMs; } SCMSubscribeReq; static FORCE_INLINE int32_t tSerializeSCMSubscribeReq(void** buf, const SCMSubscribeReq* pReq) { @@ -2824,11 +2829,14 @@ static FORCE_INLINE int32_t tSerializeSCMSubscribeReq(void** buf, const SCMSubsc tlen += taosEncodeFixedI8(buf, pReq->resetOffsetCfg); tlen += taosEncodeFixedI8(buf, pReq->enableReplay); tlen += taosEncodeFixedI8(buf, pReq->enableBatchMeta); + tlen += taosEncodeFixedI32(buf, pReq->sessionTimeoutMs); + tlen += taosEncodeFixedI32(buf, pReq->maxPollIntervalMs); return tlen; } -static FORCE_INLINE void* tDeserializeSCMSubscribeReq(void* buf, SCMSubscribeReq* pReq) { +static FORCE_INLINE void* tDeserializeSCMSubscribeReq(void* buf, SCMSubscribeReq* pReq, int32_t len) { + void* start = buf; buf = taosDecodeFixedI64(buf, &pReq->consumerId); buf = taosDecodeStringTo(buf, pReq->cgroup); buf = taosDecodeStringTo(buf, pReq->clientId); @@ -2849,6 +2857,14 @@ static FORCE_INLINE void* tDeserializeSCMSubscribeReq(void* buf, SCMSubscribeReq buf = taosDecodeFixedI8(buf, &pReq->resetOffsetCfg); buf = taosDecodeFixedI8(buf, &pReq->enableReplay); buf = taosDecodeFixedI8(buf, &pReq->enableBatchMeta); + if (buf - start < len) { + buf = taosDecodeFixedI32(buf, &pReq->sessionTimeoutMs); + buf = taosDecodeFixedI32(buf, &pReq->maxPollIntervalMs); + } else { + pReq->sessionTimeoutMs = DEFAULT_SESSION_TIMEOUT; + pReq->maxPollIntervalMs = DEFAULT_MAX_POLL_INTERVAL; + } + return buf; } @@ -4060,6 +4076,7 @@ typedef struct { int64_t consumerId; int32_t epoch; SArray* topics; + int8_t pollFlag; } SMqHbReq; typedef struct { diff --git a/include/util/tdef.h b/include/util/tdef.h index 9c2858ed30..70358c861c 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -221,6 +221,8 @@ typedef enum ELogicConditionType { #define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string #define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string #define TSDB_CGROUP_LEN 193 // it is a null-terminated string +#define TSDB_CLIENT_ID_LEN 256 // it is a null-terminated string +#define TSDB_CONSUMER_ID_LEN 32 // it is a null-terminated string #define TSDB_OFFSET_LEN 64 // it is a null-terminated string #define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string #define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 21d1a528da..2921b7b333 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -37,6 +37,7 @@ struct SMqMgmt { static TdThreadOnce tmqInit = PTHREAD_ONCE_INIT; // initialize only once volatile int32_t tmqInitRes = 0; // initialize rsp code static struct SMqMgmt tmqMgmt = {0}; +static int8_t pollFlag = 0; typedef struct { int32_t code; @@ -56,7 +57,7 @@ struct tmq_list_t { }; struct tmq_conf_t { - char clientId[256]; + char clientId[TSDB_CLIENT_ID_LEN]; char groupId[TSDB_CGROUP_LEN]; int8_t autoCommit; int8_t resetOffset; @@ -66,6 +67,9 @@ struct tmq_conf_t { int8_t sourceExcluded; // do not consume, bit uint16_t port; int32_t autoCommitInterval; + int32_t sessionTimeoutMs; + int32_t heartBeatIntervalMs; + int32_t maxPollIntervalMs; char* ip; char* user; char* pass; @@ -77,15 +81,18 @@ struct tmq_conf_t { struct tmq_t { int64_t refId; char groupId[TSDB_CGROUP_LEN]; - char clientId[256]; + char clientId[TSDB_CLIENT_ID_LEN]; int8_t withTbName; int8_t useSnapshot; int8_t autoCommit; int32_t autoCommitInterval; + int32_t sessionTimeoutMs; + int32_t heartBeatIntervalMs; + int32_t maxPollIntervalMs; int8_t resetOffsetCfg; int8_t replayEnable; int8_t sourceExcluded; // do not consume, bit - uint64_t consumerId; + int64_t consumerId; tmq_commit_cb* commitCb; void* commitCbUserParam; int8_t enableBatchMeta; @@ -272,6 +279,9 @@ tmq_conf_t* tmq_conf_new() { conf->autoCommitInterval = DEFAULT_AUTO_COMMIT_INTERVAL; conf->resetOffset = TMQ_OFFSET__RESET_LATEST; conf->enableBatchMeta = false; + conf->heartBeatIntervalMs = DEFAULT_HEARTBEAT_INTERVAL; + conf->maxPollIntervalMs = DEFAULT_MAX_POLL_INTERVAL; + conf->sessionTimeoutMs = DEFAULT_SESSION_TIMEOUT; return conf; } @@ -301,7 +311,7 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } if (strcasecmp(key, "client.id") == 0) { - tstrncpy(conf->clientId, value, 256); + tstrncpy(conf->clientId, value, TSDB_CLIENT_ID_LEN); return TMQ_CONF_OK; } @@ -318,7 +328,38 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } if (strcasecmp(key, "auto.commit.interval.ms") == 0) { - conf->autoCommitInterval = taosStr2int64(value); + int64_t tmp = taosStr2int64(value); + if (tmp < 0 || EINVAL == errno || ERANGE == errno) { + return TMQ_CONF_INVALID; + } + conf->autoCommitInterval = (tmp > INT32_MAX ? INT32_MAX : tmp); + return TMQ_CONF_OK; + } + + if (strcasecmp(key, "session.timeout.ms") == 0) { + int64_t tmp = taosStr2int64(value); + if (tmp < 6000 || tmp > 1800000){ + return TMQ_CONF_INVALID; + } + conf->sessionTimeoutMs = tmp; + return TMQ_CONF_OK; + } + + if (strcasecmp(key, "heartbeat.interval.ms") == 0) { + int64_t tmp = taosStr2int64(value); + if (tmp < 1000 || tmp >= conf->sessionTimeoutMs){ + return TMQ_CONF_INVALID; + } + conf->heartBeatIntervalMs = tmp; + return TMQ_CONF_OK; + } + + if (strcasecmp(key, "max.poll.interval.ms") == 0) { + int64_t tmp = taosStr2int64(value); + if (tmp < 1000 || tmp > INT32_MAX){ + return TMQ_CONF_INVALID; + } + conf->maxPollIntervalMs = tmp; return TMQ_CONF_OK; } @@ -377,7 +418,12 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } if (strcasecmp(key, "td.connect.port") == 0) { - conf->port = taosStr2int64(value); + int64_t tmp = taosStr2int64(value); + if (tmp <= 0 || tmp > 65535) { + return TMQ_CONF_INVALID; + } + + conf->port = tmp; return TMQ_CONF_OK; } @@ -813,6 +859,7 @@ void tmqSendHbReq(void* param, void* tmrId) { SMqHbReq req = {0}; req.consumerId = tmq->consumerId; req.epoch = tmq->epoch; + req.pollFlag = atomic_load_8(&pollFlag); taosRLockLatch(&tmq->lock); req.topics = taosArrayInit(taosArrayGetSize(tmq->clientTopics), sizeof(TopicOffsetRows)); for (int i = 0; i < taosArrayGetSize(tmq->clientTopics); i++) { @@ -878,9 +925,10 @@ void tmqSendHbReq(void* param, void* tmrId) { tscError("tmqSendHbReq asyncSendMsgToServer failed"); } + atomic_val_compare_exchange_8(&pollFlag, 1, 0); OVER: tDestroySMqHbReq(&req); - taosTmrReset(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, param, tmqMgmt.timer, &tmq->hbLiveTimer); + taosTmrReset(tmqSendHbReq, tmq->heartBeatIntervalMs, param, tmqMgmt.timer, &tmq->hbLiveTimer); taosReleaseRef(tmqMgmt.rsetId, refId); } @@ -1134,6 +1182,9 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->useSnapshot = conf->snapEnable; pTmq->autoCommit = conf->autoCommit; pTmq->autoCommitInterval = conf->autoCommitInterval; + pTmq->sessionTimeoutMs = conf->sessionTimeoutMs; + pTmq->heartBeatIntervalMs = conf->heartBeatIntervalMs; + pTmq->maxPollIntervalMs = conf->maxPollIntervalMs; pTmq->commitCb = conf->commitCb; pTmq->commitCbUserParam = conf->commitCbUserParam; pTmq->resetOffsetCfg = conf->resetOffset; @@ -1173,7 +1224,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); *pRefId = pTmq->refId; - pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, pRefId, tmqMgmt.timer); + pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, pTmq->heartBeatIntervalMs, pRefId, tmqMgmt.timer); char buf[TSDB_OFFSET_LEN] = {0}; STqOffsetVal offset = {.type = pTmq->resetOffsetCfg}; @@ -1203,7 +1254,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { tscInfo("consumer:0x%" PRIx64 " cgroup:%s, subscribe %d topics", tmq->consumerId, tmq->groupId, sz); req.consumerId = tmq->consumerId; - tstrncpy(req.clientId, tmq->clientId, 256); + tstrncpy(req.clientId, tmq->clientId, TSDB_CLIENT_ID_LEN); tstrncpy(req.cgroup, tmq->groupId, TSDB_CGROUP_LEN); req.topicNames = taosArrayInit(sz, sizeof(void*)); @@ -1215,6 +1266,8 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { req.withTbName = tmq->withTbName; req.autoCommit = tmq->autoCommit; req.autoCommitInterval = tmq->autoCommitInterval; + req.sessionTimeoutMs = tmq->sessionTimeoutMs; + req.maxPollIntervalMs = tmq->maxPollIntervalMs; req.resetOffsetCfg = tmq->resetOffsetCfg; req.enableReplay = tmq->replayEnable; req.enableBatchMeta = tmq->enableBatchMeta; @@ -2207,6 +2260,8 @@ TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { } } + atomic_val_compare_exchange_8(&pollFlag, 0, 1); + while (1) { tmqHandleAllDelayedTask(tmq); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 0c0073b4a7..2d69a687a6 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -482,16 +482,16 @@ static const SSysDbTableSchema connectionsSchema[] = { static const SSysDbTableSchema consumerSchema[] = { - {.name = "consumer_id", .bytes = 32, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, - {.name = "consumer_group", .bytes = SYSTABLE_SCH_TABLE_NAME_LEN, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, - {.name = "client_id", .bytes = SYSTABLE_SCH_TABLE_NAME_LEN, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, + {.name = "consumer_id", .bytes = TSDB_CONSUMER_ID_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, + {.name = "consumer_group", .bytes = TSDB_CGROUP_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, + {.name = "client_id", .bytes = TSDB_CLIENT_ID_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, {.name = "status", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, {.name = "topics", .bytes = TSDB_TOPIC_FNAME_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, /*{.name = "end_point", .bytes = TSDB_IPv4ADDR_LEN + 6 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},*/ {.name = "up_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "subscribe_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "rebalance_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, - {.name = "parameters", .bytes = 64 + TSDB_OFFSET_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, + {.name = "parameters", .bytes = 128 + TSDB_OFFSET_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY, .sysInfo = false}, }; static const SSysDbTableSchema offsetSchema[] = { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 10719674f5..3612e6553c 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -7002,6 +7002,7 @@ int32_t tSerializeSMqHbReq(void *buf, int32_t bufLen, SMqHbReq *pReq) { } } + if (tEncodeI8(&encoder, pReq->pollFlag) < 0) return -1; tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -7041,6 +7042,9 @@ int32_t tDeserializeSMqHbReq(void *buf, int32_t bufLen, SMqHbReq *pReq) { } } } + if (!tDecodeIsEnd(&decoder)) { + if (tDecodeI8(&decoder, &pReq->pollFlag) < 0) return -1; + } tEndDecode(&decoder); tDecoderClear(&decoder); diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 089c4a10b3..b700c440a5 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -596,11 +596,12 @@ typedef struct { typedef struct { int64_t consumerId; char cgroup[TSDB_CGROUP_LEN]; - char clientId[256]; + char clientId[TSDB_CLIENT_ID_LEN]; int8_t updateType; // used only for update int32_t epoch; int32_t status; int32_t hbStatus; // hbStatus is not applicable to serialization + int32_t pollStatus; // pollStatus is not applicable to serialization SRWLatch lock; // lock is used for topics update SArray* currentTopics; // SArray SArray* rebNewTopics; // SArray @@ -620,6 +621,8 @@ typedef struct { int8_t autoCommit; int32_t autoCommitInterval; int32_t resetOffsetCfg; + int32_t sessionTimeoutMs; + int32_t maxPollIntervalMs; } SMqConsumerObj; SMqConsumerObj *tNewSMqConsumerObj(int64_t consumerId, char *cgroup, int8_t updateType, char *topic, SCMSubscribeReq *subscribe); diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 9a7a8155ec..c37739252c 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -25,7 +25,7 @@ #include "tcompare.h" #include "tname.h" -#define MND_CONSUMER_VER_NUMBER 2 +#define MND_CONSUMER_VER_NUMBER 3 #define MND_CONSUMER_RESERVE_SIZE 64 #define MND_MAX_GROUP_PER_TOPIC 100 @@ -40,7 +40,7 @@ static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg); static int32_t mndProcessAskEpReq(SRpcMsg *pMsg); static int32_t mndProcessMqHbReq(SRpcMsg *pMsg); static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg); -static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg); +//static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg); int32_t mndInitConsumer(SMnode *pMnode) { SSdbTable table = { @@ -57,7 +57,7 @@ int32_t mndInitConsumer(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_TMQ_HB, mndProcessMqHbReq); mndSetMsgHandle(pMnode, TDMT_MND_TMQ_ASK_EP, mndProcessAskEpReq); // mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessMqTimerMsg); - mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg); +// mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg); mndSetMsgHandle(pMnode, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, mndProcessConsumerClearMsg); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndRetrieveConsumer); @@ -144,56 +144,56 @@ FAILED: return code; } -static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg) { - int32_t code = 0; - SMnode *pMnode = pMsg->info.node; - SMqConsumerRecoverMsg *pRecoverMsg = pMsg->pCont; - SMqConsumerObj *pConsumerNew = NULL; - STrans *pTrans = NULL; - SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pRecoverMsg->consumerId); - if (pConsumer == NULL) { - mError("cannot find consumer %" PRId64 " when processing consumer recover msg", pRecoverMsg->consumerId); - code = -1; - goto END; - } - - mInfo("receive consumer recover msg, consumer:0x%" PRIx64 " status:%d(%s)", pRecoverMsg->consumerId, - pConsumer->status, mndConsumerStatusName(pConsumer->status)); - - if (pConsumer->status != MQ_CONSUMER_STATUS_LOST) { - terrno = TSDB_CODE_MND_CONSUMER_NOT_READY; - code = -1; - goto END; - } - - pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup, CONSUMER_UPDATE_REC, NULL, NULL); - if (pConsumerNew == NULL){ - code = -1; - goto END; - } - - pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pMsg, "recover-csm"); - if (pTrans == NULL) { - code = -1; - goto END; - } - code = validateTopics(pConsumer->assignedTopics, pMnode, pMsg->info.conn.user, false); - if (code != 0) { - goto END; - } - - code = mndSetConsumerCommitLogs(pTrans, pConsumerNew); - if (code != 0) { - goto END; - } - - code = mndTransPrepare(pMnode, pTrans); -END: - mndReleaseConsumer(pMnode, pConsumer); - tDeleteSMqConsumerObj(pConsumerNew); - mndTransDrop(pTrans); - return code; -} +//static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg) { +// int32_t code = 0; +// SMnode *pMnode = pMsg->info.node; +// SMqConsumerRecoverMsg *pRecoverMsg = pMsg->pCont; +// SMqConsumerObj *pConsumerNew = NULL; +// STrans *pTrans = NULL; +// SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pRecoverMsg->consumerId); +// if (pConsumer == NULL) { +// mError("cannot find consumer %" PRId64 " when processing consumer recover msg", pRecoverMsg->consumerId); +// code = -1; +// goto END; +// } +// +// mInfo("receive consumer recover msg, consumer:0x%" PRIx64 " status:%d(%s)", pRecoverMsg->consumerId, +// pConsumer->status, mndConsumerStatusName(pConsumer->status)); +// +// if (pConsumer->status != MQ_CONSUMER_STATUS_LOST) { +// terrno = TSDB_CODE_MND_CONSUMER_NOT_READY; +// code = -1; +// goto END; +// } +// +// pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup, CONSUMER_UPDATE_REC, NULL, NULL); +// if (pConsumerNew == NULL){ +// code = -1; +// goto END; +// } +// +// pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pMsg, "recover-csm"); +// if (pTrans == NULL) { +// code = -1; +// goto END; +// } +// code = validateTopics(pConsumer->assignedTopics, pMnode, pMsg->info.conn.user, false); +// if (code != 0) { +// goto END; +// } +// +// code = mndSetConsumerCommitLogs(pTrans, pConsumerNew); +// if (code != 0) { +// goto END; +// } +// +// code = mndTransPrepare(pMnode, pTrans); +//END: +// mndReleaseConsumer(pMnode, pConsumer); +// tDeleteSMqConsumerObj(pConsumerNew); +// mndTransDrop(pTrans); +// return code; +//} static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) { int32_t code = 0; @@ -328,13 +328,15 @@ static int32_t mndProcessMqHbReq(SRpcMsg *pMsg) { } atomic_store_32(&pConsumer->hbStatus, 0); - - int32_t status = atomic_load_32(&pConsumer->status); - - if (status == MQ_CONSUMER_STATUS_LOST) { - mInfo("try to recover consumer:0x%" PRIx64, consumerId); - mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_CONSUMER_RECOVER, &pMsg->info); + if (req.pollFlag == 1){ + atomic_store_32(&pConsumer->pollStatus, 0); } +// int32_t status = atomic_load_32(&pConsumer->status); +// +// if (status == MQ_CONSUMER_STATUS_LOST) { +// mInfo("try to recover consumer:0x%" PRIx64, consumerId); +// mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_CONSUMER_RECOVER, &pMsg->info); +// } storeOffsetRows(pMnode, &req, pConsumer); code = buildMqHbRsp(pMsg, &rsp); @@ -480,14 +482,12 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { goto END; } - atomic_store_32(&pConsumer->hbStatus, 0); - // 1. check consumer status int32_t status = atomic_load_32(&pConsumer->status); - - if (status == MQ_CONSUMER_STATUS_LOST) { - mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_CONSUMER_RECOVER, &pMsg->info); - } +// +// if (status == MQ_CONSUMER_STATUS_LOST) { +// mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_CONSUMER_RECOVER, &pMsg->info); +// } if (status != MQ_CONSUMER_STATUS_READY) { mInfo("consumer:0x%" PRIx64 " not ready, status: %s", consumerId, mndConsumerStatusName(status)); @@ -652,7 +652,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { int32_t code = 0; SCMSubscribeReq subscribe = {0}; - tDeserializeSCMSubscribeReq(msgStr, &subscribe); + tDeserializeSCMSubscribeReq(msgStr, &subscribe, pMsg->contLen); SMqConsumerObj *pConsumerNew = NULL; STrans *pTrans = NULL; @@ -806,17 +806,17 @@ static int32_t mndConsumerActionDelete(SSdb *pSdb, SMqConsumerObj *pConsumer) { return 0; } -static void updateConsumerStatus(SMqConsumerObj *pConsumer) { - int32_t status = pConsumer->status; - - if (taosArrayGetSize(pConsumer->rebNewTopics) == 0 && taosArrayGetSize(pConsumer->rebRemovedTopics) == 0) { - if (status == MQ_CONSUMER_STATUS_REBALANCE) { - pConsumer->status = MQ_CONSUMER_STATUS_READY; - } else if (status == MQ_CONSUMER_STATUS_READY && taosArrayGetSize(pConsumer->currentTopics) == 0) { - pConsumer->status = MQ_CONSUMER_STATUS_LOST; - } - } -} +//static void updateConsumerStatus(SMqConsumerObj *pConsumer) { +// int32_t status = pConsumer->status; +// +// if (taosArrayGetSize(pConsumer->rebNewTopics) == 0 && taosArrayGetSize(pConsumer->rebRemovedTopics) == 0) { +// if (status == MQ_CONSUMER_STATUS_REBALANCE) { +// pConsumer->status = MQ_CONSUMER_STATUS_READY; +// } else if (status == MQ_CONSUMER_STATUS_READY && taosArrayGetSize(pConsumer->currentTopics) == 0) { +// pConsumer->status = MQ_CONSUMER_STATUS_LOST; +// } +// } +//} // remove from topic list static void removeFromTopicList(SArray *topicList, const char *pTopic, int64_t consumerId, char *type) { @@ -863,14 +863,14 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, pOldConsumer->subscribeTime = taosGetTimestampMs(); pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; mInfo("consumer:0x%" PRIx64 " subscribe update, modify existed consumer", pOldConsumer->consumerId); - } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REC) { - int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics); - for (int32_t i = 0; i < sz; i++) { - char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i)); - taosArrayPush(pOldConsumer->rebNewTopics, &topic); - } - pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; - mInfo("consumer:0x%" PRIx64 " recover update", pOldConsumer->consumerId); +// } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REC) { +// int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics); +// for (int32_t i = 0; i < sz; i++) { +// char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i)); +// taosArrayPush(pOldConsumer->rebNewTopics, &topic); +// } +// pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; +// mInfo("consumer:0x%" PRIx64 " recover update", pOldConsumer->consumerId); } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REB) { atomic_add_fetch_32(&pOldConsumer->epoch, 1); @@ -889,7 +889,11 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, } int32_t status = pOldConsumer->status; - updateConsumerStatus(pOldConsumer); +// updateConsumerStatus(pOldConsumer); + if (taosArrayGetSize(pOldConsumer->rebNewTopics) == 0 && taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0) { + pOldConsumer->status = MQ_CONSUMER_STATUS_READY; + } + pOldConsumer->rebalanceTime = taosGetTimestampMs(); atomic_add_fetch_32(&pOldConsumer->epoch, 1); @@ -906,7 +910,10 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, removeFromTopicList(pOldConsumer->currentTopics, topic, pOldConsumer->consumerId, "current"); int32_t status = pOldConsumer->status; - updateConsumerStatus(pOldConsumer); +// updateConsumerStatus(pOldConsumer); + if (taosArrayGetSize(pOldConsumer->rebNewTopics) == 0 && taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0) { + pOldConsumer->status = MQ_CONSUMER_STATUS_READY; + } pOldConsumer->rebalanceTime = taosGetTimestampMs(); atomic_add_fetch_32(&pOldConsumer->epoch, 1); @@ -973,7 +980,7 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * int32_t cols = 0; // consumer id - char consumerIdHex[32] = {0}; + char consumerIdHex[TSDB_CONSUMER_ID_LEN + VARSTR_HEADER_SIZE] = {0}; sprintf(varDataVal(consumerIdHex), "0x%" PRIx64, pConsumer->consumerId); varDataSetLen(consumerIdHex, strlen(varDataVal(consumerIdHex))); @@ -988,19 +995,20 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * colDataSetVal(pColInfo, numOfRows, (const char *)cgroup, false); // client id - char clientId[256 + VARSTR_HEADER_SIZE] = {0}; + char clientId[TSDB_CLIENT_ID_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(clientId, pConsumer->clientId); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)clientId, false); // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; const char *pStatusName = mndConsumerStatusName(pConsumer->status); + char *status = taosMemoryCalloc(1, pShow->pMeta->pSchemas[cols].bytes); STR_TO_VARSTR(status, pStatusName); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)status, false); + taosMemoryFree(status); // one subscribed topic pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); @@ -1033,14 +1041,14 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * STqOffsetVal pVal = {.type = pConsumer->resetOffsetCfg}; tFormatOffset(buf, TSDB_OFFSET_LEN, &pVal); - char parasStr[64 + TSDB_OFFSET_LEN + VARSTR_HEADER_SIZE] = {0}; - sprintf(varDataVal(parasStr), "tbname:%d,commit:%d,interval:%dms,reset:%s", pConsumer->withTbName, - pConsumer->autoCommit, pConsumer->autoCommitInterval, buf); + char *parasStr = taosMemoryCalloc(1, pShow->pMeta->pSchemas[cols].bytes); + sprintf(varDataVal(parasStr), "tbname:%d,commit:%d,interval:%dms,reset:%s,maxPoll:%d,timeout:%d", pConsumer->withTbName, + pConsumer->autoCommit, pConsumer->autoCommitInterval, buf, pConsumer->maxPollIntervalMs, pConsumer->sessionTimeoutMs); varDataSetLen(parasStr, strlen(varDataVal(parasStr))); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)parasStr, false); - + taosMemoryFree(parasStr); numOfRows++; } @@ -1063,8 +1071,8 @@ const char *mndConsumerStatusName(int status) { switch (status) { case MQ_CONSUMER_STATUS_READY: return "ready"; - case MQ_CONSUMER_STATUS_LOST: - return "lost"; +// case MQ_CONSUMER_STATUS_LOST: +// return "lost"; case MQ_CONSUMER_STATUS_REBALANCE: return "rebalancing"; default: diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 5164557184..1373691cdd 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -266,6 +266,7 @@ SMqConsumerObj *tNewSMqConsumerObj(int64_t consumerId, char *cgroup, int8_t upda pConsumer->epoch = 0; pConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; pConsumer->hbStatus = 0; + pConsumer->pollStatus = 0; taosInitRWLatch(&pConsumer->lock); pConsumer->createTime = taosGetTimestampMs(); @@ -294,6 +295,8 @@ SMqConsumerObj *tNewSMqConsumerObj(int64_t consumerId, char *cgroup, int8_t upda pConsumer->autoCommit = subscribe->autoCommit; pConsumer->autoCommitInterval = subscribe->autoCommitInterval; pConsumer->resetOffsetCfg = subscribe->resetOffsetCfg; + pConsumer->maxPollIntervalMs = subscribe->maxPollIntervalMs; + pConsumer->sessionTimeoutMs = subscribe->sessionTimeoutMs; pConsumer->rebNewTopics = taosArrayDup(subscribe->topicNames, topicNameDup); @@ -396,6 +399,8 @@ int32_t tEncodeSMqConsumerObj(void **buf, const SMqConsumerObj *pConsumer) { tlen += taosEncodeFixedI8(buf, pConsumer->autoCommit); tlen += taosEncodeFixedI32(buf, pConsumer->autoCommitInterval); tlen += taosEncodeFixedI32(buf, pConsumer->resetOffsetCfg); + tlen += taosEncodeFixedI32(buf, pConsumer->maxPollIntervalMs); + tlen += taosEncodeFixedI32(buf, pConsumer->sessionTimeoutMs); return tlen; } @@ -456,6 +461,14 @@ void *tDecodeSMqConsumerObj(const void *buf, SMqConsumerObj *pConsumer, int8_t s buf = taosDecodeFixedI32(buf, &pConsumer->autoCommitInterval); buf = taosDecodeFixedI32(buf, &pConsumer->resetOffsetCfg); } + if (sver > 2){ + buf = taosDecodeFixedI32(buf, &pConsumer->maxPollIntervalMs); + buf = taosDecodeFixedI32(buf, &pConsumer->sessionTimeoutMs); + } else{ + pConsumer->maxPollIntervalMs = DEFAULT_MAX_POLL_INTERVAL; + pConsumer->sessionTimeoutMs = DEFAULT_SESSION_TIMEOUT; + } + return (void *)buf; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index e2bedc258a..d2615d7b2f 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -27,8 +27,7 @@ #define MND_SUBSCRIBE_VER_NUMBER 3 #define MND_SUBSCRIBE_RESERVE_SIZE 64 -#define MND_CONSUMER_LOST_HB_CNT 6 -#define MND_CONSUMER_LOST_CLEAR_THRESHOLD 43200 +//#define MND_CONSUMER_LOST_HB_CNT 6 static int32_t mqRebInExecCnt = 0; @@ -234,7 +233,7 @@ static void processRemovedConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, c int32_t numOfRemoved = taosArrayGetSize(pInput->pRebInfo->removedConsumers); int32_t actualRemoved = 0; for (int32_t i = 0; i < numOfRemoved; i++) { - uint64_t consumerId = *(uint64_t *)taosArrayGet(pInput->pRebInfo->removedConsumers, i); + int64_t consumerId = *(int64_t *)taosArrayGet(pInput->pRebInfo->removedConsumers, i); SMqConsumerEp *pConsumerEp = taosHashGet(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); if (pConsumerEp == NULL) { continue; @@ -378,12 +377,10 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { } } - if (taosArrayGetSize(pOutput->pSub->unassignedVgs) == 0 && taosArrayGetSize(newVgs) != 0) { + if (taosArrayGetSize(newVgs) != 0) { taosArrayAddAll(pOutput->pSub->unassignedVgs, newVgs); mInfo("[rebalance] processRemoveAddVgs add new vg num:%d", (int)taosArrayGetSize(newVgs)); taosArrayDestroy(newVgs); - } else { - taosArrayDestroyP(newVgs, (FDelete)tDeleteSMqVgEp); } return totalVgNum; } @@ -678,7 +675,7 @@ static void freeRebalanceItem(void *param) { static void buildRebInfo(SHashObj *rebSubHash, SArray *topicList, int8_t type, char *group, int64_t consumerId) { int32_t topicNum = taosArrayGetSize(topicList); for (int32_t i = 0; i < topicNum; i++) { - char key[TSDB_SUBSCRIBE_KEY_LEN]; + char key[TSDB_SUBSCRIBE_KEY_LEN] = {0}; char *removedTopic = taosArrayGetP(topicList, i); mndMakeSubscribeKey(key, group, removedTopic); SMqRebInfo *pRebSub = mndGetOrCreateRebSub(rebSubHash, key); @@ -707,7 +704,7 @@ static void checkForVgroupSplit(SMnode *pMnode, SMqConsumerObj *pConsumer, SHash SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVgEp->vgId); if (!pVgroup) { - char key[TSDB_SUBSCRIBE_KEY_LEN]; + char key[TSDB_SUBSCRIBE_KEY_LEN] = {0}; mndMakeSubscribeKey(key, pConsumer->cgroup, topic); mndGetOrCreateRebSub(rebSubHash, key); mInfo("vnode splitted, vgId:%d rebalance will be triggered", pVgEp->vgId); @@ -733,27 +730,25 @@ static void mndCheckConsumer(SRpcMsg *pMsg, SHashObj *rebSubHash) { } int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1); + int32_t pollStatus = atomic_add_fetch_32(&pConsumer->pollStatus, 1); int32_t status = atomic_load_32(&pConsumer->status); mDebug("[rebalance] check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 - ", hbstatus:%d", + ", hbstatus:%d, pollStatus:%d", pConsumer->consumerId, status, mndConsumerStatusName(status), pConsumer->subscribeTime, - pConsumer->createTime, hbStatus); + pConsumer->createTime, hbStatus, pollStatus); if (status == MQ_CONSUMER_STATUS_READY) { - if (taosArrayGetSize(pConsumer->assignedTopics) == 0) { // unsubscribe or close + if (taosArrayGetSize(pConsumer->currentTopics) == 0) { // unsubscribe or close mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info); - } else if (hbStatus > MND_CONSUMER_LOST_HB_CNT) { + } else if (hbStatus * tsMqRebalanceInterval * 1000 >= pConsumer->sessionTimeoutMs || + pollStatus * tsMqRebalanceInterval * 1000 >= pConsumer->maxPollIntervalMs) { taosRLockLatch(&pConsumer->lock); buildRebInfo(rebSubHash, pConsumer->currentTopics, 0, pConsumer->cgroup, pConsumer->consumerId); taosRUnLockLatch(&pConsumer->lock); } else { checkForVgroupSplit(pMnode, pConsumer, rebSubHash); } - } else if (status == MQ_CONSUMER_STATUS_LOST) { - if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { // clear consumer if lost a day - mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info); - } } else { taosRLockLatch(&pConsumer->lock); buildRebInfo(rebSubHash, pConsumer->rebNewTopics, 1, pConsumer->cgroup, pConsumer->consumerId); @@ -832,8 +827,8 @@ static int32_t buildRebOutput(SMnode *pMnode, SMqRebInputObj *rebInput, SMqRebOu if (pSub == NULL) { // split sub key and extract topic - char topic[TSDB_TOPIC_FNAME_LEN]; - char cgroup[TSDB_CGROUP_LEN]; + char topic[TSDB_TOPIC_FNAME_LEN] = {0}; + char cgroup[TSDB_CGROUP_LEN] = {0}; mndSplitSubscribeKey(key, topic, cgroup, true); SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); @@ -878,7 +873,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; mDebug("[rebalance] start to process mq timer") - if (!mndRebTryStart()) { + if (!mndRebTryStart()) { mInfo("[rebalance] mq rebalance already in progress, do nothing") return code; } From 4ef2c3964968aa6baf49cb89fd607b6594402df6 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 11 Jul 2024 14:56:25 +0800 Subject: [PATCH 02/17] fix:[TS-4592]remove lost status --- include/common/tmsg.h | 2 +- source/client/src/clientEnv.c | 2 +- source/dnode/mnode/impl/inc/mndConsumer.h | 2 +- source/dnode/mnode/impl/src/mndSubscribe.c | 82 ++++++++++---------- source/dnode/mnode/impl/src/mndTopic.c | 90 +++++++++++----------- source/dnode/mnode/sdb/src/sdbFile.c | 2 + utils/test/c/tmq_taosx_ci.c | 1 + 7 files changed, 93 insertions(+), 88 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index c812138282..be7accd0da 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2773,7 +2773,7 @@ enum { }; #define DEFAULT_MAX_POLL_INTERVAL 3000000 -#define DEFAULT_SESSION_TIMEOUT 10000 +#define DEFAULT_SESSION_TIMEOUT 12000 typedef struct { char name[TSDB_TOPIC_FNAME_LEN]; // accout.topic diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 3a821768f8..1336372ae2 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -127,7 +127,7 @@ static void generateWriteSlowLog(STscObj *pTscObj, SRequestObj *pRequest, int32_ cJSON_AddItemToObject(json, "error_info", cJSON_CreateString(tstrerror(pRequest->code))); cJSON_AddItemToObject(json, "type", cJSON_CreateNumber(reqType)); cJSON_AddItemToObject(json, "rows_num", cJSON_CreateNumber(pRequest->body.resInfo.numOfRows + pRequest->body.resInfo.totalRows)); - if(strlen(pRequest->sqlstr) > pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen){ + if(pRequest->sqlstr != NULL && strlen(pRequest->sqlstr) > pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen){ char tmp = pRequest->sqlstr[pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen]; pRequest->sqlstr[pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen] = '\0'; cJSON_AddItemToObject(json, "sql", cJSON_CreateString(pRequest->sqlstr)); diff --git a/source/dnode/mnode/impl/inc/mndConsumer.h b/source/dnode/mnode/impl/inc/mndConsumer.h index 5184ad0eca..7308343d1c 100644 --- a/source/dnode/mnode/impl/inc/mndConsumer.h +++ b/source/dnode/mnode/impl/inc/mndConsumer.h @@ -25,7 +25,7 @@ extern "C" { enum { MQ_CONSUMER_STATUS_REBALANCE = 1, MQ_CONSUMER_STATUS_READY, - MQ_CONSUMER_STATUS_LOST, +// MQ_CONSUMER_STATUS_LOST, }; int32_t mndInitConsumer(SMnode *pMnode); diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index d2615d7b2f..e03eee07a1 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -749,11 +749,13 @@ static void mndCheckConsumer(SRpcMsg *pMsg, SHashObj *rebSubHash) { } else { checkForVgroupSplit(pMnode, pConsumer, rebSubHash); } - } else { + } else if (status == MQ_CONSUMER_STATUS_REBALANCE) { taosRLockLatch(&pConsumer->lock); buildRebInfo(rebSubHash, pConsumer->rebNewTopics, 1, pConsumer->cgroup, pConsumer->consumerId); buildRebInfo(rebSubHash, pConsumer->rebRemovedTopics, 0, pConsumer->cgroup, pConsumer->consumerId); taosRUnLockLatch(&pConsumer->lock); + } else { + mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info); } mndReleaseConsumer(pMnode, pConsumer); @@ -974,41 +976,41 @@ END: return ret; } -static int32_t mndDropConsumerByGroup(SMnode *pMnode, STrans *pTrans, char *cgroup, char *topic) { - void *pIter = NULL; - SMqConsumerObj *pConsumer = NULL; - int ret = 0; - while (1) { - pIter = sdbFetch(pMnode->pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer); - if (pIter == NULL) { - break; - } - - // drop consumer in lost status, other consumers not in lost status already deleted by rebalance - if (pConsumer->status != MQ_CONSUMER_STATUS_LOST || strcmp(cgroup, pConsumer->cgroup) != 0) { - sdbRelease(pMnode->pSdb, pConsumer); - continue; - } - int32_t sz = taosArrayGetSize(pConsumer->assignedTopics); - for (int32_t i = 0; i < sz; i++) { - char *name = taosArrayGetP(pConsumer->assignedTopics, i); - if (strcmp(topic, name) == 0) { - int32_t code = mndSetConsumerDropLogs(pTrans, pConsumer); - if (code != 0) { - ret = code; - goto END; - } - } - } - - sdbRelease(pMnode->pSdb, pConsumer); - } - -END: - sdbRelease(pMnode->pSdb, pConsumer); - sdbCancelFetch(pMnode->pSdb, pIter); - return ret; -} +//static int32_t mndDropConsumerByGroup(SMnode *pMnode, STrans *pTrans, char *cgroup, char *topic) { +// void *pIter = NULL; +// SMqConsumerObj *pConsumer = NULL; +// int ret = 0; +// while (1) { +// pIter = sdbFetch(pMnode->pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer); +// if (pIter == NULL) { +// break; +// } +// +// // drop consumer in lost status, other consumers not in lost status already deleted by rebalance +// if (pConsumer->status != MQ_CONSUMER_STATUS_LOST || strcmp(cgroup, pConsumer->cgroup) != 0) { +// sdbRelease(pMnode->pSdb, pConsumer); +// continue; +// } +// int32_t sz = taosArrayGetSize(pConsumer->assignedTopics); +// for (int32_t i = 0; i < sz; i++) { +// char *name = taosArrayGetP(pConsumer->assignedTopics, i); +// if (strcmp(topic, name) == 0) { +// int32_t code = mndSetConsumerDropLogs(pTrans, pConsumer); +// if (code != 0) { +// ret = code; +// goto END; +// } +// } +// } +// +// sdbRelease(pMnode->pSdb, pConsumer); +// } +// +//END: +// sdbRelease(pMnode->pSdb, pConsumer); +// sdbCancelFetch(pMnode->pSdb, pIter); +// return ret; +//} static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; @@ -1055,10 +1057,10 @@ static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { goto end; } - code = mndDropConsumerByGroup(pMnode, pTrans, dropReq.cgroup, dropReq.topic); - if (code != 0) { - goto end; - } +// code = mndDropConsumerByGroup(pMnode, pTrans, dropReq.cgroup, dropReq.topic); +// if (code != 0) { +// goto end; +// } code = sendDeleteSubToVnode(pMnode, pSub, pTrans); if (code != 0) { diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index bcb38a3902..9ca0fed08a 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -668,47 +668,47 @@ static bool checkTopic(SArray *topics, char *topicName){ return false; } -static int32_t mndDropConsumerByTopic(SMnode *pMnode, STrans *pTrans, char *topicName){ - int32_t code = 0; - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; - SMqConsumerObj *pConsumer = NULL; - while (1) { - pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer); - if (pIter == NULL) { - break; - } - - bool found = checkTopic(pConsumer->assignedTopics, topicName); - if (found){ - if (pConsumer->status == MQ_CONSUMER_STATUS_LOST) { - code = mndSetConsumerDropLogs(pTrans, pConsumer); - if (code != 0) { - goto end; - } - sdbRelease(pSdb, pConsumer); - continue; - } - mError("topic:%s, failed to drop since subscribed by consumer:0x%" PRIx64 ", in consumer group %s", - topicName, pConsumer->consumerId, pConsumer->cgroup); - code = TSDB_CODE_MND_TOPIC_SUBSCRIBED; - goto end; - } - - if (checkTopic(pConsumer->rebNewTopics, topicName) || checkTopic(pConsumer->rebRemovedTopics, topicName)) { - code = TSDB_CODE_MND_TOPIC_SUBSCRIBED; - mError("topic:%s, failed to drop since subscribed by consumer:%" PRId64 ", in consumer group %s (reb new)", - topicName, pConsumer->consumerId, pConsumer->cgroup); - goto end; - } - sdbRelease(pSdb, pConsumer); - } - -end: - sdbRelease(pSdb, pConsumer); - sdbCancelFetch(pSdb, pIter); - return code; -} +//static int32_t mndDropConsumerByTopic(SMnode *pMnode, STrans *pTrans, char *topicName){ +// int32_t code = 0; +// SSdb *pSdb = pMnode->pSdb; +// void *pIter = NULL; +// SMqConsumerObj *pConsumer = NULL; +// while (1) { +// pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer); +// if (pIter == NULL) { +// break; +// } +// +// bool found = checkTopic(pConsumer->assignedTopics, topicName); +// if (found){ +// if (pConsumer->status == MQ_CONSUMER_STATUS_LOST) { +// code = mndSetConsumerDropLogs(pTrans, pConsumer); +// if (code != 0) { +// goto end; +// } +// sdbRelease(pSdb, pConsumer); +// continue; +// } +// mError("topic:%s, failed to drop since subscribed by consumer:0x%" PRIx64 ", in consumer group %s", +// topicName, pConsumer->consumerId, pConsumer->cgroup); +// code = TSDB_CODE_MND_TOPIC_SUBSCRIBED; +// goto end; +// } +// +// if (checkTopic(pConsumer->rebNewTopics, topicName) || checkTopic(pConsumer->rebRemovedTopics, topicName)) { +// code = TSDB_CODE_MND_TOPIC_SUBSCRIBED; +// mError("topic:%s, failed to drop since subscribed by consumer:%" PRId64 ", in consumer group %s (reb new)", +// topicName, pConsumer->consumerId, pConsumer->cgroup); +// goto end; +// } +// sdbRelease(pSdb, pConsumer); +// } +// +//end: +// sdbRelease(pSdb, pConsumer); +// sdbCancelFetch(pSdb, pIter); +// return code; +//} static int32_t mndDropCheckInfoByTopic(SMnode *pMnode, STrans *pTrans, SMqTopicObj *pTopic){ // broadcast to all vnode @@ -804,10 +804,10 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { goto end; } - code = mndDropConsumerByTopic(pMnode, pTrans, dropReq.name); - if (code != 0) { - goto end; - } +// code = mndDropConsumerByTopic(pMnode, pTrans, dropReq.name); +// if (code != 0) { +// goto end; +// } code = mndDropSubByTopic(pMnode, pTrans, dropReq.name); if (code < 0) { diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index d94650695c..ab928a4edc 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -339,6 +339,8 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { code = sdbWriteWithoutFree(pSdb, pRaw); if (code != 0) { mError("failed to read sdb file:%s since %s", file, terrstr()); + code = sdbWriteWithoutFree(pSdb, pRaw); + goto _OVER; } } diff --git a/utils/test/c/tmq_taosx_ci.c b/utils/test/c/tmq_taosx_ci.c index 51d134a463..2f6a5fa59b 100644 --- a/utils/test/c/tmq_taosx_ci.c +++ b/utils/test/c/tmq_taosx_ci.c @@ -596,6 +596,7 @@ tmq_t* build_consumer() { tmq_conf_set(conf, "enable.auto.commit", "true"); tmq_conf_set(conf, "auto.offset.reset", "earliest"); tmq_conf_set(conf, "msg.consume.excluded", "1"); +// tmq_conf_set(conf, "max.poll.interval.ms", "20000"); if (g_conf.snapShot) { tmq_conf_set(conf, "experimental.snapshot.enable", "true"); From ea9819744eab2b5655bdade5f1f61d1c57e35c02 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 11 Jul 2024 15:08:59 +0800 Subject: [PATCH 03/17] fix:[TS-4592]remove lost status --- source/client/src/clientEnv.c | 2 +- source/dnode/mnode/sdb/src/sdbFile.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 18907ede5d..ecfa1e3392 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -127,7 +127,7 @@ static void generateWriteSlowLog(STscObj *pTscObj, SRequestObj *pRequest, int32_ cJSON_AddItemToObject(json, "error_info", cJSON_CreateString(tstrerror(pRequest->code))); cJSON_AddItemToObject(json, "type", cJSON_CreateNumber(reqType)); cJSON_AddItemToObject(json, "rows_num", cJSON_CreateNumber(pRequest->body.resInfo.numOfRows + pRequest->body.resInfo.totalRows)); - if(pRequest->sqlstr != NULL && strlen(pRequest->sqlstr) > pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen){ + if(strlen(pRequest->sqlstr) > pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen){ char tmp = pRequest->sqlstr[pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen]; pRequest->sqlstr[pTscObj->pAppInfo->monitorParas.tsSlowLogMaxLen] = '\0'; cJSON_AddItemToObject(json, "sql", cJSON_CreateString(pRequest->sqlstr)); diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index ab928a4edc..d94650695c 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -339,8 +339,6 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { code = sdbWriteWithoutFree(pSdb, pRaw); if (code != 0) { mError("failed to read sdb file:%s since %s", file, terrstr()); - code = sdbWriteWithoutFree(pSdb, pRaw); - goto _OVER; } } From 44027f7978f7780ba5371ccdead6f60de80277a2 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 30 Jul 2024 01:25:37 +0800 Subject: [PATCH 04/17] feat:[TS-4592]remove lost status for consumer --- include/common/tmsg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4058805d02..5a6f13d1c7 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2886,7 +2886,7 @@ static FORCE_INLINE int32_t tDeserializeSCMSubscribeReq(void* buf, SCMSubscribeR buf = taosDecodeFixedI8(buf, &pReq->resetOffsetCfg); buf = taosDecodeFixedI8(buf, &pReq->enableReplay); buf = taosDecodeFixedI8(buf, &pReq->enableBatchMeta); - if (buf - start < len) { + if ((char*)buf - (char*)start < len) { buf = taosDecodeFixedI32(buf, &pReq->sessionTimeoutMs); buf = taosDecodeFixedI32(buf, &pReq->maxPollIntervalMs); } else { From 1d426f40d2f719eb55e82ccb5a2ed57b3c10d5b9 Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 30 Jul 2024 09:38:58 +0000 Subject: [PATCH 05/17] fix/TD-31140 --- source/dnode/mnode/impl/src/mndDb.c | 8 ++++---- source/dnode/mnode/impl/src/mndVgroup.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index a307e3557b..5a7831ac0e 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -791,12 +791,12 @@ static int32_t mndCreateDb(SMnode *pMnode, SRpcMsg *pReq, SCreateDbReq *pCreate, mndSetDefaultDbCfg(&dbObj.cfg); if ((code = mndCheckDbName(dbObj.name, pUser)) != 0) { - mError("db:%s, failed to create since %s", pCreate->db, terrstr()); + mError("db:%s, failed to create, check db name failed, since %s", pCreate->db, terrstr()); TAOS_RETURN(code); } if ((code = mndCheckDbCfg(pMnode, &dbObj.cfg)) != 0) { - mError("db:%s, failed to create since %s", pCreate->db, terrstr()); + mError("db:%s, failed to create, check db cfg failed, since %s", pCreate->db, terrstr()); TAOS_RETURN(code); } @@ -812,7 +812,7 @@ static int32_t mndCreateDb(SMnode *pMnode, SRpcMsg *pReq, SCreateDbReq *pCreate, SVgObj *pVgroups = NULL; if ((code = mndAllocVgroup(pMnode, &dbObj, &pVgroups)) != 0) { - mError("db:%s, failed to create since %s", pCreate->db, terrstr()); + mError("db:%s, failed to create, alloc vgroup failed, since %s", pCreate->db, terrstr()); TAOS_RETURN(code); } @@ -965,7 +965,7 @@ static int32_t mndProcessCreateDbReq(SRpcMsg *pReq) { TAOS_CHECK_GOTO(mndAcquireUser(pMnode, pReq->info.conn.user, &pUser), &lino, _OVER); - code = mndCreateDb(pMnode, pReq, &createReq, pUser); + TAOS_CHECK_GOTO(mndCreateDb(pMnode, pReq, &createReq, pUser), &lino, _OVER); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; SName name = {0}; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 57a7453eac..5cfc896a1c 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -877,7 +877,7 @@ int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups) { pVgroup->dbUid = pDb->uid; pVgroup->replica = pDb->cfg.replications; - if (mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray) != 0) { + if ((code = mndGetAvailableDnode(pMnode, pDb, pVgroup, pArray)) != 0) { goto _OVER; } From 9c2dae3613e5d45f770192ef7ca1277bff3ebded Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 30 Jul 2024 19:27:33 +0800 Subject: [PATCH 06/17] feat:[TS-4592]remove lost status for consumer --- source/dnode/mnode/impl/src/mndSubscribe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index a74200472a..b2a866979c 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -362,13 +362,13 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; int32_t j = 0; while (j < taosArrayGetSize(pConsumerEp->vgs)) { - SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); - MND_TMQ_NULL_CHECK(pVgEp); + SMqVgEp *pVgEpTmp = taosArrayGetP(pConsumerEp->vgs, j); + MND_TMQ_NULL_CHECK(pVgEpTmp); bool find = false; for (int32_t k = 0; k < taosArrayGetSize(newVgs); k++) { SMqVgEp *pnewVgEp = taosArrayGetP(newVgs, k); MND_TMQ_NULL_CHECK(pnewVgEp); - if (pVgEp->vgId == pnewVgEp->vgId) { + if (pVgEpTmp->vgId == pnewVgEp->vgId) { tDeleteSMqVgEp(pnewVgEp); taosArrayRemove(newVgs, k); find = true; @@ -376,8 +376,8 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { } } if (!find) { - mInfo("[rebalance] processRemoveAddVgs old vgId:%d", pVgEp->vgId); - tDeleteSMqVgEp(pVgEp); + mInfo("[rebalance] processRemoveAddVgs old vgId:%d", pVgEpTmp->vgId); + tDeleteSMqVgEp(pVgEpTmp); taosArrayRemove(pConsumerEp->vgs, j); continue; } @@ -385,7 +385,7 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { } } - if (taosArrayGetSize(newVgs) != 0) { + if (taosArrayGetSize(pOutput->pSub->unassignedVgs) == 0 && taosArrayGetSize(newVgs) != 0) { MND_TMQ_NULL_CHECK(taosArrayAddAll(pOutput->pSub->unassignedVgs, newVgs)); mInfo("[rebalance] processRemoveAddVgs add new vg num:%d", (int)taosArrayGetSize(newVgs)); taosArrayDestroy(newVgs); From a81d8261e5b4159756c8d7f41c5e22a8d2fa320f Mon Sep 17 00:00:00 2001 From: sima Date: Wed, 31 Jul 2024 11:30:53 +0800 Subject: [PATCH 07/17] fix:[TD-31113] return 0.0 instead of -0.0 when using round() and ceil() --- source/libs/scalar/src/sclfunc.c | 4 ++-- tests/system-test/2-query/ceil.py | 5 ++++- tests/system-test/2-query/round.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index 3e5471700c..23cc7324f0 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -262,7 +262,7 @@ static int32_t doScalarFunction(SScalarParam *pInput, int32_t inputNum, SScalarP colDataSetNULL(pOutputData, i); continue; } - out[i] = f1(in[i]); + out[i] = f1(in[i]) + 0; } break; } @@ -276,7 +276,7 @@ static int32_t doScalarFunction(SScalarParam *pInput, int32_t inputNum, SScalarP colDataSetNULL(pOutputData, i); continue; } - out[i] = d1(in[i]); + out[i] = d1(in[i]) + 0; } break; } diff --git a/tests/system-test/2-query/ceil.py b/tests/system-test/2-query/ceil.py index aabc716a74..e719d819d8 100644 --- a/tests/system-test/2-query/ceil.py +++ b/tests/system-test/2-query/ceil.py @@ -57,7 +57,7 @@ class TDTestCase: ( '2020-10-21 01:01:01.000', 1, 11111, 111, 11, 1.11, 11.11, 1, "binary1", "nchar1", now()+1a ) ( '2020-12-31 01:01:01.000', 2, 22222, 222, 22, 2.22, 22.22, 0, "binary2", "nchar2", now()+2a ) ( '2021-01-01 01:01:06.000', 3, 33333, 333, 33, 3.33, 33.33, 0, "binary3", "nchar3", now()+3a ) - ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, 4.44, 44.44, 1, "binary4", "nchar4", now()+4a ) + ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, -0.444, 44.44, 1, "binary4", "nchar4", now()+4a ) ( '2021-07-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ( '2021-09-30 01:01:16.000', 5, 55555, 555, 55, 5.55, 55.55, 0, "binary5", "nchar5", now()+5a ) ( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a ) @@ -223,6 +223,9 @@ class TDTestCase: tdSql.checkData(3, 4, 33) tdSql.checkData(5, 5, None) + tdSql.query(f"select ceil(c5) from {dbname}.t1") + tdSql.checkData(4 , 0, 0) + self.check_result_auto( f"select c1, c2, c3 , c4, c5 from {dbname}.t1", f"select (c1), ceil(c2) ,ceil(c3), ceil(c4), ceil(c5) from {dbname}.t1") # used for sub table diff --git a/tests/system-test/2-query/round.py b/tests/system-test/2-query/round.py index d647f516ae..f87f234fa3 100644 --- a/tests/system-test/2-query/round.py +++ b/tests/system-test/2-query/round.py @@ -53,7 +53,7 @@ class TDTestCase: ( '2020-10-21 01:01:01.000', 1, 11111, 111, 11, 1.11, 11.11, 1, "binary1", "nchar1", now()+1a ) ( '2020-12-31 01:01:01.000', 2, 22222, 222, 22, 2.22, 22.22, 0, "binary2", "nchar2", now()+2a ) ( '2021-01-01 01:01:06.000', 3, 33333, 333, 33, 3.33, 33.33, 0, "binary3", "nchar3", now()+3a ) - ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, 4.44, 44.44, 1, "binary4", "nchar4", now()+4a ) + ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, -0.444, 44.44, 1, "binary4", "nchar4", now()+4a ) ( '2021-07-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ( '2021-09-30 01:01:16.000', 5, 55555, 555, 55, 5.55, 55.55, 0, "binary5", "nchar5", now()+5a ) ( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a ) @@ -232,6 +232,9 @@ class TDTestCase: tdSql.checkData(3, 4, 33) tdSql.checkData(5, 5, None) + tdSql.query(f"select round(c5) from {dbname}.t1") + tdSql.checkData(4 , 0, 0) + self.check_result_auto( f"select c1, c2, c3 , c4, c5 from {dbname}.t1", f"select (c1), round(c2) ,round(c3), round(c4), round(c5) from {dbname}.t1") # used for sub table From 52b3e1be6f5d8e34ff654914d0f819bf5bc65459 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 31 Jul 2024 14:08:22 +0800 Subject: [PATCH 08/17] fix(stream): drop orphan tasks/reset task in trans done by write-queue. --- include/common/tmsg.h | 16 +- include/common/tmsgdef.h | 6 +- source/common/src/tmsg.c | 49 +- source/dnode/mnode/impl/inc/mndStream.h | 17 +- source/dnode/mnode/impl/src/mndStream.c | 535 ++++-------------- source/dnode/mnode/impl/src/mndStreamHb.c | 232 +++++--- source/dnode/mnode/impl/src/mndStreamUtil.c | 585 +++++++++++++++++++- source/libs/stream/src/streamTask.c | 1 + 8 files changed, 883 insertions(+), 558 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 782b9a072d..4476eee447 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1827,12 +1827,18 @@ typedef struct { int32_t tSerializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); // int32_t tDeserializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); -typedef struct { - int64_t tick; -} SMStreamTickReq; +typedef struct SOrphanTask { + int64_t streamId; + int32_t taskId; + int32_t nodeId; +} SOrphanTask; -int32_t tSerializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); -// int32_t tDeserializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); +typedef struct SMStreamDropOrphanMsg { + SArray* pList; // SArray +} SMStreamDropOrphanMsg; + +int32_t tSerializeDropOrphanTaskMsg(void* buf, int32_t bufLen, SMStreamDropOrphanMsg* pMsg); +int32_t tDeserializeDropOrphanTaskMsg(void* buf, int32_t bufLen, SMStreamDropOrphanMsg* pMsg); typedef struct { int32_t id; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 3515df3127..b73a15ebcc 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -224,9 +224,9 @@ TD_DEF_MSG_TYPE(TDMT_MND_RESTORE_DNODE, "restore-dnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_PAUSE_STREAM, "pause-stream", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) // not used TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, "stream-checkpoint-remain", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, "stream-checkpoint-remain", NULL, NULL) // not used TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GRANT_NOTIFY, "grant-notify", NULL, NULL) @@ -251,6 +251,8 @@ TD_DEF_MSG_TYPE(TDMT_MND_STREAM_UPDATE_CHKPT_EVT, "stream-update-chkpt-evt", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHKPT_REPORT, "stream-chkpt-report", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CONSEN_TIMER, "stream-consen-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_DROP_ORPHANTASKS, "stream-drop-orphan-tasks", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_TASK_RESET, "stream-reset-tasks", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) TD_CLOSE_MSG_SEG(TDMT_END_MND_MSG) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 9e663d495c..23356ca7c1 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -5275,12 +5275,22 @@ int32_t tSerializeSMTimerMsg(void *buf, int32_t bufLen, SMTimerReq *pReq) { // return 0; // } -int32_t tSerializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { +int32_t tSerializeDropOrphanTaskMsg(void* buf, int32_t bufLen, SMStreamDropOrphanMsg* pMsg) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); if (tStartEncode(&encoder) < 0) return -1; - if (tEncodeI64(&encoder, pReq->tick) < 0) return -1; + + int32_t size = taosArrayGetSize(pMsg->pList); + if (tEncodeI32(&encoder, size) < 0) return -1; + + for (int32_t i = 0; i < size; i++) { + SOrphanTask *pTask = taosArrayGet(pMsg->pList, i); + if (tEncodeI64(&encoder, pTask->streamId) < 0) return -1; + if (tEncodeI32(&encoder, pTask->taskId) < 0) return -1; + if (tEncodeI32(&encoder, pTask->nodeId) < 0) return -1; + } + tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -5288,17 +5298,34 @@ int32_t tSerializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pR return tlen; } -// int32_t tDeserializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { -// SDecoder decoder = {0}; -// tDecoderInit(&decoder, buf, bufLen); +int32_t tDeserializeDropOrphanTaskMsg(void* buf, int32_t bufLen, SMStreamDropOrphanMsg* pMsg) { + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); -// if (tStartDecode(&decoder) < 0) return -1; -// if (tDecodeI64(&decoder, &pReq->tick) < 0) return -1; -// tEndDecode(&decoder); + if (tStartDecode(&decoder) < 0) return -1; -// tDecoderClear(&decoder); -// return 0; -// } + int32_t num = 0; + if (tDecodeI32(&decoder, &num) < 0) return -1; + + if (num > 0) { + pMsg->pList = taosArrayInit(num, sizeof(SOrphanTask)); + if (NULL == pMsg->pList) return -1; + for (int32_t i = 0; i < num; ++i) { + SOrphanTask info = {0}; + if (tDecodeI64(&decoder, &info.streamId) < 0) return -1; + if (tDecodeI32(&decoder, &info.taskId) < 0) return -1; + if (tDecodeI32(&decoder, &info.nodeId) < 0) return -1; + + if (taosArrayPush(pMsg->pList, &info) == NULL) { + return -1; + } + } + } + + tEndDecode(&decoder); + tDecoderClear(&decoder); + return 0; +} int32_t tEncodeSReplica(SEncoder *pEncoder, SReplica *pReplica) { if (tEncodeI32(pEncoder, pReplica->id) < 0) return -1; diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index bd0d97e34d..d713de5158 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -52,6 +52,11 @@ typedef struct SStreamTransMgmt { SHashObj *pDBTrans; } SStreamTransMgmt; +typedef struct SStreamTaskResetMsg { + int64_t streamId; + int32_t transId; +} SStreamTaskResetMsg; + typedef struct SStreamExecInfo { bool initTaskList; SArray *pNodeList; @@ -63,6 +68,7 @@ typedef struct SStreamExecInfo { SHashObj *pTransferStateStreams; SHashObj *pChkptStreams; SHashObj *pStreamConsensus; + SArray *pKilledChkptTrans; // SArray } SStreamExecInfo; extern SStreamExecInfo execInfo; @@ -75,12 +81,6 @@ typedef struct SNodeEntry { int64_t hbTimestamp; // second } SNodeEntry; -typedef struct SOrphanTask { - int64_t streamId; - int32_t taskId; - int32_t nodeId; -} SOrphanTask; - typedef struct { SMsgHead head; } SMStreamReqCheckpointRsp, SMStreamUpdateChkptRsp, SMStreamReqConsensChkptRsp; @@ -152,6 +152,11 @@ void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreChec void mndClearConsensusRspEntry(SCheckpointConsensusInfo *pInfo); int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId); +int32_t setStreamAttrInResBlock(SStreamObj *pStream, SSDataBlock *pBlock, int32_t numOfRows); +int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows); + +int32_t mndProcessResetStatusReq(SRpcMsg *pReq); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index e20529f4b6..297b747ea0 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -61,9 +61,7 @@ static int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq); static int32_t mndProcessCheckpointReport(SRpcMsg *pReq); static int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg); static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, int32_t code); - - -static int32_t setStreamAttrInResBlock(SStreamObj *pStream, SSDataBlock *pBlock, int32_t numOfRows); +static int32_t mndProcessDropOrphanTaskReq(SRpcMsg* pReq); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); @@ -121,6 +119,8 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_DROP_ORPHANTASKS, mndProcessDropOrphanTaskReq); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_TASK_RESET, mndProcessResetStatusReq); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_REQ_CHKPT, mndProcessStreamReqCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHKPT_REPORT, mndProcessCheckpointReport); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_UPDATE_CHKPT_EVT, mndScanCheckpointReportInfo); @@ -154,6 +154,7 @@ int32_t mndInitStream(SMnode *pMnode) { void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); taosArrayDestroy(execInfo.pNodeList); + taosArrayDestroy(execInfo.pKilledChkptTrans); taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.transMgmt.pDBTrans); taosHashCleanup(execInfo.pTransferStateStreams); @@ -271,38 +272,12 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream) { sdbRelease(pSdb, pStream); } -static void mndShowStreamStatus(char *dst, SStreamObj *pStream) { - int8_t status = atomic_load_8(&pStream->status); - if (status == STREAM_STATUS__NORMAL) { - strcpy(dst, "ready"); - } else if (status == STREAM_STATUS__STOP) { - strcpy(dst, "stop"); - } else if (status == STREAM_STATUS__FAILED) { - strcpy(dst, "failed"); - } else if (status == STREAM_STATUS__RECOVER) { - strcpy(dst, "recover"); - } else if (status == STREAM_STATUS__PAUSE) { - strcpy(dst, "paused"); - } -} - SSdbRaw *mndStreamSeqActionEncode(SStreamObj *pStream) { return NULL; } SSdbRow *mndStreamSeqActionDecode(SSdbRaw *pRaw) { return NULL; } int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream) { return 0; } int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream) { return 0; } int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream) { return 0; } -static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { - int8_t trigger = pStream->conf.trigger; - if (trigger == STREAM_TRIGGER_AT_ONCE) { - strcpy(dst, "at once"); - } else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) { - strcpy(dst, "window close"); - } else if (trigger == STREAM_TRIGGER_MAX_DELAY) { - strcpy(dst, "max delay"); - } -} - static int32_t mndCheckCreateStreamReq(SCMCreateStreamReq *pCreate) { if (pCreate->name[0] == 0 || pCreate->sql == NULL || pCreate->sql[0] == 0 || pCreate->sourceDB[0] == 0 || pCreate->targetStbFullName[0] == 0) { @@ -1365,8 +1340,8 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMDropStreamReq dropReq = {0}; if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { mError("invalid drop stream msg recv, discarded"); - terrno = TSDB_CODE_INVALID_MSG; - return -1; + code = TSDB_CODE_INVALID_MSG; + TAOS_RETURN(code); } mDebug("recv drop stream:%s msg", dropReq.name); @@ -1379,10 +1354,10 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { tFreeMDropStreamReq(&dropReq); return 0; } else { - terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; + code = TSDB_CODE_MND_STREAM_NOT_EXIST; mError("stream:%s not exist failed to drop it", dropReq.name); tFreeMDropStreamReq(&dropReq); - return -1; + TAOS_RETURN(code); } } @@ -1399,11 +1374,11 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { sdbCancelFetch(pMnode->pSdb, pIter); tFreeMDropStreamReq(&dropReq); - terrno = TSDB_CODE_TSMA_MUST_BE_DROPPED; + code = TSDB_CODE_TSMA_MUST_BE_DROPPED; mError("try to drop sma-related stream:%s, uid:0x%" PRIx64 " code:%s only allowed to be dropped along with sma", dropReq.name, pStream->uid, tstrerror(terrno)); - return -1; + TAOS_RETURN(code); } if (pSma) { @@ -1425,7 +1400,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (conflict) { sdbRelease(pMnode->pSdb, pStream); tFreeMDropStreamReq(&dropReq); - return -1; + return terrno; } STrans *pTrans = NULL; @@ -1434,26 +1409,35 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mError("stream:%s uid:0x%" PRIx64 " failed to drop since %s", dropReq.name, pStream->uid, terrstr()); sdbRelease(pMnode->pSdb, pStream); tFreeMDropStreamReq(&dropReq); - return -1; + TAOS_RETURN(code); } code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->uid); - - // drop all tasks - if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { - mError("stream:%s uid:0x%" PRIx64 " failed to drop task since %s", dropReq.name, pStream->uid, terrstr()); + if (code) { + mError("failed to register drop stream trans, code:%s", tstrerror(code)); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); tFreeMDropStreamReq(&dropReq); - return -1; + TAOS_RETURN(code); + } + + // drop all tasks + code = mndStreamSetDropAction(pMnode, pTrans, pStream); + if (code) { + mError("stream:%s uid:0x%" PRIx64 " failed to drop task since %s", dropReq.name, pStream->uid, tstrerror(code)); + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + tFreeMDropStreamReq(&dropReq); + TAOS_RETURN(code); } // drop stream - if (mndPersistTransLog(pStream, pTrans, SDB_STATUS_DROPPED) < 0) { + code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_DROPPED); + if (code) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); tFreeMDropStreamReq(&dropReq); - return -1; + TAOS_RETURN(code); } code = mndTransPrepare(pMnode, pTrans); @@ -1462,7 +1446,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); tFreeMDropStreamReq(&dropReq); - return -1; + TAOS_RETURN(code); } // kill the related checkpoint trans @@ -1488,7 +1472,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (code == 0) { return TSDB_CODE_ACTION_IN_PROGRESS; } else { - return code; + TAOS_RETURN(code); } } @@ -1570,15 +1554,6 @@ int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) return 0; } -static void int64ToHexStr(int64_t id, char *pBuf, int32_t bufLen) { - memset(pBuf, 0, bufLen); - pBuf[2] = '0'; - pBuf[3] = 'x'; - - int32_t len = tintToHex(id, &pBuf[4]); - varDataSetLen(pBuf, len + 2); -} - static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -1606,379 +1581,6 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } -int32_t setStreamAttrInResBlock(SStreamObj *pStream, SSDataBlock *pBlock, int32_t numOfRows) { - int32_t code = 0; - int32_t cols = 0; - int32_t lino = 0; - - char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); - SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - - code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); - TSDB_CHECK_CODE(code, lino, _end); - - // create time - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->createTime, false); - TSDB_CHECK_CODE(code, lino, _end); - - // stream id - char buf[128] = {0}; - int64ToHexStr(pStream->uid, buf, tListLen(buf)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, buf, false); - TSDB_CHECK_CODE(code, lino, _end); - - // related fill-history stream id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - if (pStream->hTaskUid != 0) { - int64ToHexStr(pStream->hTaskUid, buf, tListLen(buf)); - code = colDataSetVal(pColInfo, numOfRows, buf, false); - } else { - code = colDataSetVal(pColInfo, numOfRows, buf, true); - } - TSDB_CHECK_CODE(code, lino, _end); - - // related fill-history stream id - char sql[TSDB_SHOW_SQL_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(sql, pStream->sql, sizeof(sql)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)sql, false); - TSDB_CHECK_CODE(code, lino, _end); - - char status[20 + VARSTR_HEADER_SIZE] = {0}; - char status2[20] = {0}; - mndShowStreamStatus(status2, pStream); - STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); - TSDB_CHECK_CODE(code, lino, _end); - - char sourceDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(sourceDB, mndGetDbStr(pStream->sourceDb), sizeof(sourceDB)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&sourceDB, false); - TSDB_CHECK_CODE(code, lino, _end); - - char targetDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(targetDB, mndGetDbStr(pStream->targetDb), sizeof(targetDB)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&targetDB, false); - TSDB_CHECK_CODE(code, lino, _end); - - if (pStream->targetSTbName[0] == 0) { - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, NULL, true); - } else { - char targetSTB[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(targetSTB, mndGetStbStr(pStream->targetSTbName), sizeof(targetSTB)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&targetSTB, false); - } - TSDB_CHECK_CODE(code, lino, _end); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false); - TSDB_CHECK_CODE(code, lino, _end); - - char trigger[20 + VARSTR_HEADER_SIZE] = {0}; - char trigger2[20] = {0}; - mndShowStreamTrigger(trigger2, pStream); - STR_WITH_MAXSIZE_TO_VARSTR(trigger, trigger2, sizeof(trigger)); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&trigger, false); - TSDB_CHECK_CODE(code, lino, _end); - - // sink_quota - char sinkQuota[20 + VARSTR_HEADER_SIZE] = {0}; - sinkQuota[0] = '0'; - char dstStr[20] = {0}; - STR_TO_VARSTR(dstStr, sinkQuota) - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint interval - char tmp[20 + VARSTR_HEADER_SIZE] = {0}; - sprintf(varDataVal(tmp), "%d sec", tsStreamCheckpointInterval); - varDataSetLen(tmp, strlen(varDataVal(tmp))); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)tmp, false); - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint backup type - char backup[20 + VARSTR_HEADER_SIZE] = {0}; - STR_TO_VARSTR(backup, "none") - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)backup, false); - TSDB_CHECK_CODE(code, lino, _end); - - // history scan idle - char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0}; - strcpy(scanHistoryIdle, "100a"); - - memset(dstStr, 0, tListLen(dstStr)); - STR_TO_VARSTR(dstStr, scanHistoryIdle) - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); - - _end: - return code; -} - -static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { - SColumnInfoData *pColInfo; - int32_t cols = 0; - int32_t code = 0; - int32_t lino = 0; - - STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - - STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); - if (pe == NULL) { - mError("task:0x%" PRIx64 " not exists in any vnodes, streamName:%s, streamId:0x%" PRIx64 " createTs:%" PRId64 - " no valid status/stage info", - id.taskId, pStream->name, pStream->uid, pStream->createTime); - return TSDB_CODE_STREAM_TASK_NOT_EXIST; - } - - // stream name - char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); - TSDB_CHECK_CODE(code, lino, _end); - - // task id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - - char idstr[128] = {0}; - int64ToHexStr(pTask->id.taskId, idstr, tListLen(idstr)); - code = colDataSetVal(pColInfo, numOfRows, idstr, false); - TSDB_CHECK_CODE(code, lino, _end); - - // node type - char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; - varDataSetLen(nodeType, 5); - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - if (pTask->info.nodeId > 0) { - memcpy(varDataVal(nodeType), "vnode", 5); - } else { - memcpy(varDataVal(nodeType), "snode", 5); - } - code = colDataSetVal(pColInfo, numOfRows, nodeType, false); - TSDB_CHECK_CODE(code, lino, _end); - - // node id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - int64_t nodeId = TMAX(pTask->info.nodeId, 0); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); - TSDB_CHECK_CODE(code, lino, _end); - - // level - char level[20 + VARSTR_HEADER_SIZE] = {0}; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - memcpy(varDataVal(level), "source", 6); - varDataSetLen(level, 6); - } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - memcpy(varDataVal(level), "agg", 3); - varDataSetLen(level, 3); - } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - memcpy(varDataVal(level), "sink", 4); - varDataSetLen(level, 4); - } - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)level, false); - TSDB_CHECK_CODE(code, lino, _end); - - // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; - - const char *pStatus = streamTaskGetStatusStr(pe->status); - STR_TO_VARSTR(status, pStatus); - - // status - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)status, false); - TSDB_CHECK_CODE(code, lino, _end); - - // stage - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); - TSDB_CHECK_CODE(code, lino, _end); - - // input queue - char vbuf[40] = {0}; - char buf[38] = {0}; - const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; - snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate); - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - - // input total - const char *formatTotalMb = "%7.2f MiB"; - const char *formatTotalGb = "%7.2f GiB"; - if (pe->procsTotal < 1024) { - snprintf(buf, tListLen(buf), formatTotalMb, pe->procsTotal); - } else { - snprintf(buf, tListLen(buf), formatTotalGb, pe->procsTotal / 1024); - } - - memset(vbuf, 0, tListLen(vbuf)); - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - - // process throughput - const char *formatKb = "%7.2f KiB/s"; - const char *formatMb = "%7.2f MiB/s"; - if (pe->procsThroughput < 1024) { - snprintf(buf, tListLen(buf), formatKb, pe->procsThroughput); - } else { - snprintf(buf, tListLen(buf), formatMb, pe->procsThroughput / 1024); - } - - memset(vbuf, 0, tListLen(vbuf)); - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - - // output total - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - colDataSetNULL(pColInfo, numOfRows); - } else { - sprintf(buf, formatTotalMb, pe->outputTotal); - memset(vbuf, 0, tListLen(vbuf)); - STR_TO_VARSTR(vbuf, buf); - - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - } - - // output throughput - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - colDataSetNULL(pColInfo, numOfRows); - } else { - if (pe->outputThroughput < 1024) { - snprintf(buf, tListLen(buf), formatKb, pe->outputThroughput); - } else { - snprintf(buf, tListLen(buf), formatMb, pe->outputThroughput / 1024); - } - - memset(vbuf, 0, tListLen(vbuf)); - STR_TO_VARSTR(vbuf, buf); - - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - } - - // output queue - // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); - // STR_TO_VARSTR(vbuf, buf); - - // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); - - // info - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - const char *sinkStr = "%.2f MiB"; - snprintf(buf, tListLen(buf), sinkStr, pe->sinkDataSize); - } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - // offset info - const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; - snprintf(buf, tListLen(buf), offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer); - } else { - memset(buf, 0, tListLen(buf)); - } - - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - TSDB_CHECK_CODE(code, lino, _end); - - // start_time - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startTime, false); - TSDB_CHECK_CODE(code, lino, _end); - - // start id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointId, false); - TSDB_CHECK_CODE(code, lino, _end); - - // start ver - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointVer, false); - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint time - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - if (pe->checkpointInfo.latestTime != 0) { - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestTime, false); - } else { - code = colDataSetVal(pColInfo, numOfRows, 0, true); - } - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint_id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestId, false); - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint version - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestVer, false); - TSDB_CHECK_CODE(code, lino, _end); - - // checkpoint size - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetNULL(pColInfo, numOfRows); - - // checkpoint backup status - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, 0, true); - TSDB_CHECK_CODE(code, lino, _end); - - // ds_err_info - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, 0, true); - TSDB_CHECK_CODE(code, lino, _end); - - // history_task_id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - if (pe->hTaskId != 0) { - int64ToHexStr(pe->hTaskId, idstr, tListLen(idstr)); - code = colDataSetVal(pColInfo, numOfRows, idstr, false); - } else { - code = colDataSetVal(pColInfo, numOfRows, 0, true); - } - TSDB_CHECK_CODE(code, lino, _end); - - // history_task_status - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - code = colDataSetVal(pColInfo, numOfRows, 0, true); - TSDB_CHECK_CODE(code, lino, _end); - - _end: - return code; -} - static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -3141,3 +2743,80 @@ int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, return TSDB_CODE_ACTION_IN_PROGRESS; } + +static int32_t mndProcessDropOrphanTaskReq(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + int32_t code = 0; + SOrphanTask *pTask = NULL; + int32_t i = 0; + + SMStreamDropOrphanMsg msg = {0}; + code = tDeserializeDropOrphanTaskMsg(pReq->pCont, pReq->contLen, &msg); + if (code) { + return code; + } + + int32_t numOfTasks = taosArrayGetSize(msg.pList); + if (numOfTasks == 0) { + mDebug("no orphan tasks to drop, no need to create trans"); + return code; + } + + mDebug("create trans to drop %d orphan tasks", numOfTasks); + + i = 0; + while (i < numOfTasks && ((pTask = taosArrayGet(msg.pList, i)) == NULL)) { + i += 1; + } + + if (pTask == NULL) { + mError("failed to extract entry in drop orphan task list, not create trans to drop orphan-task"); + return TSDB_CODE_SUCCESS; + } + + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = mndStreamTransConflictCheck(pMnode, pTask->streamId, MND_STREAM_DROP_NAME, false); + if (conflict) { + return -1; + } + + SStreamObj dummyObj = {.uid = pTask->streamId, .sourceDb = "", .targetSTbName = ""}; + STrans *pTrans = NULL; + code = doCreateTrans(pMnode, &dummyObj, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream", &pTrans); + if (pTrans == NULL || code != 0) { + mError("failed to create trans to drop orphan tasks since %s", terrstr()); + return code; + } + + code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pTask->streamId); + if (code) { + return code; + } + + // drop all tasks + if ((code = mndStreamSetDropActionFromList(pMnode, pTrans, msg.pList)) < 0) { + mError("failed to create trans to drop orphan tasks since %s", terrstr()); + mndTransDrop(pTrans); + return code; + } + + // drop stream + if ((code = mndPersistTransLog(&dummyObj, pTrans, SDB_STATUS_DROPPED)) < 0) { + mndTransDrop(pTrans); + return code; + } + + code = mndTransPrepare(pMnode, pTrans); + if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { + mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); + mndTransDrop(pTrans); + return code; + } + + if (code == TSDB_CODE_SUCCESS) { + mDebug("create drop %d orphan tasks trans succ", numOfTasks); + } + + mndTransDrop(pTrans); + return code; +} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index 507cafabe5..11556a212d 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -22,12 +22,12 @@ typedef struct SFailedCheckpointInfo { int32_t transId; } SFailedCheckpointInfo; -static void mndStreamStartUpdateCheckpointInfo(SMnode *pMnode); +static int32_t mndStreamSendUpdateChkptInfoMsg(SMnode *pMnode); +static int32_t mndSendDropOrphanTasksMsg(SMnode *pMnode, SArray *pList); +static int32_t mndSendResetFromCheckpointMsg(SMnode *pMnode, int64_t streamId, int32_t transId); static void updateStageInfo(STaskStatusEntry *pTaskEntry, int64_t stage); static void addIntoCheckpointList(SArray *pList, const SFailedCheckpointInfo *pInfo); -static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t transId); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); -static int32_t mndDropOrphanTasks(SMnode *pMnode, SArray *pList); static int32_t suspendAllStreams(SMnode *pMnode, SRpcHandleInfo *info); static bool validateHbMsg(const SArray *pNodeList, int32_t vgId); static void cleanupAfterProcessHbMsg(SStreamHbMsg *pReq, SArray *pFailedChkptList, SArray *pOrphanTasks); @@ -37,6 +37,10 @@ void updateStageInfo(STaskStatusEntry *pTaskEntry, int64_t stage) { int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for (int32_t j = 0; j < numOfNodes; ++j) { SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, j); + if (pNodeEntry == NULL) { + continue; + } + if (pNodeEntry->nodeId == pTaskEntry->nodeId) { mInfo("vgId:%d stage updated from %" PRId64 " to %" PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); @@ -52,7 +56,7 @@ void addIntoCheckpointList(SArray *pList, const SFailedCheckpointInfo *pInfo) { int32_t num = taosArrayGetSize(pList); for (int32_t i = 0; i < num; ++i) { SFailedCheckpointInfo *p = taosArrayGet(pList, i); - if (p->transId == pInfo->transId) { + if (p && (p->transId == pInfo->transId)) { return; } } @@ -104,15 +108,110 @@ int32_t mndCreateStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return TSDB_CODE_ACTION_IN_PROGRESS; + return code; } -int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t transId) { - int32_t code = TSDB_CODE_SUCCESS; - mndKillTransImpl(pMnode, transId, ""); +int32_t mndSendResetFromCheckpointMsg(SMnode *pMnode, int64_t streamId, int32_t transId) { + int32_t size = sizeof(SStreamTaskResetMsg); + int32_t num = taosArrayGetSize(execInfo.pKilledChkptTrans); + for(int32_t i = 0; i < num; ++i) { + SStreamTaskResetMsg* p = taosArrayGet(execInfo.pKilledChkptTrans, i); + if (p == NULL) { + continue; + } + + if (p->transId == transId && p->streamId == streamId) { + mDebug("already reset stream:0x%" PRIx64 ", not send reset-msg again for transId:%d", streamId, transId); + return TSDB_CODE_SUCCESS; + } + } + + if (num >= 10) { + taosArrayRemove(execInfo.pKilledChkptTrans, 0); // remove this first, append new reset trans in the tail + } + + SStreamTaskResetMsg p = {.streamId = streamId, .transId = transId}; + + void *px = taosArrayPush(execInfo.pKilledChkptTrans, &p); + if (px == NULL) { + mError("failed to push reset-msg trans:%d into the killed chkpt trans list, size:%d", transId, num - 1); + return terrno; + } + + SStreamTaskResetMsg *pReq = rpcMallocCont(size); + if (pReq == NULL) { + return terrno; + } + + pReq->streamId = streamId; + pReq->transId = transId; + + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_TASK_RESET, .pCont = pReq, .contLen = size}; + int32_t code = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + if (code) { + mError("failed to put reset-task msg into write queue, code:%s", tstrerror(code)); + } else { + mDebug("send reset task status msg for transId:%d succ", transId); + } + + return code; +} + +int32_t mndStreamSendUpdateChkptInfoMsg(SMnode *pMnode) { // here reuse the doCheckpointmsg + int32_t size = sizeof(SMStreamDoCheckpointMsg); + void *pMsg = rpcMallocCont(size); + if (pMsg == NULL) { + return terrno; + } + + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_UPDATE_CHKPT_EVT, .pCont = pMsg, .contLen = size}; + int32_t code = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + if (code) { + mError("failed to put update-checkpoint-info msg into write queue, code:%s", tstrerror(code)); + } else { + mDebug("send update checkpoint-info msg succ"); + } + + return code; +} + +int32_t mndSendDropOrphanTasksMsg(SMnode *pMnode, SArray *pList) { + SMStreamDropOrphanMsg msg = {.pList = pList}; + + int32_t num = taosArrayGetSize(pList); + int32_t contLen = tSerializeDropOrphanTaskMsg(NULL, 0, &msg); + if (contLen <= 0) { + return terrno; + } + + void *pReq = rpcMallocCont(contLen); + if (pReq == NULL) { + return terrno; + } + + (void)tSerializeDropOrphanTaskMsg(pReq, contLen, &msg); + + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_DROP_ORPHANTASKS, .pCont = pReq, .contLen = contLen}; + int32_t code = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + if (code) { + mError("failed to put drop-orphan task msg into write queue, code:%s", tstrerror(code)); + } else { + mDebug("send drop %d orphan tasks msg succ", num); + } + + return code; +} + +int32_t mndProcessResetStatusReq(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + int32_t code = TSDB_CODE_SUCCESS; SStreamObj *pStream = NULL; - code = mndGetStreamObj(pMnode, streamId, &pStream); + + SStreamTaskResetMsg* pMsg = pReq->pCont; + mndKillTransImpl(pMnode, pMsg->transId, ""); + + code = mndGetStreamObj(pMnode, pMsg->streamId, &pStream); if (pStream == NULL || code != 0) { code = TSDB_CODE_STREAM_TASK_NOT_EXIST; mError("failed to acquire the streamObj:0x%" PRIx64 " to reset checkpoint, may have been dropped", pStream->uid); @@ -123,7 +222,7 @@ int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t t pStream->sourceDb, pStream->targetSTbName); } else { mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, transId:%d, create reset trans", pStream->name, - pStream->uid, transId); + pStream->uid, pMsg->transId); code = mndCreateStreamResetStatusTrans(pMnode, pStream); } } @@ -138,6 +237,10 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { for (int k = 0; k < num; ++k) { int32_t *pVgId = taosArrayGet(pNodeList, k); + if (pVgId == NULL) { + continue; + } + mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); bool setFlag = false; @@ -145,8 +248,7 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { for (int i = 0; i < numOfNodes; ++i) { SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); - - if (pNodeEntry->nodeId == *pVgId) { + if ((pNodeEntry) && (pNodeEntry->nodeId == *pVgId)) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); pNodeEntry->stageUpdated = true; setFlag = true; @@ -162,52 +264,6 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } -int32_t mndDropOrphanTasks(SMnode *pMnode, SArray *pList) { - SOrphanTask *pTask = taosArrayGet(pList, 0); - - // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = mndStreamTransConflictCheck(pMnode, pTask->streamId, MND_STREAM_DROP_NAME, false); - if (conflict) { - return -1; - } - - SStreamObj dummyObj = {.uid = pTask->streamId, .sourceDb = "", .targetSTbName = ""}; - STrans *pTrans = NULL; - int32_t code = - doCreateTrans(pMnode, &dummyObj, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream", &pTrans); - if (pTrans == NULL || code != 0) { - mError("failed to create trans to drop orphan tasks since %s", terrstr()); - return code; - } - - code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pTask->streamId); - if (code) { - return code; - } - // drop all tasks - if ((code = mndStreamSetDropActionFromList(pMnode, pTrans, pList)) < 0) { - mError("failed to create trans to drop orphan tasks since %s", terrstr()); - mndTransDrop(pTrans); - return code; - } - - // drop stream - if ((code = mndPersistTransLog(&dummyObj, pTrans, SDB_STATUS_DROPPED)) < 0) { - mndTransDrop(pTrans); - return code; - } - - code = mndTransPrepare(pMnode, pTrans); - if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { - mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return code; - } - - mndTransDrop(pTrans); - return code; -} - int32_t suspendAllStreams(SMnode *pMnode, SRpcHandleInfo *info) { SSdb *pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; @@ -259,7 +315,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { SStreamHbMsg req = {0}; SArray *pFailedChkpt = NULL; SArray *pOrphanTasks = NULL; - int32_t code = 0; + int32_t code = 0; if ((code = grantCheckExpire(TSDB_GRANT_STREAMS)) < 0) { if (suspendAllStreams(pMnode, &pReq->info) < 0) { @@ -273,8 +329,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (tDecodeStreamHbMsg(&decoder, &req) < 0) { tCleanupStreamHbMsg(&req); tDecoderClear(&decoder); - code = terrno = TSDB_CODE_INVALID_MSG; - return code; + TAOS_RETURN(TSDB_CODE_INVALID_MSG); } tDecoderClear(&decoder); @@ -282,6 +337,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { pFailedChkpt = taosArrayInit(4, sizeof(SFailedCheckpointInfo)); pOrphanTasks = taosArrayInit(4, sizeof(SOrphanTask)); + if (pFailedChkpt == NULL || pOrphanTasks == NULL) { + taosArrayDestroy(pFailedChkpt); + taosArrayDestroy(pOrphanTasks); + TAOS_RETURN(TSDB_CODE_OUT_OF_MEMORY); + } streamMutexLock(&execInfo.lock); @@ -289,12 +349,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (!validateHbMsg(execInfo.pNodeList, req.vgId)) { mError("vgId:%d not exists in nodeList buf, discarded", req.vgId); - code = terrno = TSDB_CODE_INVALID_MSG; doSendHbMsgRsp(terrno, &pReq->info, req.vgId, req.msgId); streamMutexUnlock(&execInfo.lock); cleanupAfterProcessHbMsg(&req, pFailedChkpt, pOrphanTasks); - return code; + TAOS_RETURN(TSDB_CODE_INVALID_MSG); } int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes); @@ -306,15 +365,18 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { bool snodeChanged = false; for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); + if (p == NULL) { + continue; + } STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); if (pTaskEntry == NULL) { - mError("s-task:0x%" PRIx64 " not found in mnode task list", p->id.taskId); + mError("s-task:0x%" PRIx64 " not found in mnode task list, added into orphan task list", p->id.taskId); SOrphanTask oTask = {.streamId = p->id.streamId, .taskId = p->id.taskId, .nodeId = p->nodeId}; void* px = taosArrayPush(pOrphanTasks, &oTask); if (px == NULL) { - mError("Failed to put task into list, taskId:0x%" PRIx64, p->id.taskId); + mError("failed to put task into list, taskId:0x%" PRIx64, p->id.taskId); } continue; } @@ -331,13 +393,12 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { SStreamObj *pStream = NULL; code = mndGetStreamObj(pMnode, p->id.streamId, &pStream); if (code) { - code = TSDB_CODE_STREAM_TASK_NOT_EXIST; continue; } int32_t numOfTasks = mndGetNumOfStreamTasks(pStream); - SCheckpointConsensusInfo *pInfo = NULL; + SCheckpointConsensusInfo *pInfo = NULL; code = mndGetConsensusInfo(execInfo.pStreamConsensus, p->id.streamId, numOfTasks, &pInfo); if (code == 0) { mndAddConsensusTasks(pInfo, &cp); @@ -357,7 +418,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { streamTaskStatusCopy(pTaskEntry, p); if ((pChkInfo->activeId != 0) && pChkInfo->failed) { - mError("stream task:0x%" PRIx64 " checkpointId:%" PRIx64 " transId:%d failed, kill it", p->id.taskId, + mError("stream task:0x%" PRIx64 " checkpointId:%" PRId64 " transId:%d failed, kill it", p->id.taskId, pChkInfo->activeId, pChkInfo->activeTransId); SFailedCheckpointInfo info = { @@ -372,13 +433,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } } - if (p->status == pTaskEntry->status) { - pTaskEntry->statusLastDuration++; - } else { - pTaskEntry->status = p->status; - pTaskEntry->statusLastDuration = 0; - } - if (p->status != TASK_STATUS__READY) { mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); } @@ -391,7 +445,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (pMnode != NULL) { SArray *p = NULL; - code = mndTakeVgroupSnapshot(pMnode, &allReady, &p); taosArrayDestroy(p); if (code) { @@ -405,10 +458,14 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal for (int32_t i = 0; i < taosArrayGetSize(pFailedChkpt); ++i) { SFailedCheckpointInfo *pInfo = taosArrayGet(pFailedChkpt, i); + if (pInfo == NULL) { + continue; + } + mInfo("checkpointId:%" PRId64 " transId:%d failed, issue task-reset trans to reset all tasks status", pInfo->checkpointId, pInfo->transId); - code = mndResetStatusFromCheckpoint(pMnode, pInfo->streamUid, pInfo->transId); + code = mndSendResetFromCheckpointMsg(pMnode, pInfo->streamUid, pInfo->transId); if (code) { mError("failed to create reset task trans, code:%s", tstrerror(code)); } @@ -420,11 +477,14 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { // handle the orphan tasks that are invalid but not removed in some vnodes or snode due to some unknown errors. if (taosArrayGetSize(pOrphanTasks) > 0) { - code = mndDropOrphanTasks(pMnode, pOrphanTasks); + code = mndSendDropOrphanTasksMsg(pMnode, pOrphanTasks); + if (code) { + mError("failed to send drop orphan tasks msg, code:%s, try next time", tstrerror(code)); + } } if (pMnode != NULL) { // make sure that the unit test case can work - mndStreamStartUpdateCheckpointInfo(pMnode); + mndStreamSendUpdateChkptInfoMsg(pMnode); } streamMutexUnlock(&execInfo.lock); @@ -435,22 +495,10 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { return code; } -void mndStreamStartUpdateCheckpointInfo(SMnode *pMnode) { // here reuse the doCheckpointmsg - SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - if (pMsg != NULL) { - int32_t size = sizeof(SMStreamDoCheckpointMsg); - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_UPDATE_CHKPT_EVT, .pCont = pMsg, .contLen = size}; - int32_t code = tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); - if (code) { - mError("failed to put into write Queue, code:%s", tstrerror(code)); - } - } -} - bool validateHbMsg(const SArray *pNodeList, int32_t vgId) { for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { SNodeEntry *pEntry = taosArrayGet(pNodeList, i); - if (pEntry->nodeId == vgId) { + if ((pEntry) && (pEntry->nodeId == vgId)) { return true; } } diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index 548eb118c7..6640841e5a 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -17,6 +17,8 @@ #include "mndTrans.h" #include "tmisce.h" #include "mndVgroup.h" +#include "mndStb.h" +#include "mndDb.h" struct SStreamTaskIter { SStreamObj *pStream; @@ -31,7 +33,6 @@ int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId); int32_t createStreamTaskIter(SStreamObj* pStream, SStreamTaskIter** pIter) { *pIter = taosMemoryCalloc(1, sizeof(SStreamTaskIter)); if (*pIter == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; return terrno; } @@ -96,6 +97,9 @@ int32_t mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady, SArray** pList) { *allReady = true; SArray *pVgroupList = taosArrayInit(4, sizeof(SNodeEntry)); + if (pVgroupList == NULL) { + return terrno; + } while (1) { pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); @@ -511,6 +515,10 @@ static int32_t doSetDropActionFromId(SMnode *pMnode, STrans *pTrans, SOrphanTask int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray* pList) { for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { SOrphanTask* pTask = taosArrayGet(pList, i); + if (pTask == NULL) { + return terrno; + } + int32_t code = doSetDropActionFromId(pMnode, pTrans, pTask); if (code != 0) { return code; @@ -530,8 +538,8 @@ static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChang pMsg->transId = transId; pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo)); if (pMsg->pNodeList == NULL) { - mError("failed to prepare node list, code:out of memory"); - code = TSDB_CODE_OUT_OF_MEMORY; + mError("failed to prepare node list, code:%s", tstrerror(terrno)); + code = terrno; } if (code == 0) { @@ -561,7 +569,6 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha void *buf = taosMemoryMalloc(tlen); if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosArrayDestroy(req.pNodeList); return terrno; } @@ -655,9 +662,8 @@ int32_t mndStreamSetUpdateEpsetAction(SMnode *pMnode, SStreamObj *pStream, SVgro static int32_t doSetResetAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVResetStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq)); if (pReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq), - tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + tstrerror(terrno)); return terrno; } @@ -734,6 +740,14 @@ int32_t mndInitExecInfo() { execInfo.pChkptStreams = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); execInfo.pStreamConsensus = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); execInfo.pNodeList = taosArrayInit(4, sizeof(SNodeEntry)); + execInfo.pKilledChkptTrans = taosArrayInit(4, sizeof(SStreamTaskResetMsg)); + + if (execInfo.pTaskList == NULL || execInfo.pTaskMap == NULL || execInfo.transMgmt.pDBTrans == NULL || + execInfo.pTransferStateStreams == NULL || execInfo.pChkptStreams == NULL || execInfo.pStreamConsensus == NULL || + execInfo.pNodeList == NULL || execInfo.pKilledChkptTrans == NULL) { + mError("failed to initialize the stream runtime env, code:%s", tstrerror(terrno)); + return terrno; + } taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList); taosHashSetFreeFp(execInfo.pChkptStreams, freeTaskList); @@ -743,14 +757,25 @@ int32_t mndInitExecInfo() { void removeExpiredNodeInfo(const SArray *pNodeSnapshot) { SArray *pValidList = taosArrayInit(4, sizeof(SNodeEntry)); + if (pValidList == NULL) { // not continue + return; + } + int32_t size = taosArrayGetSize(pNodeSnapshot); int32_t oldSize = taosArrayGetSize(execInfo.pNodeList); for (int32_t i = 0; i < oldSize; ++i) { SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); + if (p == NULL) { + continue; + } for (int32_t j = 0; j < size; ++j) { SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); + if (pEntry == NULL) { + continue; + } + if (pEntry->nodeId == p->nodeId) { void* px = taosArrayPush(pValidList, p); if (px == NULL) { @@ -781,6 +806,10 @@ int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); + if (pId == NULL) { + continue; + } + if (pId->taskId == pRemovedId->taskId && pId->streamId == pRemovedId->streamId) { taosArrayRemove(pExecNode->pTaskList, k); @@ -796,6 +825,10 @@ int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo* pExecInfo) { for (int32_t i = 0; i < taosArrayGetSize(pTaskIds); ++i) { STaskId *pId = taosArrayGet(pTaskIds, i); + if (pId == NULL) { + continue; + } + int32_t code = doRemoveTasks(pExecInfo, pId); if (code) { mError("failed to remove task in buffer list, 0x%"PRIx64, pId->taskId); @@ -843,6 +876,10 @@ static bool taskNodeExists(SArray *pList, int32_t nodeId) { for (int32_t i = 0; i < num; ++i) { SNodeEntry *pEntry = taosArrayGet(pList, i); + if (pEntry == NULL) { + continue; + } + if (pEntry->nodeId == nodeId) { return true; } @@ -853,12 +890,22 @@ static bool taskNodeExists(SArray *pList, int32_t nodeId) { int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot) { SArray *pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); + if (pRemovedTasks == NULL) { + return terrno; + } int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList); for (int32_t i = 0; i < numOfTask; ++i) { STaskId *pId = taosArrayGet(execInfo.pTaskList, i); + if (pId == NULL) { + continue; + } STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if (pEntry == NULL) { + continue; + } + if (pEntry->nodeId == SNODE_HANDLE) { continue; } @@ -902,6 +949,10 @@ static int32_t doSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamTas int32_t size = taosArrayGetSize(*pReqTaskList); for(int32_t i = 0; i < size; ++i) { STaskChkptInfo* pInfo = taosArrayGet(*pReqTaskList, i); + if (pInfo == NULL) { + continue; + } + if (pInfo->taskId == pTask->id.taskId) { pReq->checkpointId = pInfo->checkpointId; pReq->checkpointVer = pInfo->version; @@ -965,8 +1016,11 @@ int32_t mndStreamSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamObj int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; void *pIter = NULL; - SArray *pDropped = taosArrayInit(4, sizeof(int64_t)); int32_t code = 0; + SArray *pDropped = taosArrayInit(4, sizeof(int64_t)); + if (pDropped == NULL) { + return terrno; + } mDebug("start to scan checkpoint report info"); @@ -974,11 +1028,15 @@ int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { SArray *pList = *(SArray **)pIter; STaskChkptInfo *pInfo = taosArrayGet(pList, 0); - SStreamObj *pStream = NULL; + if (pInfo == NULL) { + continue; + } + + SStreamObj *pStream = NULL; code = mndGetStreamObj(pMnode, pInfo->streamId, &pStream); if (pStream == NULL || code != 0) { mDebug("failed to acquire stream:0x%" PRIx64 " remove it from checkpoint-report list", pInfo->streamId); - void* p = taosArrayPush(pDropped, &pInfo->streamId); + void *p = taosArrayPush(pDropped, &pInfo->streamId); if (p == NULL) { mError("failed to put stream into drop list:0x%" PRIx64, pInfo->streamId); } @@ -1022,10 +1080,14 @@ int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { int32_t size = taosArrayGetSize(pDropped); if (size > 0) { for (int32_t i = 0; i < size; ++i) { - int64_t streamId = *(int64_t *)taosArrayGet(pDropped, i); - code = taosHashRemove(execInfo.pChkptStreams, &streamId, sizeof(streamId)); + int64_t* pStreamId = (int64_t *)taosArrayGet(pDropped, i); + if (pStreamId == NULL) { + continue; + } + + code = taosHashRemove(execInfo.pChkptStreams, pStreamId, sizeof(*pStreamId)); if (code) { - mError("failed to remove stream in buf:0x%"PRIx64, streamId); + mError("failed to remove stream in buf:0x%"PRIx64, *pStreamId); } } @@ -1051,14 +1113,14 @@ static int32_t mndStreamSetChkptIdAction(SMnode *pMnode, STrans *pTrans, SStream int32_t blen; tEncodeSize(tEncodeRestoreCheckpointInfo, &req, blen, code); if (code < 0) { - return terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; } int32_t tlen = sizeof(SMsgHead) + blen; void *pBuf = taosMemoryMalloc(tlen); if (pBuf == NULL) { - return terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; } void *abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); @@ -1160,6 +1222,10 @@ int32_t mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, int32_t numOfTask .streamId = streamId, }; + if (p.pTaskList == NULL) { + return terrno; + } + int32_t code = taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); if (code == 0) { void *pChkptInfo = (SCheckpointConsensusInfo *)taosHashGet(pHash, &streamId, sizeof(streamId)); @@ -1167,6 +1233,7 @@ int32_t mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, int32_t numOfTask } else { *pInfo = NULL; } + return code; } @@ -1178,6 +1245,10 @@ void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpo for (int32_t i = 0; i < taosArrayGetSize(pInfo->pTaskList); ++i) { SCheckpointConsensusEntry *p = taosArrayGet(pInfo->pTaskList, i); + if (p == NULL) { + continue; + } + if (p->req.taskId == info.req.taskId) { mDebug("s-task:0x%x already in consensus-checkpointId list for stream:0x%" PRIx64 ", update ts %" PRId64 "->%" PRId64 " total existed:%d", @@ -1218,5 +1289,491 @@ int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId) { mError("failed to remove stream:0x%"PRIx64" in consensus-checkpointId list, remain:%d", streamId, numOfStreams); } + return code; +} + +static void mndShowStreamStatus(char *dst, SStreamObj *pStream) { + int8_t status = atomic_load_8(&pStream->status); + if (status == STREAM_STATUS__NORMAL) { + strcpy(dst, "ready"); + } else if (status == STREAM_STATUS__STOP) { + strcpy(dst, "stop"); + } else if (status == STREAM_STATUS__FAILED) { + strcpy(dst, "failed"); + } else if (status == STREAM_STATUS__RECOVER) { + strcpy(dst, "recover"); + } else if (status == STREAM_STATUS__PAUSE) { + strcpy(dst, "paused"); + } +} + +static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { + int8_t trigger = pStream->conf.trigger; + if (trigger == STREAM_TRIGGER_AT_ONCE) { + strcpy(dst, "at once"); + } else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) { + strcpy(dst, "window close"); + } else if (trigger == STREAM_TRIGGER_MAX_DELAY) { + strcpy(dst, "max delay"); + } +} + +static void int64ToHexStr(int64_t id, char *pBuf, int32_t bufLen) { + memset(pBuf, 0, bufLen); + pBuf[2] = '0'; + pBuf[3] = 'x'; + + int32_t len = tintToHex(id, &pBuf[4]); + varDataSetLen(pBuf, len + 2); +} + +int32_t setStreamAttrInResBlock(SStreamObj *pStream, SSDataBlock *pBlock, int32_t numOfRows) { + int32_t code = 0; + int32_t cols = 0; + int32_t lino = 0; + + char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); + SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); + TSDB_CHECK_CODE(code, lino, _end); + + // create time + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->createTime, false); + TSDB_CHECK_CODE(code, lino, _end); + + // stream id + char buf[128] = {0}; + int64ToHexStr(pStream->uid, buf, tListLen(buf)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + code = colDataSetVal(pColInfo, numOfRows, buf, false); + TSDB_CHECK_CODE(code, lino, _end); + + // related fill-history stream id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + if (pStream->hTaskUid != 0) { + int64ToHexStr(pStream->hTaskUid, buf, tListLen(buf)); + code = colDataSetVal(pColInfo, numOfRows, buf, false); + } else { + code = colDataSetVal(pColInfo, numOfRows, buf, true); + } + TSDB_CHECK_CODE(code, lino, _end); + + // related fill-history stream id + char sql[TSDB_SHOW_SQL_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(sql, pStream->sql, sizeof(sql)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + code = colDataSetVal(pColInfo, numOfRows, (const char *)sql, false); + TSDB_CHECK_CODE(code, lino, _end); + + char status[20 + VARSTR_HEADER_SIZE] = {0}; + char status2[20] = {0}; + mndShowStreamStatus(status2, pStream); + STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); + TSDB_CHECK_CODE(code, lino, _end); + + char sourceDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(sourceDB, mndGetDbStr(pStream->sourceDb), sizeof(sourceDB)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&sourceDB, false); + TSDB_CHECK_CODE(code, lino, _end); + + char targetDB[TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(targetDB, mndGetDbStr(pStream->targetDb), sizeof(targetDB)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&targetDB, false); + TSDB_CHECK_CODE(code, lino, _end); + + if (pStream->targetSTbName[0] == 0) { + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, NULL, true); + } else { + char targetSTB[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(targetSTB, mndGetStbStr(pStream->targetSTbName), sizeof(targetSTB)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&targetSTB, false); + } + TSDB_CHECK_CODE(code, lino, _end); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false); + TSDB_CHECK_CODE(code, lino, _end); + + char trigger[20 + VARSTR_HEADER_SIZE] = {0}; + char trigger2[20] = {0}; + mndShowStreamTrigger(trigger2, pStream); + STR_WITH_MAXSIZE_TO_VARSTR(trigger, trigger2, sizeof(trigger)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&trigger, false); + TSDB_CHECK_CODE(code, lino, _end); + + // sink_quota + char sinkQuota[20 + VARSTR_HEADER_SIZE] = {0}; + sinkQuota[0] = '0'; + char dstStr[20] = {0}; + STR_TO_VARSTR(dstStr, sinkQuota) + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint interval + char tmp[20 + VARSTR_HEADER_SIZE] = {0}; + sprintf(varDataVal(tmp), "%d sec", tsStreamCheckpointInterval); + varDataSetLen(tmp, strlen(varDataVal(tmp))); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)tmp, false); + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint backup type + char backup[20 + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(backup, "none") + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)backup, false); + TSDB_CHECK_CODE(code, lino, _end); + + // history scan idle + char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0}; + strcpy(scanHistoryIdle, "100a"); + + memset(dstStr, 0, tListLen(dstStr)); + STR_TO_VARSTR(dstStr, scanHistoryIdle) + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); + +_end: + if (code) { + mError("error happens when build stream attr result block, lino:%d, code:%s", lino, tstrerror(code)); + } + return code; +} + +int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { + SColumnInfoData *pColInfo = NULL; + int32_t cols = 0; + int32_t code = 0; + int32_t lino = 0; + + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + + STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); + if (pe == NULL) { + mError("task:0x%" PRIx64 " not exists in any vnodes, streamName:%s, streamId:0x%" PRIx64 " createTs:%" PRId64 + " no valid status/stage info", + id.taskId, pStream->name, pStream->uid, pStream->createTime); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + // stream name + char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); + TSDB_CHECK_CODE(code, lino, _end); + + // task id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + char idstr[128] = {0}; + int64ToHexStr(pTask->id.taskId, idstr, tListLen(idstr)); + code = colDataSetVal(pColInfo, numOfRows, idstr, false); + TSDB_CHECK_CODE(code, lino, _end); + + // node type + char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; + varDataSetLen(nodeType, 5); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + if (pTask->info.nodeId > 0) { + memcpy(varDataVal(nodeType), "vnode", 5); + } else { + memcpy(varDataVal(nodeType), "snode", 5); + } + code = colDataSetVal(pColInfo, numOfRows, nodeType, false); + TSDB_CHECK_CODE(code, lino, _end); + + // node id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + int64_t nodeId = TMAX(pTask->info.nodeId, 0); + code = colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); + TSDB_CHECK_CODE(code, lino, _end); + + // level + char level[20 + VARSTR_HEADER_SIZE] = {0}; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + memcpy(varDataVal(level), "source", 6); + varDataSetLen(level, 6); + } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + memcpy(varDataVal(level), "agg", 3); + varDataSetLen(level, 3); + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + memcpy(varDataVal(level), "sink", 4); + varDataSetLen(level, 4); + } + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)level, false); + TSDB_CHECK_CODE(code, lino, _end); + + // status + char status[20 + VARSTR_HEADER_SIZE] = {0}; + + const char *pStatus = streamTaskGetStatusStr(pe->status); + STR_TO_VARSTR(status, pStatus); + + // status + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)status, false); + TSDB_CHECK_CODE(code, lino, _end); + + // stage + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); + TSDB_CHECK_CODE(code, lino, _end); + + // input queue + char vbuf[40] = {0}; + char buf[38] = {0}; + const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; + snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + + // input total + const char *formatTotalMb = "%7.2f MiB"; + const char *formatTotalGb = "%7.2f GiB"; + if (pe->procsTotal < 1024) { + snprintf(buf, tListLen(buf), formatTotalMb, pe->procsTotal); + } else { + snprintf(buf, tListLen(buf), formatTotalGb, pe->procsTotal / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + + // process throughput + const char *formatKb = "%7.2f KiB/s"; + const char *formatMb = "%7.2f MiB/s"; + if (pe->procsThroughput < 1024) { + snprintf(buf, tListLen(buf), formatKb, pe->procsThroughput); + } else { + snprintf(buf, tListLen(buf), formatMb, pe->procsThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + + // output total + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + sprintf(buf, formatTotalMb, pe->outputTotal); + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + } + + // output throughput + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + if (pe->outputThroughput < 1024) { + snprintf(buf, tListLen(buf), formatKb, pe->outputThroughput); + } else { + snprintf(buf, tListLen(buf), formatMb, pe->outputThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + } + + // output queue + // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); + // STR_TO_VARSTR(vbuf, buf); + + // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + + // info + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + const char *sinkStr = "%.2f MiB"; + snprintf(buf, tListLen(buf), sinkStr, pe->sinkDataSize); + } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + // offset info + const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; + snprintf(buf, tListLen(buf), offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer); + } else { + memset(buf, 0, tListLen(buf)); + } + + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + TSDB_CHECK_CODE(code, lino, _end); + + // start_time + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startTime, false); + TSDB_CHECK_CODE(code, lino, _end); + + // start id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointId, false); + TSDB_CHECK_CODE(code, lino, _end); + + // start ver + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->startCheckpointVer, false); + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint time + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + if (pe->checkpointInfo.latestTime != 0) { + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestTime, false); + } else { + code = colDataSetVal(pColInfo, numOfRows, 0, true); + } + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint_id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestId, false); + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint version + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, (const char *)&pe->checkpointInfo.latestVer, false); + TSDB_CHECK_CODE(code, lino, _end); + + // checkpoint size + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + colDataSetNULL(pColInfo, numOfRows); + + // checkpoint backup status + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, 0, true); + TSDB_CHECK_CODE(code, lino, _end); + + // ds_err_info + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, 0, true); + TSDB_CHECK_CODE(code, lino, _end); + + // history_task_id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + if (pe->hTaskId != 0) { + int64ToHexStr(pe->hTaskId, idstr, tListLen(idstr)); + code = colDataSetVal(pColInfo, numOfRows, idstr, false); + } else { + code = colDataSetVal(pColInfo, numOfRows, 0, true); + } + TSDB_CHECK_CODE(code, lino, _end); + + // history_task_status + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + TSDB_CHECK_NULL(pColInfo, code, lino, _end, terrno); + + code = colDataSetVal(pColInfo, numOfRows, 0, true); + TSDB_CHECK_CODE(code, lino, _end); + + _end: + if (code) { + mError("error happens during build task attr result blocks, lino:%d, code:%s", lino, tstrerror(code)); + } return code; } \ No newline at end of file diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 99528d01b0..86090fed43 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -872,6 +872,7 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->checkpointInfo = pSrc->checkpointInfo; pDst->startCheckpointId = pSrc->startCheckpointId; pDst->startCheckpointVer = pSrc->startCheckpointVer; + pDst->status = pSrc->status; pDst->startTime = pSrc->startTime; pDst->hTaskId = pSrc->hTaskId; From 6c8303297c8d8a20d7e5756eb2856cf10baeb78f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 31 Jul 2024 14:36:05 +0800 Subject: [PATCH 09/17] fix:[TD-31015]monitor close first before slow log thread exit --- source/client/src/clientMonitor.c | 61 ++++++++++--------------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 92a8fc3b29..4bb29f8d97 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -12,13 +12,13 @@ SRWLatch monitorLock; void* monitorTimer; SHashObj* monitorCounterHash; -int32_t slowLogFlag = -1; -int32_t monitorFlag = -1; +int32_t monitorFlag = 0; int32_t quitCnt = 0; tsem2_t monitorSem; STaosQueue* monitorQueue; SHashObj* monitorSlowLogHash; char tmpSlowLogPath[PATH_MAX] = {0}; +TdThread monitorThread; static int32_t getSlowLogTmpDir(char* tmpPath, int32_t size) { int ret = snprintf(tmpPath, size, "%s/tdengine_slow_log/", tsTempDir); @@ -113,11 +113,11 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { tscError("failed to send slow log:%s, clusterId:%" PRIx64, p->data, p->clusterId); } MonitorSlowLogData tmp = {.clusterId = p->clusterId, - .type = p->type, - .fileName = p->fileName, - .pFile = p->pFile, - .offset = p->offset, - .data = NULL}; + .type = p->type, + .fileName = p->fileName, + .pFile = p->pFile, + .offset = p->offset, + .data = NULL}; if (monitorPutData2MonitorQueue(tmp) == 0) { p->fileName = NULL; } @@ -164,7 +164,7 @@ static int32_t sendReport(void* pTransporter, SEpSet* epSet, char* pCont, MONITO int64_t transporterId = 0; return asyncSendMsgToServer(pTransporter, epSet, &transporterId, pInfo); -FAILED: + FAILED: monitorFreeSlowLogDataEx(param); return TAOS_GET_TERRNO(TSDB_CODE_TSC_INTERNAL_ERROR); } @@ -276,12 +276,10 @@ void monitorCreateClient(int64_t clusterId) { tscInfo("[monitor] monitorCreateClient for %" PRIx64 "finished %p.", clusterId, pMonitor); } taosWUnLockLatch(&monitorLock); - if (-1 != atomic_val_compare_exchange_32(&monitorFlag, -1, 0)) { - tscDebug("[monitor] monitorFlag already is 0"); - } + return; -fail: + fail: destroyMonitorClient(&pMonitor); taosWUnLockLatch(&monitorLock); } @@ -301,7 +299,7 @@ void monitorCreateClientCounter(int64_t clusterId, const char* name, const char* tscError("failed to add metric to collector"); (void)taos_counter_destroy(newCounter); goto end; -} + } if (taosHashPut(pMonitor->counters, name, strlen(name), &newCounter, POINTER_BYTES) != 0) { tscError("failed to put counter to monitor"); (void)taos_counter_destroy(newCounter); @@ -310,7 +308,7 @@ void monitorCreateClientCounter(int64_t clusterId, const char* name, const char* tscInfo("[monitor] monitorCreateClientCounter %" PRIx64 "(%p):%s : %p.", pMonitor->clusterId, pMonitor, name, newCounter); -end: + end: taosWUnLockLatch(&monitorLock); } @@ -339,7 +337,7 @@ void monitorCounterInc(int64_t clusterId, const char* counterName, const char** } tscDebug("[monitor] monitorCounterInc %" PRIx64 "(%p):%s", pMonitor->clusterId, pMonitor, counterName); -end: + end: taosWUnLockLatch(&monitorLock); } @@ -348,8 +346,6 @@ const char* monitorResultStr(SQL_RESULT_CODE code) { return result_state[code]; } -static void monitorThreadFuncUnexpectedStopped(void) { atomic_store_32(&slowLogFlag, -1); } - static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char* tmpPath) { TdFilePtr pFile = NULL; void* tmp = taosHashGet(monitorSlowLogHash, &slowLogData->clusterId, LONG_BYTES); @@ -693,20 +689,10 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId) { static void* monitorThreadFunc(void* param) { setThreadName("client-monitor-slowlog"); - -#ifdef WINDOWS - if (taosCheckCurrentInDll()) { - atexit(monitorThreadFuncUnexpectedStopped); - } -#endif - - if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { - return NULL; - } tscDebug("monitorThreadFunc start"); int64_t quitTime = 0; while (1) { - if (atomic_load_32(&slowLogFlag) > 0) { + if (atomic_load_32(&monitorFlag) == 1) { if (quitCnt == 0) { monitorSendAllSlowLogAtQuit(); if (quitCnt == 0) { @@ -752,7 +738,6 @@ static void* monitorThreadFunc(void* param) { } (void)tsem2_timewait(&monitorSem, 100); } - atomic_store_32(&slowLogFlag, -2); return NULL; } @@ -767,7 +752,6 @@ static int32_t tscMonitortInit() { return TSDB_CODE_TSC_INTERNAL_ERROR; } - TdThread monitorThread; if (taosThreadCreate(&monitorThread, &thAttr, monitorThreadFunc, NULL) != 0) { tscError("failed to create monitor thread since %s", strerror(errno)); return TSDB_CODE_TSC_INTERNAL_ERROR; @@ -778,13 +762,9 @@ static int32_t tscMonitortInit() { } static void tscMonitorStop() { - if (atomic_val_compare_exchange_32(&slowLogFlag, 0, 1)) { - tscDebug("monitor thread already stopped"); - return; - } - - while (atomic_load_32(&slowLogFlag) > 0) { - taosMsleep(100); + if (taosCheckPthreadValid(monitorThread)) { + (void)taosThreadJoin(monitorThread, NULL); + (void)taosThreadClear(&monitorThread); } } @@ -842,10 +822,7 @@ int32_t monitorInit() { void monitorClose() { tscInfo("[monitor] tscMonitor close"); taosWLockLatch(&monitorLock); - - if (atomic_val_compare_exchange_32(&monitorFlag, 0, 1)) { - tscDebug("[monitor] monitorFlag is not 0"); - } + atomic_store_32(&monitorFlag, 1); tscMonitorStop(); sendAllCounter(); taosHashCleanup(monitorCounterHash); @@ -860,7 +837,7 @@ int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data) { int32_t code = 0; MonitorSlowLogData* slowLogData = NULL; - if (atomic_load_32(&slowLogFlag) == -2) { + if (atomic_load_32(&monitorFlag) == 1) { tscError("[monitor] slow log thread is exiting"); return -1; } From 04d525d42966baf25e2426c52de5f67ef868d138 Mon Sep 17 00:00:00 2001 From: sima Date: Fri, 5 Jul 2024 17:11:55 +0800 Subject: [PATCH 10/17] feat:[TS-5137] Support group/partition by position and alias. --- include/libs/nodes/querynodes.h | 1 + source/libs/nodes/src/nodesTraverseFuncs.c | 3 + source/libs/parser/src/parTranslater.c | 174 +++++++++++++++++-- source/libs/parser/src/parUtil.c | 2 +- source/util/src/terror.c | 2 +- tests/system-test/2-query/distinct.py | 6 +- tests/system-test/2-query/explain.py | 2 +- tests/system-test/2-query/group_partition.py | 159 +++++++++++++++++ tests/system-test/2-query/leastsquares.py | 2 +- tests/system-test/2-query/spread.py | 2 +- tests/system-test/2-query/sum.py | 3 +- tests/system-test/2-query/td-28068.py | 18 +- 12 files changed, 336 insertions(+), 38 deletions(-) mode change 100644 => 100755 source/libs/parser/src/parTranslater.c diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index 54c8686161..bb06b65898 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -60,6 +60,7 @@ typedef struct SExprNode { bool orderAlias; bool asAlias; bool asParam; + bool asPosition; } SExprNode; typedef enum EColumnType { diff --git a/source/libs/nodes/src/nodesTraverseFuncs.c b/source/libs/nodes/src/nodesTraverseFuncs.c index 1edfa840e2..927bc6b661 100644 --- a/source/libs/nodes/src/nodesTraverseFuncs.c +++ b/source/libs/nodes/src/nodesTraverseFuncs.c @@ -229,6 +229,9 @@ static void checkParamIsFunc(SFunctionNode* pFunc) { if (nodeType(pPara) == QUERY_NODE_COLUMN) { ((SColumnNode*)pPara)->node.asParam = true; } + if (nodeType(pPara) == QUERY_NODE_VALUE) { + ((SValueNode*)pPara)->node.asParam = true; + } } } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c old mode 100644 new mode 100755 index cfe1fa4eb3..879f527a85 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1450,6 +1450,8 @@ static EDealRes translateColumnWithoutPrefix(STranslateContext* pCxt, SColumnNod return DEAL_RES_CONTINUE; } +static int32_t getFuncInfo(STranslateContext* pCxt, SFunctionNode* pFunc); + static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** pCol, bool* pFound) { SNodeList* pProjectionList = getProjectListFromCurrStmt(pCxt->pCurrStmt); SNode* pNode; @@ -1470,6 +1472,25 @@ static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** p } } if (*pFound) { + if (QUERY_NODE_FUNCTION == nodeType(pFoundNode) && (SQL_CLAUSE_GROUP_BY == pCxt->currClause || SQL_CLAUSE_PARTITION_BY == pCxt->currClause)) { + pCxt->errCode = getFuncInfo(pCxt, (SFunctionNode*)pFoundNode); + if (TSDB_CODE_SUCCESS == pCxt->errCode) { + if (fmIsVectorFunc(((SFunctionNode*)pFoundNode)->funcId)) { + pCxt->errCode = TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION; + return DEAL_RES_ERROR; + } else if (fmIsPseudoColumnFunc(((SFunctionNode*)pFoundNode)->funcId)) { + if ('\0' != (*pCol)->tableAlias[0]) { + return translateColumnWithPrefix(pCxt, pCol); + } else { + return translateColumnWithoutPrefix(pCxt, pCol); + } + } else { + /* Do nothing and replace old node with found node. */ + } + } else { + return DEAL_RES_ERROR; + } + } SNode* pNew = NULL; int32_t code = nodesCloneNode(pFoundNode, &pNew); if (NULL == pNew) { @@ -1478,6 +1499,13 @@ static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** p } nodesDestroyNode(*(SNode**)pCol); *(SNode**)pCol = (SNode*)pNew; + if (QUERY_NODE_COLUMN == nodeType(pFoundNode)) { + if ('\0' != (*pCol)->tableAlias[0]) { + return translateColumnWithPrefix(pCxt, pCol); + } else { + return translateColumnWithoutPrefix(pCxt, pCol); + } + } } return DEAL_RES_CONTINUE; } @@ -1716,6 +1744,12 @@ int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* p return TSDB_CODE_SUCCESS; } +static bool clauseSupportAlias(ESqlClause clause) { + return SQL_CLAUSE_GROUP_BY == clause || + SQL_CLAUSE_PARTITION_BY == clause || + SQL_CLAUSE_ORDER_BY == clause; +} + static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { if (NULL == pCxt->pCurrStmt || (isSelectStmt(pCxt->pCurrStmt) && NULL == ((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { @@ -1742,7 +1776,8 @@ static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { res = translateColumnWithPrefix(pCxt, pCol); } else { bool found = false; - if (SQL_CLAUSE_ORDER_BY == pCxt->currClause && !(*pCol)->node.asParam) { + if ((clauseSupportAlias(pCxt->currClause)) && + !(*pCol)->node.asParam) { res = translateColumnUseAlias(pCxt, pCol, &found); } if (DEAL_RES_ERROR != res && !found) { @@ -1752,7 +1787,9 @@ static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { res = translateColumnWithoutPrefix(pCxt, pCol); } } - if (SQL_CLAUSE_ORDER_BY == pCxt->currClause && !(*pCol)->node.asParam && res != DEAL_RES_CONTINUE && + if (clauseSupportAlias(pCxt->currClause) && + !(*pCol)->node.asParam && + res != DEAL_RES_CONTINUE && res != DEAL_RES_END) { res = translateColumnUseAlias(pCxt, pCol, &found); } @@ -2954,6 +2991,13 @@ static EDealRes translateFunction(STranslateContext* pCxt, SFunctionNode** pFunc } pCxt->errCode = getFuncInfo(pCxt, *pFunc); + if (TSDB_CODE_SUCCESS == pCxt->errCode) { + if ((SQL_CLAUSE_GROUP_BY == pCxt->currClause || + SQL_CLAUSE_PARTITION_BY == pCxt->currClause) && + fmIsVectorFunc((*pFunc)->funcId)) { + pCxt->errCode = TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION; + } + } if (TSDB_CODE_SUCCESS == pCxt->errCode) { pCxt->errCode = translateFunctionImpl(pCxt, pFunc); } @@ -4940,7 +4984,7 @@ static int32_t getPositionValue(const SValueNode* pVal) { case TSDB_DATA_TYPE_GEOMETRY: return -1; case TSDB_DATA_TYPE_BOOL: - return (pVal->datum.b ? 1 : 0); + return -1; case TSDB_DATA_TYPE_TINYINT: case TSDB_DATA_TYPE_SMALLINT: case TSDB_DATA_TYPE_INT: @@ -4948,7 +4992,7 @@ static int32_t getPositionValue(const SValueNode* pVal) { return pVal->datum.i; case TSDB_DATA_TYPE_FLOAT: case TSDB_DATA_TYPE_DOUBLE: - return pVal->datum.d; + return -1; case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_USMALLINT: case TSDB_DATA_TYPE_UINT: @@ -4960,25 +5004,36 @@ static int32_t getPositionValue(const SValueNode* pVal) { return -1; } -static int32_t translateOrderByPosition(STranslateContext* pCxt, SNodeList* pProjectionList, SNodeList* pOrderByList, +static int32_t translateClausePosition(STranslateContext* pCxt, SNodeList* pProjectionList, SNodeList* pClauseList, bool* pOther) { *pOther = false; SNode* pNode = NULL; - WHERE_EACH(pNode, pOrderByList) { - SNode* pExpr = ((SOrderByExprNode*)pNode)->pExpr; + WHERE_EACH(pNode, pClauseList) { + SNode* pExpr = NULL; + switch (pNode->type) { + case QUERY_NODE_GROUPING_SET: + pExpr = getGroupByNode(pNode); + break; + case QUERY_NODE_ORDER_BY_EXPR: + pExpr = ((SOrderByExprNode*)pNode)->pExpr; + break; + default: + pExpr = pNode; + break; + } if (QUERY_NODE_VALUE == nodeType(pExpr)) { SValueNode* pVal = (SValueNode*)pExpr; + pVal->node.asPosition = false; if (DEAL_RES_ERROR == translateValue(pCxt, pVal)) { return pCxt->errCode; } int32_t pos = getPositionValue(pVal); if (pos < 0) { - ERASE_NODE(pOrderByList); - continue; + pVal->node.asPosition = false; } else if (0 == pos || pos > LIST_LENGTH(pProjectionList)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_WRONG_NUMBER_OF_SELECT); } else { - // No longer using SColumnRefNode, processing in replaceOrderByAliasImpl function + pVal->node.asPosition = true; } } else { *pOther = true; @@ -4990,7 +5045,7 @@ static int32_t translateOrderByPosition(STranslateContext* pCxt, SNodeList* pPro static int32_t translateOrderBy(STranslateContext* pCxt, SSelectStmt* pSelect) { bool other; - int32_t code = translateOrderByPosition(pCxt, pSelect->pProjectionList, pSelect->pOrderByList, &other); + int32_t code = translateClausePosition(pCxt, pSelect->pProjectionList, pSelect->pOrderByList, &other); if (TSDB_CODE_SUCCESS == code) { if (0 == LIST_LENGTH(pSelect->pOrderByList)) { NODES_DESTORY_LIST(pSelect->pOrderByList); @@ -5121,6 +5176,68 @@ static int32_t translateProjectionList(STranslateContext* pCxt, SSelectStmt* pSe } } +typedef struct SReplaceGroupByAliasCxt { + STranslateContext* pTranslateCxt; + SNodeList* pProjectionList; +} SReplaceGroupByAliasCxt; + +static EDealRes replaceGroupByAliasImpl(SNode** pNode, void* pContext) { + SReplaceGroupByAliasCxt* pCxt = pContext; + SNodeList* pProjectionList = pCxt->pProjectionList; + SNode* pProject = NULL; + if (QUERY_NODE_VALUE == nodeType(*pNode)) { + STranslateContext* pTransCxt = pCxt->pTranslateCxt; + SValueNode* pVal = (SValueNode*) *pNode; + if (DEAL_RES_ERROR == translateValue(pTransCxt, pVal)) { + return DEAL_RES_CONTINUE; + } + if (!pVal->node.asPosition) { + return DEAL_RES_CONTINUE; + } + int32_t pos = getPositionValue(pVal); + if (0 < pos && pos <= LIST_LENGTH(pProjectionList)) { + SNode* pNew = NULL; + int32_t code = nodesCloneNode(nodesListGetNode(pProjectionList, pos - 1), (SNode**)&pNew); + if (TSDB_CODE_SUCCESS != code) { + pCxt->pTranslateCxt->errCode = code; + return DEAL_RES_ERROR; + } + nodesDestroyNode(*pNode); + *pNode = pNew; + return DEAL_RES_CONTINUE; + } else { + return DEAL_RES_CONTINUE; + } + } else if (QUERY_NODE_COLUMN == nodeType(*pNode)) { + STranslateContext* pTransCxt = pCxt->pTranslateCxt; + return translateColumn(pTransCxt, (SColumnNode**)pNode); + } + + return DEAL_RES_CONTINUE; +} + +static int32_t replaceGroupByAlias(STranslateContext* pCxt, SSelectStmt* pSelect) { + if (NULL == pSelect->pGroupByList) { + return TSDB_CODE_SUCCESS; + } + SReplaceGroupByAliasCxt cxt = { + .pTranslateCxt = pCxt, .pProjectionList = pSelect->pProjectionList}; + nodesRewriteExprsPostOrder(pSelect->pGroupByList, replaceGroupByAliasImpl, &cxt); + + return pCxt->errCode; +} + +static int32_t replacePartitionByAlias(STranslateContext* pCxt, SSelectStmt* pSelect) { + if (NULL == pSelect->pPartitionByList) { + return TSDB_CODE_SUCCESS; + } + SReplaceGroupByAliasCxt cxt = { + .pTranslateCxt = pCxt, .pProjectionList = pSelect->pProjectionList}; + nodesRewriteExprsPostOrder(pSelect->pPartitionByList, replaceGroupByAliasImpl, &cxt); + + return pCxt->errCode; +} + static int32_t translateSelectList(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_SELECT; int32_t code = translateExprList(pCxt, pSelect->pProjectionList); @@ -5172,9 +5289,21 @@ static int32_t translateGroupBy(STranslateContext* pCxt, SSelectStmt* pSelect) { if (NULL != pSelect->pWindow) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_GROUPBY_WINDOW_COEXIST); } - pCxt->currClause = SQL_CLAUSE_GROUP_BY; - pSelect->timeLineResMode = TIME_LINE_NONE; - return translateExprList(pCxt, pSelect->pGroupByList); + bool other; + int32_t code = translateClausePosition(pCxt, pSelect->pProjectionList, pSelect->pGroupByList, &other); + if (TSDB_CODE_SUCCESS == code) { + if (0 == LIST_LENGTH(pSelect->pGroupByList)) { + NODES_DESTORY_LIST(pSelect->pGroupByList); + return TSDB_CODE_SUCCESS; + } + code = replaceGroupByAlias(pCxt, pSelect); + } + if (TSDB_CODE_SUCCESS == code) { + pCxt->currClause = SQL_CLAUSE_GROUP_BY; + pSelect->timeLineResMode = TIME_LINE_NONE; + code = translateExprList(pCxt, pSelect->pGroupByList); + } + return code; } static int32_t getTimeRange(SNode** pPrimaryKeyCond, STimeWindow* pTimeRange, bool* pIsStrict) { @@ -5781,7 +5910,8 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec int32_t code = TSDB_CODE_SUCCESS; if (pSelect->pPartitionByList) { - code = removeConstantValueFromList(&pSelect->pPartitionByList); + bool other; + code = translateClausePosition(pCxt, pSelect->pProjectionList, pSelect->pPartitionByList, &other); } if (TSDB_CODE_SUCCESS == code && pSelect->pPartitionByList) { @@ -5791,8 +5921,10 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec (QUERY_NODE_FUNCTION == nodeType(pPar) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPar)->funcType))) { pSelect->timeLineResMode = TIME_LINE_MULTI; } - - code = translateExprList(pCxt, pSelect->pPartitionByList); + code = replacePartitionByAlias(pCxt, pSelect); + if (TSDB_CODE_SUCCESS == code) { + code = translateExprList(pCxt, pSelect->pPartitionByList); + } } if (TSDB_CODE_SUCCESS == code) { code = translateExprList(pCxt, pSelect->pTags); @@ -6519,7 +6651,11 @@ static int32_t translateSelectFrom(STranslateContext* pCxt, SSelectStmt* pSelect code = removeConstantValueFromList(&pSelect->pPartitionByList); } } - + if (TSDB_CODE_SUCCESS == code) { + if (pSelect->pGroupByList) { + code = removeConstantValueFromList(&pSelect->pGroupByList); + } + } return code; } @@ -6606,7 +6742,7 @@ static int32_t translateSetOperOrderBy(STranslateContext* pCxt, SSetOperator* pS } bool other; - int32_t code = translateOrderByPosition(pCxt, pSetOperator->pProjectionList, pSetOperator->pOrderByList, &other); + int32_t code = translateClausePosition(pCxt, pSetOperator->pProjectionList, pSetOperator->pOrderByList, &other); /* if (TSDB_CODE_SUCCESS == code) { if (other) { diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 35d54ad43c..6a36ea7b85 100644 --- a/source/libs/parser/src/parUtil.c +++ b/source/libs/parser/src/parUtil.c @@ -44,7 +44,7 @@ static char* getSyntaxErrFormat(int32_t errCode) { case TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION: return "There mustn't be aggregation"; case TSDB_CODE_PAR_WRONG_NUMBER_OF_SELECT: - return "ORDER BY item must be the number of a SELECT-list expression"; + return "ORDER BY / GROUP BY item must be the number of a SELECT-list expression"; case TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION: return "Not a GROUP BY expression"; case TSDB_CODE_PAR_NOT_SELECTED_EXPRESSION: diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 8400675ec6..a0c3b3f766 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -595,7 +595,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TABLE_NOT_EXIST, "Table does not exist TAOS_DEFINE_ERROR(TSDB_CODE_PAR_AMBIGUOUS_COLUMN, "Column ambiguously defined") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_WRONG_VALUE_TYPE, "Invalid value type") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION, "There mustn't be aggregation") -TAOS_DEFINE_ERROR(TSDB_CODE_PAR_WRONG_NUMBER_OF_SELECT, "ORDER BY item must be the number of a SELECT-list expression") +TAOS_DEFINE_ERROR(TSDB_CODE_PAR_WRONG_NUMBER_OF_SELECT, "ORDER BY / GROUP BY item must be the number of a SELECT-list expression") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION, "Not a GROUP BY expression") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_NOT_SELECTED_EXPRESSION, "Not SELECTed expression") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_NOT_SINGLE_GROUP, "Not a single-group group function") diff --git a/tests/system-test/2-query/distinct.py b/tests/system-test/2-query/distinct.py index 5c07544d5d..5025b39753 100644 --- a/tests/system-test/2-query/distinct.py +++ b/tests/system-test/2-query/distinct.py @@ -144,8 +144,8 @@ class TDTestCase: tdSql.query(f"select distinct c1, c2 from (select c2, c1 from {dbname}.stb1 where c1 > 2 order by ts)") tdSql.query(f"select distinct c1, c2 from (select c2, c1 from {dbname}.t1 where c1 > 2 order by ts)") tdSql.error(f"select distinct c1, c2 from (select c2, c1 from {dbname}.stb1 where c1 > 2 group by c1)") - tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.stb1 group by c1)") - tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.t1 group by c1)") + tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.stb1 group by stb1.c1)") + tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.t1 group by t1.c1)") tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.stb1 )") tdSql.checkRows(1) tdSql.query(f"select distinct c1, c2 from (select max(c1) c1, max(c2) c2 from {dbname}.t1 )") @@ -245,7 +245,7 @@ class TDTestCase: tdSql.query(f"select distinct t1 from (select t0, t1 from {dbname}.stb1 where t0 > 2 ) where t1 < 3") tdSql.checkRows(1) tdSql.error(f"select distinct t1, t0 from (select t1 from {dbname}.stb1 where t0 > 2 ) where t1 < 3") - tdSql.query(f"select distinct t1, t0 from (select max(t1) t1, max(t0) t0 from {dbname}.stb1 group by t1)") + tdSql.query(f"select distinct t1, t0 from (select max(t1) t1, max(t0) t0 from {dbname}.stb1 group by stb1.t1)") tdSql.query(f"select distinct t1, t0 from (select max(t1) t1, max(t0) t0 from {dbname}.stb1)") tdSql.query(f"select distinct t1, t0 from (select t1,t0 from {dbname}.stb1 where t0 > 2 ) where t1 < 3") tdSql.checkRows(1) diff --git a/tests/system-test/2-query/explain.py b/tests/system-test/2-query/explain.py index 92cd28a929..55f484884c 100644 --- a/tests/system-test/2-query/explain.py +++ b/tests/system-test/2-query/explain.py @@ -77,7 +77,7 @@ class TDTestCase: ) query_condition.extend( ( - 1010, + 1010.1, ''' "test1234!@#$%^&*():'>=0") + tdSql.checkRows(check_num) + + # having filter out empty + tdSql.query(f"select tbname, count(*) from {self.dbname}.{self.stable} {keyword} by 1 having count(*) <= 0") + tdSql.checkRows(check_num - nonempty_tb_num) + + ####### by tag + tdSql.query(f"select t2, count(*), count(1), count(c1) from {self.dbname}.{self.stable} {keyword} by 1 ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2, count(*) from {self.dbname}.{self.stable} {keyword} by 1 having count(*) <= 0") + tdSql.checkRows(check_num - nonempty_tb_num) + + # where + tdSql.query(f"select t2, count(*) from {self.dbname}.{self.stable} where ts < now {keyword} by 1 ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2, count(*) from {self.dbname}.{self.stable} where ts > 1737146000000 {keyword} by 1 ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2, count(*) from {self.dbname}.{self.stable} where c1 = 1 {keyword} by 1 ") + tdSql.checkRows(check_num) + + ####### by col + tdSql.query(f"select c1, count(*), count(1), count(c1) from {self.dbname}.{self.stable} {keyword} by 1 ") + num = 0 + if nonempty_tb_num > 0: + num = self.row_nums + tdSql.checkRows(num) + + tdSql.query(f"select ts, count(*) from {self.dbname}.{self.stable} {keyword} by 1 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + # col + tag + tdSql.query(f"select t2, c1, count(*) from {self.dbname}.{self.stable} {keyword} by 1, 2 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2, c1, count(*) from {self.dbname}.{self.stable} {keyword} by 1, c1 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2, c1, count(*) from {self.dbname}.{self.stable} {keyword} by t2, 2 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + tdSql.query(f"select t2, t3, c1, count(*) from {self.dbname}.{self.stable} {keyword} by 1, 2, 3 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2, t3, c1, count(*) from {self.dbname}.{self.stable} {keyword} by t2, 2, 3 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2, t3, c1, count(*) from {self.dbname}.{self.stable} {keyword} by 1, t3, 3 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + tdSql.query(f"select sum(t0.sumc2) from (select c1, sum(c2) as sumc2 from {self.dbname}.{self.stable} {keyword} by 1) t0") + num = 0 + if nonempty_tb_num > 0: + num = 1 + tdSql.checkRows(num) + + tdSql.query(f"select abs(c1), count(*) from {self.dbname}.{self.stable} {keyword} by 1") + num = 0 + if nonempty_tb_num > 0: + num = self.row_nums + tdSql.checkRows(num) + + ####### error case + tdSql.error(f"select c1, count(*) from {self.dbname}.{self.stable} {keyword} by 10") + tdSql.error(f"select c1, count(*) from {self.dbname}.{self.stable} {keyword} by 0") + tdSql.error(f"select c1, c2, count(*) from {self.dbname}.{self.stable} {keyword} by 0, 1") + tdSql.error(f"select c1, count(*) from {self.dbname}.{self.stable} {keyword} by 1.2") + tdSql.error(f"select c1, c2, c3, count(*) from {self.dbname}.{self.stable} {keyword} by 1, 2.2, 3") + tdSql.error(f"select c1, c2, count(*) from {self.dbname}.{self.stable} {keyword} by 1") + tdSql.error(f"select c1, avg(c2), count(*) from {self.dbname}.{self.stable} {keyword} by 1, 2") + + def test_groupby_alias(self, keyword, check_num, nonempty_tb_num): + tdSql.query(f"select t1 as t1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t1_alias ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t1 as t1_alias from {self.dbname}.{self.stable} {keyword} by t1_alias order by count(*)") + tdSql.checkRows(check_num) + + # last + tdSql.query(f"select t1 as t1_alias from {self.dbname}.{self.stable} {keyword} by t1_alias having count(*)>=0") + tdSql.checkRows(check_num) + + # having filter out empty + tdSql.query(f"select t1 as t1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t1_alias having count(*) <= 0") + tdSql.checkRows(check_num - nonempty_tb_num) + + ####### by tag + tdSql.query(f"select t2 as t2_alias, count(*), count(1), count(c1) from {self.dbname}.{self.stable} {keyword} by t2_alias ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2 as t2_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t2_alias having count(*) <= 0") + tdSql.checkRows(check_num - nonempty_tb_num) + + # where + tdSql.query(f"select t2 as t2_alias, count(*) from {self.dbname}.{self.stable} where ts < now {keyword} by t2_alias ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2 as t2_alias, count(*) from {self.dbname}.{self.stable} where ts > 1737146000000 {keyword} by t2_alias ") + tdSql.checkRows(check_num) + + tdSql.query(f"select t2 as t2_alias, count(*) from {self.dbname}.{self.stable} where c1 = 1 {keyword} by t2_alias ") + tdSql.checkRows(check_num) + + ####### by col + tdSql.query(f"select c1 as c1_alias, count(*), count(1), count(c1) from {self.dbname}.{self.stable} {keyword} by c1_alias ") + num = 0 + if nonempty_tb_num > 0: + num = self.row_nums + tdSql.checkRows(num) + + tdSql.query(f"select ts as ts_alias, count(*) from {self.dbname}.{self.stable} {keyword} by ts_alias ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + # col + tag + tdSql.query(f"select t2 as t2_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by 1, 2 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2 as t2_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by 1, c1 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2 as t2_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t2, 2 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + tdSql.query(f"select t2 as t2_alias, t3 as t3_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t2_alias, t3_alias, 3 ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2 as t2_alias, t3 as t3_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t2, t3_alias, c1_alias ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + tdSql.query(f"select t2 as t2_alias, t3 as t3_alias, c1 as c1_alias, count(*) from {self.dbname}.{self.stable} {keyword} by t2_alias, t3, c1_alias ") + tdSql.checkRows(nonempty_tb_num * self.row_nums) + + tdSql.query(f"select sum(t0.sumc2) from (select c1 as c1_alias, sum(c2) as sumc2 from {self.dbname}.{self.stable} {keyword} by c1_alias) t0") + num = 0 + if nonempty_tb_num > 0: + num = 1 + tdSql.checkRows(num) + + tdSql.query(f"select abs(c1) as abs_alias, count(*) from {self.dbname}.{self.stable} {keyword} by abs_alias") + num = 0 + if nonempty_tb_num > 0: + num = self.row_nums + tdSql.checkRows(num) + + ####### error case + tdSql.error(f"select c1, avg(c2) as avg_alias, count(*) from {self.dbname}.{self.stable} {keyword} by 1, avg_alias") + def test_groupby_sub_table(self): for i in range(self.tb_nums): tbname = f"{self.dbname}.sub_{self.stable}_{i}" @@ -276,6 +427,10 @@ class TDTestCase: # empty table only self.test_groupby('group', self.tb_nums, 0) self.test_groupby('partition', self.tb_nums, 0) + self.test_groupby_position('group', self.tb_nums, 0) + self.test_groupby_position('partition', self.tb_nums, 0) + self.test_groupby_alias('group', self.tb_nums, 0) + self.test_groupby_alias('partition', self.tb_nums, 0) self.test_innerSelect(self.tb_nums) self.test_multi_group_key(self.tb_nums, 0) self.test_multi_agg(self.tb_nums, 0) @@ -287,6 +442,10 @@ class TDTestCase: self.test_groupby('group', self.tb_nums, nonempty_tb_num) self.test_groupby('partition', self.tb_nums, nonempty_tb_num) + self.test_groupby_position('group', self.tb_nums, nonempty_tb_num) + self.test_groupby_position('partition', self.tb_nums, nonempty_tb_num) + self.test_groupby_alias('group', self.tb_nums, nonempty_tb_num) + self.test_groupby_alias('partition', self.tb_nums, nonempty_tb_num) self.test_groupby_sub_table() self.test_innerSelect(self.tb_nums) self.test_multi_group_key(self.tb_nums, nonempty_tb_num) diff --git a/tests/system-test/2-query/leastsquares.py b/tests/system-test/2-query/leastsquares.py index 3dfd1f6aca..91cd02bf8f 100644 --- a/tests/system-test/2-query/leastsquares.py +++ b/tests/system-test/2-query/leastsquares.py @@ -77,7 +77,7 @@ class TDTestCase: ) query_condition.extend( ( - 1010, + 1010.1, ''' "test1234!@#$%^&*():'> Date: Wed, 31 Jul 2024 15:37:21 +0800 Subject: [PATCH 11/17] fix: fetch row failed issue --- source/client/src/clientImpl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index a458edcad9..75c1eabe7e 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -2920,8 +2920,10 @@ void taosAsyncFetchImpl(SRequestObj* pRequest, __taos_async_fn_t fp, void* param .cbParam = pRequest, }; - if (TSDB_CODE_SUCCESS != schedulerFetchRows(pRequest->body.queryJob, &req)) { - tscError("0x%" PRIx64 " failed to schedule fetch rows", pRequest->self); + int32_t code = schedulerFetchRows(pRequest->body.queryJob, &req); + if (TSDB_CODE_SUCCESS != code) { + tscError("0x%" PRIx64 " failed to schedule fetch rows", pRequest->requestId); + pRequest->body.fetchFp(param, pRequest, code); } } From 715b6428aaae8a0d10b1e35cab8599f9a0a657ea Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 31 Jul 2024 16:33:44 +0800 Subject: [PATCH 12/17] fix(stream): update the merge result check. --- source/libs/stream/src/streamData.c | 8 ++++++-- source/libs/stream/src/streamQueue.c | 11 ++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 00a62d4773..57e5322e38 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -219,13 +219,17 @@ int32_t streamQueueMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; - (void) taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); + void* px = taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); + if (px == NULL) { + return terrno; + } + taosArrayDestroy(pBlockSrc->blocks); streamQueueItemIncSize(dst, streamQueueItemGetSize(pElem)); taosFreeQitem(pElem); *pRes = dst; - return TSDB_CODE_SUCCESS; + return code; } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)dst; SStreamDataSubmit* pBlockSrc = (SStreamDataSubmit*)pElem; diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 537062b04e..5e538c1e42 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -231,13 +231,14 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte if (*pInput == NULL) { ASSERT((*numOfBlocks) == 0); *pInput = qItem; - } else { - // merge current block failed, let's handle the already merged blocks. + } else { // merge current block failed, let's handle the already merged blocks. void* newRet = NULL; int32_t code = streamQueueMergeQueueItem(*pInput, qItem, (SStreamQueueItem**)&newRet); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, - tstrerror(terrno)); + if (newRet == NULL) { + if (code) { + stError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, + tstrerror(code)); + } *blockSize = streamQueueItemGetSize(*pInput); if (taskLevel == TASK_LEVEL__SINK) { From d9750319b848da6062b84354c68374f33376ea9a Mon Sep 17 00:00:00 2001 From: sima Date: Wed, 31 Jul 2024 08:45:33 +0800 Subject: [PATCH 13/17] feat:[TS-5137] Support group/partition by position and alias --- docs/en/12-taos-sql/06-select.md | 18 +++++++++++++++--- docs/zh/12-taos-sql/06-select.md | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/en/12-taos-sql/06-select.md b/docs/en/12-taos-sql/06-select.md index 8bfdaeb9c8..33236c0173 100755 --- a/docs/en/12-taos-sql/06-select.md +++ b/docs/en/12-taos-sql/06-select.md @@ -65,10 +65,16 @@ interp_clause: RANGE(ts_val [, ts_val]) EVERY(every_val) FILL(fill_mod_and_val) partition_by_clause: - PARTITION BY expr [, expr] ... + PARTITION BY partition_by_expr [, partition_by_expr] ... + +partition_by_expr: + {expr | position | c_alias} group_by_clause: - GROUP BY expr [, expr] ... HAVING condition + GROUP BY group_by_expr [, group_by_expr] ... HAVING condition + +group_by_expr: + {expr | position | c_alias} order_by_clasue: ORDER BY order_expr [, order_expr] ... @@ -274,7 +280,13 @@ If you use a GROUP BY clause, the SELECT list can only include the following ite The GROUP BY clause groups each row of data by the value of the expression following the clause and returns a combined result for each group. -The expressions in a GROUP BY clause can include any column in any table or view. It is not necessary that the expressions appear in the SELECT list. +In the GROUP BY clause, columns from a table or view can be grouped by specifying the column name. These columns do not need to be included in the SELECT list. + +You can specify integers in GROUP BY expression to indicate the expressions in the select list used for grouping. For example, 1 indicates the first item in the select list. + +You can specify column names in result set to indicate the expressions in the select list used for grouping. + +When using position and result set column names for grouping in the GROUP BY clause, the corresponding expressions in the select list must not be aggregate functions. The GROUP BY clause does not guarantee that the results are ordered. If you want to ensure that grouped data is ordered, use the ORDER BY clause. diff --git a/docs/zh/12-taos-sql/06-select.md b/docs/zh/12-taos-sql/06-select.md index f10c5ebb69..af19559c81 100755 --- a/docs/zh/12-taos-sql/06-select.md +++ b/docs/zh/12-taos-sql/06-select.md @@ -65,10 +65,16 @@ interp_clause: RANGE(ts_val [, ts_val]) EVERY(every_val) FILL(fill_mod_and_val) partition_by_clause: - PARTITION BY expr [, expr] ... + PARTITION BY partition_by_expr [, partition_by_expr] ... + +partition_by_expr: + {expr | position | c_alias} group_by_clause: - GROUP BY expr [, expr] ... HAVING condition + GROUP BY group_by_expr [, group_by_expr] ... HAVING condition + +group_by_expr: + {expr | position | c_alias} order_by_clasue: ORDER BY order_expr [, order_expr] ... @@ -274,7 +280,13 @@ TDengine 支持基于时间戳主键的 INNER JOIN,规则如下: GROUP BY 子句对每行数据按 GROUP BY 后的表达式的值进行分组,并为每个组返回一行汇总信息。 -GROUP BY 子句中的表达式可以包含表或视图中的任何列,这些列不需要出现在 SELECT 列表中。 +GROUP BY 子句中可以通过指定表或视图的列名来按照表或视图中的任何列分组,这些列不需要出现在 SELECT 列表中。 + +GROUP BY 子句中可以使用位置语法,位置标识为正整数,从 1 开始,表示使用 SELECT 列表的第几个表达式进行分组。 + +GROUP BY 子句中可以使用结果集列名,表示使用 SELECT 列表的指定表达式进行分组。 + +GROUP BY 子句中在使用位置语法和结果集列名进行分组时,其对应的 SELECT 列表中的表达式不能是聚集函数。 该子句对行进行分组,但不保证结果集的顺序。若要对分组进行排序,请使用 ORDER BY 子句 From ffba28b6c3f7875feddafa7d764173c87e71958a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 31 Jul 2024 23:36:16 +0800 Subject: [PATCH 14/17] feat:[TD-31097]init lock to avoid error in mac os --- source/dnode/mnode/impl/src/mndMain.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 7e072b8fe5..37a171e9a4 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -669,6 +669,13 @@ SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) { } (void)memset(pMnode, 0, sizeof(SMnode)); + int32_t code = taosThreadRwlockInit(&pMnode->lock, NULL); + if (code != 0) { + taosMemoryFree(pMnode); + mError("failed to open mnode lock since %s", tstrerror(code)); + return NULL; + } + char timestr[24] = "1970-01-01 00:00:00.00"; (void)taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0); mndSetOptions(pMnode, pOption); @@ -682,7 +689,7 @@ SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) { return NULL; } - int32_t code = mndCreateDir(pMnode, path); + code = mndCreateDir(pMnode, path); if (code != 0) { code = terrno; mError("failed to open mnode since %s", tstrerror(code)); From 75efea55513161a182eec91a84d61436fed7c57a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 31 Jul 2024 23:44:20 +0800 Subject: [PATCH 15/17] feat:[TS-4592]remove lost status for consumer --- source/dnode/mnode/impl/src/mndConsumer.c | 2 ++ source/dnode/mnode/impl/src/mndSubscribe.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 37527325db..1a9f808688 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -901,6 +901,7 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * // status const char *pStatusName = mndConsumerStatusName(pConsumer->status); status = taosMemoryCalloc(1, pShow->pMeta->pSchemas[cols].bytes); + MND_TMQ_NULL_CHECK(status); STR_TO_VARSTR(status, pStatusName); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); @@ -940,6 +941,7 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * tFormatOffset(buf, TSDB_OFFSET_LEN, &pVal); parasStr = taosMemoryCalloc(1, pShow->pMeta->pSchemas[cols].bytes); + MND_TMQ_NULL_CHECK(parasStr); (void)sprintf(varDataVal(parasStr), "tbname:%d,commit:%d,interval:%dms,reset:%s", pConsumer->withTbName, pConsumer->autoCommit, pConsumer->autoCommitInterval, buf); varDataSetLen(parasStr, strlen(varDataVal(parasStr))); diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index b2a866979c..8bc3c9064e 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -396,7 +396,6 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { END: taosMemoryFree(pVgEp); - sdbRelease(pMnode->pSdb, pVgroup); taosArrayDestroyP(newVgs, (FDelete)tDeleteSMqVgEp); return code; } @@ -773,7 +772,7 @@ static int32_t mndCheckConsumer(SRpcMsg *pMsg, SHashObj *rebSubHash) { if (status == MQ_CONSUMER_STATUS_READY) { if (taosArrayGetSize(pConsumer->currentTopics) == 0) { // unsubscribe or close - mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info); + MND_TMQ_RETURN_CHECK(mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info)); } else if (hbStatus * tsMqRebalanceInterval * 1000 >= pConsumer->sessionTimeoutMs || pollStatus * tsMqRebalanceInterval * 1000 >= pConsumer->maxPollIntervalMs) { taosRLockLatch(&pConsumer->lock); @@ -788,7 +787,7 @@ static int32_t mndCheckConsumer(SRpcMsg *pMsg, SHashObj *rebSubHash) { MND_TMQ_RETURN_CHECK(buildRebInfo(rebSubHash, pConsumer->rebRemovedTopics, 0, pConsumer->cgroup, pConsumer->consumerId)); taosRUnLockLatch(&pConsumer->lock); } else { - mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info); + MND_TMQ_RETURN_CHECK(mndSendConsumerMsg(pMnode, pConsumer->consumerId, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, &pMsg->info)); } mndReleaseConsumer(pMnode, pConsumer); From ff1778220c27b3634c397c358a5bf62af1ac285e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 1 Aug 2024 10:24:43 +0800 Subject: [PATCH 16/17] feat:[TS-4592]remove lost status for consumer --- source/dnode/mnode/impl/src/mndSubscribe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 8bc3c9064e..9a64387e82 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -395,6 +395,7 @@ static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput) { return totalVgNum; END: + sdbRelease(pMnode->pSdb, pVgroup); taosMemoryFree(pVgEp); taosArrayDestroyP(newVgs, (FDelete)tDeleteSMqVgEp); return code; From b93e283945053dcbd8f0c36ae21f16d876aea18d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 1 Aug 2024 10:30:37 +0800 Subject: [PATCH 17/17] fix(stream): check existence for dst stable. --- source/dnode/vnode/src/tq/tqSink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index d9e39ad6f5..3b375f7f82 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -957,6 +957,12 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { metaReaderDoInit(&mer1, pVnode->pMeta, META_READER_LOCK); code = metaReaderGetTableEntryByUid(&mer1, pOutputInfo->tbSink.stbUid); + if (code != TSDB_CODE_SUCCESS) { + tqError("s-task:%s vgId:%d failed to get the dst stable, failed to sink results", id, vgId); + metaReaderClear(&mer1); + return; + } + pOutputInfo->tbSink.pTagSchema = tCloneSSchemaWrapper(&mer1.me.stbEntry.schemaTag); metaReaderClear(&mer1);