diff --git a/docs/en/12-taos-sql/12-distinguished.md b/docs/en/12-taos-sql/12-distinguished.md index 5dca92a35c..818b67db9b 100644 --- a/docs/en/12-taos-sql/12-distinguished.md +++ b/docs/en/12-taos-sql/12-distinguished.md @@ -80,7 +80,7 @@ These pseudocolumns occur after the aggregation clause. `FILL` clause is used to specify how to fill when there is data missing in any window, including: 1. NONE: No fill (the default fill mode) -2. VALUE: Fill with a fixed value, which should be specified together, for example `FILL(VALUE, 1.23)` Note: The value filled depends on the data type. For example, if you run FILL(VALUE 1.23) on an integer column, the value 1 is filled. +2. VALUE: Fill with a fixed value, which should be specified together, for example `FILL(VALUE, 1.23)` Note: The value filled depends on the data type. For example, if you run FILL(VALUE 1.23) on an integer column, the value 1 is filled. If multiple columns in select list need to be filled, then in the fill clause there must be a fill value for each of these columns, for example, `SELECT _wstart, min(c1), max(c1) FROM ... FILL(VALUE, 0, 0)`. 3. PREV: Fill with the previous non-NULL value, `FILL(PREV)` 4. NULL: Fill with NULL, `FILL(NULL)` 5. LINEAR: Fill with the closest non-NULL value, `FILL(LINEAR)` diff --git a/docs/en/12-taos-sql/14-stream.md b/docs/en/12-taos-sql/14-stream.md index a6759da858..9c6c57ba6a 100644 --- a/docs/en/12-taos-sql/14-stream.md +++ b/docs/en/12-taos-sql/14-stream.md @@ -30,7 +30,7 @@ subquery: SELECT [DISTINCT] select_list from_clause [WHERE condition] [PARTITION BY tag_list] - [window_clause] + window_clause ``` Session windows, state windows, and sliding windows are supported. When you configure a session or state window for a supertable, you must use PARTITION BY TBNAME. If the source table has a composite primary key, state windows, event windows, and count windows are not supported. @@ -193,11 +193,32 @@ All [scalar functions](../function/#scalar-functions) are available in stream pr - [unique](../function/#unique) - [mode](../function/#mode) -## Pause\Resume stream +## Pause Resume stream 1.pause stream +```sql PAUSE STREAM [IF EXISTS] stream_name; +``` If "IF EXISTS" is not specified and the stream does not exist, an error will be reported; If "IF EXISTS" is specified and the stream does not exist, success is returned; If the stream exists, paused all stream tasks. 2.resume stream +```sql RESUME STREAM [IF EXISTS] [IGNORE UNTREATED] stream_name; +``` If "IF EXISTS" is not specified and the stream does not exist, an error will be reported. If "IF EXISTS" is specified and the stream does not exist, success is returned; If the stream exists, all of the stream tasks will be resumed. If "IGNORE UntREATED" is specified, data written during the pause period of stream is ignored when resuming stream. + +## Stream State Backup +The intermediate processing results of stream, a.k.a stream state, need to be persistent on the disk properly during stream processing. The stream state, consisting of multiple files on disk, may be transferred between different computing nodes during the stream processing, as a result of a leader/follower switch or physical computing node offline. You need to deploy the rsync on each physical node to enable the backup and restore processing work, since _ver_.3.3.2.1. To ensure it works correctly, please refer to the following instructions: +1. add the option "snodeAddress" in the configure file +2. add the option "checkpointBackupDir" in the configure file to set the backup data directory. +3. create a _snode_ before creating a stream to ensure the backup service is activated. Otherwise, the checkpoint may not generated during the stream procedure. + +>snodeAddress 127.0.0.1:873 +> +>checkpointBackupDir /home/user/stream/backup/checkpoint/ + +## create snode +The snode, stream node for short, on which the aggregate tasks can be deployed on, is a stateful computing node dedicated to the stream processing. An important feature is to backup and restore the stream state files. The snode needs to be created before creating stream tasks. Use the following SQL statement to create a snode in a TDengine cluster, and only one snode is allowed in a TDengine cluster for now. +```sql +CREATE SNODE ON DNODE id +``` +is the ordinal number of a dnode, which can be acquired by using ```show dnodes``` statement. diff --git a/docs/zh/12-taos-sql/12-distinguished.md b/docs/zh/12-taos-sql/12-distinguished.md index b979d44a5e..bf24d0adac 100755 --- a/docs/zh/12-taos-sql/12-distinguished.md +++ b/docs/zh/12-taos-sql/12-distinguished.md @@ -76,7 +76,7 @@ window_clause: { FILL 语句指定某一窗口区间数据缺失的情况下的填充模式。填充模式包括以下几种: 1. 不进行填充:NONE(默认填充模式)。 -2. VALUE 填充:固定值填充,此时需要指定填充的数值。例如:FILL(VALUE, 1.23)。这里需要注意,最终填充的值受由相应列的类型决定,如 FILL(VALUE, 1.23),相应列为 INT 类型,则填充值为 1。 +2. VALUE 填充:固定值填充,此时需要指定填充的数值。例如:FILL(VALUE, 1.23)。这里需要注意,最终填充的值受由相应列的类型决定,如 FILL(VALUE, 1.23),相应列为 INT 类型,则填充值为 1, 若查询列表中有多列需要FILL, 则需要给每一个FILL列指定VALUE, 如`SELECT _wstart, min(c1), max(c1) FROM ... FILL(VALUE, 0, 0)`。 3. PREV 填充:使用前一个非 NULL 值填充数据。例如:FILL(PREV)。 4. NULL 填充:使用 NULL 填充数据。例如:FILL(NULL)。 5. LINEAR 填充:根据前后距离最近的非 NULL 值做线性插值填充。例如:FILL(LINEAR)。 diff --git a/docs/zh/12-taos-sql/14-stream.md b/docs/zh/12-taos-sql/14-stream.md index 8d814d05a1..c0d14f0455 100644 --- a/docs/zh/12-taos-sql/14-stream.md +++ b/docs/zh/12-taos-sql/14-stream.md @@ -27,7 +27,7 @@ subquery: SELECT select_list from_clause [WHERE condition] [PARTITION BY tag_list] - [window_clause] + window_clause ``` 支持会话窗口、状态窗口、滑动窗口、事件窗口和计数窗口,其中,状态窗口、事件窗口和计数窗口搭配超级表时必须与partition by tbname一起使用。对于数据源表是复合主键的流,不支持状态窗口、事件窗口、计数窗口的计算。 @@ -271,4 +271,23 @@ PAUSE STREAM [IF EXISTS] stream_name; 2.流计算恢复计算任务 RESUME STREAM [IF EXISTS] [IGNORE UNTREATED] stream_name; -没有指定IF EXISTS,如果该stream不存在,则报错,如果存在,则恢复流计算;指定了IF EXISTS,如果stream不存在,则返回成功;如果存在,则恢复流计算。如果指定IGNORE UNTREATED,则恢复流计算时,忽略流计算暂停期间写入的数据。 \ No newline at end of file +没有指定IF EXISTS,如果该stream不存在,则报错,如果存在,则恢复流计算;指定了IF EXISTS,如果stream不存在,则返回成功;如果存在,则恢复流计算。如果指定IGNORE UNTREATED,则恢复流计算时,忽略流计算暂停期间写入的数据。 + +## 状态数据备份与同步 +流计算的中间结果成为计算的状态数据,需要在流计算整个生命周期中进行持久化保存。为了确保流计算中间状态能够在集群环境下在不同的节点间可靠地同步和迁移,至3.3.2.1 版本开始,需要在运行环境中部署 rsync 软件,还需要增加以下的步骤: +1. 在配置文件中配置 snode 的地址(IP+端口)和状态数据备份目录(该目录系 snode 所在的物理节点的目录)。 +2. 然后创建 snode。 +完成上述两个步骤以后才能创建流。 +如果没有创建 snode 并正确配置 snode 的地址,流计算过程中将无法生成检查点(checkpoint),并可能导致后续的计算结果产生错误。 + +> snodeAddress 127.0.0.1:873 +> +> checkpointBackupDir /home/user/stream/backup/checkpoint/ + + +## 创建 snode 的方式 +使用以下命令创建 snode(stream node), snode 是流计算中有状态的计算节点,可用于部署聚合任务,同时负责备份不同的流计算任务生成的检查点数据。 +```sql +CREATE SNODE ON DNODE [id] +``` +其中的 id 是集群中的 dnode 的序号。请注意选择的dnode,流计算的中间状态将自动在其上进行备份。 diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 96b9617fc4..3fd3cc4ca9 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -134,7 +134,6 @@ extern uint16_t tsMonitorPort; extern int32_t tsMonitorMaxLogs; extern bool tsMonitorComp; extern bool tsMonitorLogProtocol; -extern int32_t tsMonitorIntervalForBasic; extern bool tsMonitorForceV2; // audit diff --git a/include/common/tmsg.h b/include/common/tmsg.h index a83aa4da44..40fce1d67b 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -965,20 +965,20 @@ int32_t tSerializeSConnectReq(void* buf, int32_t bufLen, SConnectReq* pReq); int32_t tDeserializeSConnectReq(void* buf, int32_t bufLen, SConnectReq* pReq); typedef struct { - int32_t acctId; - int64_t clusterId; - uint32_t connId; - int32_t dnodeNum; - int8_t superUser; - int8_t sysInfo; - int8_t connType; - SEpSet epSet; - int32_t svrTimestamp; - int32_t passVer; - int32_t authVer; - char sVer[TSDB_VERSION_LEN]; - char sDetailVer[128]; - int64_t whiteListVer; + int32_t acctId; + int64_t clusterId; + uint32_t connId; + int32_t dnodeNum; + int8_t superUser; + int8_t sysInfo; + int8_t connType; + SEpSet epSet; + int32_t svrTimestamp; + int32_t passVer; + int32_t authVer; + char sVer[TSDB_VERSION_LEN]; + char sDetailVer[128]; + int64_t whiteListVer; SMonitorParas monitorParas; } SConnectRsp; @@ -1638,15 +1638,15 @@ void tFreeSFuncInfo(SFuncInfo* pInfo); void tFreeSRetrieveFuncRsp(SRetrieveFuncRsp* pRsp); typedef struct { - int32_t statusInterval; - int64_t checkTime; // 1970-01-01 00:00:00.000 - char timezone[TD_TIMEZONE_LEN]; // tsTimezone - char locale[TD_LOCALE_LEN]; // tsLocale - char charset[TD_LOCALE_LEN]; // tsCharset - int8_t ttlChangeOnWrite; - int8_t enableWhiteList; - int8_t encryptionKeyStat; - uint32_t encryptionKeyChksum; + int32_t statusInterval; + int64_t checkTime; // 1970-01-01 00:00:00.000 + char timezone[TD_TIMEZONE_LEN]; // tsTimezone + char locale[TD_LOCALE_LEN]; // tsLocale + char charset[TD_LOCALE_LEN]; // tsCharset + int8_t ttlChangeOnWrite; + int8_t enableWhiteList; + int8_t encryptionKeyStat; + uint32_t encryptionKeyChksum; SMonitorParas monitorParas; } SClusterCfg; @@ -1745,9 +1745,9 @@ typedef enum { } MONITOR_TYPE; typedef struct { - int32_t contLen; - char* pCont; - MONITOR_TYPE type; + int32_t contLen; + char* pCont; + MONITOR_TYPE type; } SStatisReq; int32_t tSerializeSStatisReq(void* buf, int32_t bufLen, SStatisReq* pReq); @@ -3035,8 +3035,8 @@ typedef struct { int8_t source; // TD_REQ_FROM_TAOX-taosX or TD_REQ_FROM_APP-taosClient } SVCreateTbBatchReq; -int tEncodeSVCreateTbBatchReq(SEncoder* pCoder, const SVCreateTbBatchReq* pReq); -int tDecodeSVCreateTbBatchReq(SDecoder* pCoder, SVCreateTbBatchReq* pReq); +int tEncodeSVCreateTbBatchReq(SEncoder* pCoder, const SVCreateTbBatchReq* pReq); +int tDecodeSVCreateTbBatchReq(SDecoder* pCoder, SVCreateTbBatchReq* pReq); void tDeleteSVCreateTbBatchReq(SVCreateTbBatchReq* pReq); typedef struct { @@ -3275,10 +3275,10 @@ typedef struct { } SClientHbRsp; typedef struct { - int64_t reqId; - int64_t rspId; - int32_t svrTimestamp; - SArray* rsps; // SArray + int64_t reqId; + int64_t rspId; + int32_t svrTimestamp; + SArray* rsps; // SArray SMonitorParas monitorParas; } SClientHbBatchRsp; @@ -3514,7 +3514,7 @@ typedef struct SVUpdateCheckpointInfoReq { int64_t checkpointVer; int64_t checkpointTs; int32_t transId; - int64_t hStreamId; // add encode/decode + int64_t hStreamId; // add encode/decode int64_t hTaskId; int8_t dropRelHTask; } SVUpdateCheckpointInfoReq; @@ -3993,7 +3993,7 @@ int32_t tDecodeSTaosxRsp(SDecoder* pDecoder, void* pRsp); void tDeleteSTaosxRsp(void* pRsp); typedef struct SMqBatchMetaRsp { - SMqRspHead head; // not serialize + SMqRspHead head; // not serialize STqOffsetVal rspOffset; SArray* batchMetaLen; SArray* batchMetaReq; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index c92649f1f7..3515df3127 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -250,7 +250,7 @@ TD_DEF_MSG_TYPE(TDMT_MND_DROP_TB_WITH_TSMA, "drop-tb-with-tsma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_UPDATE_CHKPT_EVT, "stream-update-chkpt-evt", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHKPT_REPORT, "stream-chkpt-report", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHKPT_CONSEN, "stream-chkpt-consen", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CONSEN_TIMER, "stream-consen-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) TD_CLOSE_MSG_SEG(TDMT_END_MND_MSG) @@ -341,6 +341,7 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_CREATE, "stream-create", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_DROP, "stream-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE_TRIGGER, "stream-retri-trigger", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_CONSEN_CHKPT, "stream-consen-chkpt", NULL, NULL) TD_CLOSE_MSG_SEG(TDMT_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_MON_MSG) //5 << 8 diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 566e8dbbd8..4d5e18520c 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -29,7 +29,8 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) int32_t tqStreamProcessStreamHbRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamProcessReqCheckpointRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamProcessChkptReportRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); -int32_t tqStreamProcessConsensusChkptRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamProcessConsensusChkptRsp2(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored); @@ -37,12 +38,12 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); -int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* msg); int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); -int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored, char* msg, int32_t msgLen); +int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored, char* msg); void tqSetRestoreVersionInfo(SStreamTask* pTask); int32_t tqExpandStreamTask(SStreamTask* pTask); diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 4c7ab6f65a..0085173ecd 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -38,6 +38,13 @@ typedef enum { SLOW_LOG_READ_QUIT = 3, } SLOW_LOG_QUEUE_TYPE; +static char* queueTypeStr[] = { + "SLOW_LOG_WRITE", + "SLOW_LOG_READ_RUNNING", + "SLOW_LOG_READ_BEGINNIG", + "SLOW_LOG_READ_QUIT" +}; + #define SLOW_LOG_SEND_SIZE_MAX 1024*1024 typedef struct { @@ -65,7 +72,7 @@ typedef struct { } MonitorSlowLogData; void monitorClose(); -void monitorInit(); +int32_t monitorInit(); void monitorClientSQLReqInit(int64_t clusterKey); void monitorClientSlowQueryInit(int64_t clusterId); diff --git a/include/libs/monitor/monitor.h b/include/libs/monitor/monitor.h index 9d7878ecf7..6007d52bb4 100644 --- a/include/libs/monitor/monitor.h +++ b/include/libs/monitor/monitor.h @@ -226,7 +226,6 @@ void monSetQmInfo(SMonQmInfo *pInfo); void monSetSmInfo(SMonSmInfo *pInfo); void monSetBmInfo(SMonBmInfo *pInfo); void monGenAndSendReport(); -void monGenAndSendReportBasic(); void monSendContent(char *pCont, const char* uri); void tFreeSMonMmInfo(SMonMmInfo *pInfo); diff --git a/include/libs/parser/parser.h b/include/libs/parser/parser.h index ad41b9a542..3ac357055e 100644 --- a/include/libs/parser/parser.h +++ b/include/libs/parser/parser.h @@ -89,6 +89,7 @@ typedef struct SParseContext { bool isView; bool isAudit; bool nodeOffline; + bool isStmtBind; const char* svrVer; SArray* pTableMetaPos; // sql table pos => catalog data pos SArray* pTableVgroupPos; // sql table pos => catalog data pos diff --git a/include/libs/stream/streamMsg.h b/include/libs/stream/streamMsg.h index b69032330d..34921daac3 100644 --- a/include/libs/stream/streamMsg.h +++ b/include/libs/stream/streamMsg.h @@ -216,6 +216,7 @@ typedef struct SRestoreCheckpointInfo { int64_t startTs; int64_t streamId; int64_t checkpointId; // latest checkpoint id + int32_t transId; // transaction id of the update the consensus-checkpointId transaction int32_t taskId; int32_t nodeId; } SRestoreCheckpointInfo; @@ -223,16 +224,6 @@ typedef struct SRestoreCheckpointInfo { int32_t tEncodeRestoreCheckpointInfo (SEncoder* pEncoder, const SRestoreCheckpointInfo* pReq); int32_t tDecodeRestoreCheckpointInfo(SDecoder* pDecoder, SRestoreCheckpointInfo* pReq); -typedef struct SRestoreCheckpointInfoRsp { - int64_t streamId; - int64_t checkpointId; - int64_t startTs; - int32_t taskId; -} SRestoreCheckpointInfoRsp; - -int32_t tEncodeRestoreCheckpointInfoRsp(SEncoder* pCoder, const SRestoreCheckpointInfoRsp* pInfo); -int32_t tDecodeRestoreCheckpointInfoRsp(SDecoder* pCoder, SRestoreCheckpointInfoRsp* pInfo); - typedef struct { SMsgHead head; int64_t streamId; @@ -242,8 +233,7 @@ typedef struct { typedef struct SCheckpointConsensusEntry { SRestoreCheckpointInfo req; - SRpcMsg rsp; - int64_t ts; + int64_t ts; } SCheckpointConsensusEntry; #ifdef __cplusplus diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index e98039d2fe..5ba0ce454c 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -272,9 +272,9 @@ typedef struct SCheckpointInfo { int64_t checkpointTime; // latest checkpoint time int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it - + int64_t msgVer; + int32_t consensusTransId;// consensus checkpoint id SActiveCheckpointInfo* pActiveInfo; - int64_t msgVer; } SCheckpointInfo; typedef struct SStreamStatus { @@ -289,6 +289,8 @@ typedef struct SStreamStatus { int32_t inScanHistorySentinel; bool appendTranstateBlock; // has append the transfer state data block already bool removeBackendFiles; // remove backend files on disk when free stream tasks + bool sendConsensusChkptId; + bool requireConsensusChkptId; } SStreamStatus; typedef struct SDataRange { @@ -528,10 +530,11 @@ typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool fillHistory, int64_t triggerParam, SArray* pTaskList, bool hasFillhistory, int8_t subtableWithoutMd5); +void tFreeStreamTask(SStreamTask* pTask); int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); -void tFreeStreamTask(SStreamTask* pTask); int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver); +void streamFreeTaskState(SStreamTask* pTask, ETaskStatus status); int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo); int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId); @@ -568,14 +571,16 @@ typedef struct { } SStreamScanHistoryReq; typedef struct STaskCkptInfo { - int64_t latestId; // saved checkpoint id - int64_t latestVer; // saved checkpoint ver - int64_t latestTime; // latest checkpoint time - int64_t latestSize; // latest checkpoint size - int8_t remoteBackup; // latest checkpoint backup done - int64_t activeId; // current active checkpoint id - int32_t activeTransId; // checkpoint trans id - int8_t failed; // denote if the checkpoint is failed or not + int64_t latestId; // saved checkpoint id + int64_t latestVer; // saved checkpoint ver + int64_t latestTime; // latest checkpoint time + int64_t latestSize; // latest checkpoint size + int8_t remoteBackup; // latest checkpoint backup done + int64_t activeId; // current active checkpoint id + int32_t activeTransId; // checkpoint trans id + int8_t failed; // denote if the checkpoint is failed or not + int8_t consensusChkptId; // required the consensus-checkpointId + int64_t consensusTs; // } STaskCkptInfo; typedef struct STaskStatusEntry { @@ -586,8 +591,6 @@ typedef struct STaskStatusEntry { int32_t nodeId; SVersionRange verRange; // start/end version in WAL, only valid for source task int64_t processedVer; // only valid for source task - bool inputQChanging; // inputQ is changing or not - int64_t inputQUnchangeCounter; double inputQUsed; // in MiB double inputRate; double procsThroughput; // duration between one element put into input queue and being processed. @@ -616,8 +619,8 @@ typedef struct SStreamTaskState { typedef struct SCheckpointConsensusInfo { SArray* pTaskList; - int64_t checkpointId; - int64_t genTs; + int32_t numOfTasks; + int64_t streamId; } SCheckpointConsensusInfo; int32_t streamSetupScheduleTrigger(SStreamTask* pTask); @@ -800,6 +803,7 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r void streamTaskSendRetrieveRsp(SStreamRetrieveReq* pReq, SRpcMsg* pRsp); int32_t streamProcessHeartbeatRsp(SStreamMeta* pMeta, SMStreamHbRspMsg* pRsp); +int32_t streamTaskSendPreparedCheckpointsourceRsp(SStreamTask* pTask); #ifdef __cplusplus diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 7ac22ac40f..4a7dbead23 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -46,9 +46,11 @@ const char* terrstr(); char* taosGetErrMsgReturn(); char* taosGetErrMsg(); int32_t* taosGetErrno(); +int32_t* taosGetErrln(); int32_t taosGetErrSize(); #define terrno (*taosGetErrno()) #define terrMsg (taosGetErrMsg()) +#define terrln (*taosGetErrln()) #define SET_ERROR_MSG(MSG, ...) \ snprintf(terrMsg, ERR_MSG_LEN, MSG, ##__VA_ARGS__) @@ -136,6 +138,7 @@ int32_t taosGetErrSize(); #define TSDB_CODE_TIMEOUT_ERROR TAOS_DEF_ERROR_CODE(0, 0x012C) #define TSDB_CODE_MSG_ENCODE_ERROR TAOS_DEF_ERROR_CODE(0, 0x012D) #define TSDB_CODE_NO_ENOUGH_DISKSPACE TAOS_DEF_ERROR_CODE(0, 0x012E) +#define TSDB_CODE_THIRDPARTY_ERROR TAOS_DEF_ERROR_CODE(0, 0x012F) #define TSDB_CODE_APP_IS_STARTING TAOS_DEF_ERROR_CODE(0, 0x0130) #define TSDB_CODE_APP_IS_STOPPING TAOS_DEF_ERROR_CODE(0, 0x0131) @@ -145,6 +148,7 @@ int32_t taosGetErrSize(); #define TSDB_CODE_IP_NOT_IN_WHITE_LIST TAOS_DEF_ERROR_CODE(0, 0x0134) #define TSDB_CODE_FAILED_TO_CONNECT_S3 TAOS_DEF_ERROR_CODE(0, 0x0135) #define TSDB_CODE_MSG_PREPROCESSED TAOS_DEF_ERROR_CODE(0, 0x0136) // internal +#define TSDB_CODE_OUT_OF_BUFFER TAOS_DEF_ERROR_CODE(0, 0x0137) //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) diff --git a/include/util/tutil.h b/include/util/tutil.h index c049949590..d1a18dc3e8 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -117,6 +117,15 @@ static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, } } +#define TAOS_CHECK_ERRNO(CODE) \ + do { \ + terrno = (CODE); \ + if (terrno != TSDB_CODE_SUCCESS) { \ + terrln = __LINE__; \ + goto _exit; \ + } \ + } while (0) + #define TSDB_CHECK_CODE(CODE, LINO, LABEL) \ do { \ if (TSDB_CODE_SUCCESS != (CODE)) { \ diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 0d22257433..7c342b85d4 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -283,6 +283,7 @@ typedef struct SRequestObj { bool inRetry; bool isSubReq; bool inCallback; + bool isStmtBind; // is statement bind parameter uint32_t prevCode; // previous error code: todo refactor, add update flag for catalog uint32_t retry; int64_t allocatorRefId; diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 3a821768f8..ecfa1e3392 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -864,10 +864,15 @@ void taos_init_imp(void) { initQueryModuleMsgHandle(); if (taosConvInit() != 0) { + tscInitRes = -1; tscError("failed to init conv"); return; } - + if (monitorInit() != 0){ + tscInitRes = -1; + tscError("failed to init monitor"); + return; + } rpcInit(); SCatalogCfg cfg = {.maxDBCacheNum = 100, .maxTblCacheNum = 100}; @@ -891,7 +896,6 @@ void taos_init_imp(void) { taosThreadMutexInit(&appInfo.mutex, NULL); tscCrashReportInit(); - monitorInit(); tscDebug("client is initialized successfully"); } diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 7034778887..f8a817dc46 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -206,6 +206,7 @@ int32_t buildRequest(uint64_t connId, const char* sql, int sqlLen, void* param, (*pRequest)->sqlstr[sqlLen] = 0; (*pRequest)->sqlLen = sqlLen; (*pRequest)->validateOnly = validateSql; + (*pRequest)->isStmtBind = false; ((SSyncQueryParam*)(*pRequest)->body.interParam)->userParam = param; @@ -266,7 +267,8 @@ int32_t parseSql(SRequestObj* pRequest, bool topicQuery, SQuery** pQuery, SStmtC .isSuperUser = (0 == strcmp(pTscObj->user, TSDB_DEFAULT_USER)), .enableSysInfo = pTscObj->sysInfo, .svrVer = pTscObj->sVer, - .nodeOffline = (pTscObj->pAppInfo->onlineDnodes < pTscObj->pAppInfo->totalDnodes)}; + .nodeOffline = (pTscObj->pAppInfo->onlineDnodes < pTscObj->pAppInfo->totalDnodes), + .isStmtBind = pRequest->isStmtBind}; cxt.mgmtEpSet = getEpSet_s(&pTscObj->pAppInfo->mgmtEp); int32_t code = catalogGetHandle(pTscObj->pAppInfo->clusterId, &cxt.pCatalog); diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 479ea76fe3..d1a9897caa 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -18,6 +18,7 @@ int32_t quitCnt = 0; tsem2_t monitorSem; STaosQueue* monitorQueue; SHashObj* monitorSlowLogHash; +char tmpSlowLogPath[PATH_MAX] = {0}; static int32_t getSlowLogTmpDir(char* tmpPath, int32_t size){ if (tsTempDir == NULL) { @@ -453,6 +454,10 @@ static int64_t getFileSize(char* path){ } static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64_t offset, SLOW_LOG_QUEUE_TYPE type, char* fileName, void* pTransporter, SEpSet *epSet){ + if (data == NULL){ + taosMemoryFree(fileName); + return -1; + } MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); if(pParam == NULL){ taosMemoryFree(data); @@ -468,18 +473,26 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 return sendReport(pTransporter, epSet, data, MONITOR_TYPE_SLOW_LOG, pParam); } -static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ +static int32_t monitorReadSend(int64_t clusterId, TdFilePtr pFile, int64_t* offset, int64_t size, SLOW_LOG_QUEUE_TYPE type, char* fileName){ + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL){ + tscError("failed to get app instance by clusterId:%" PRId64, clusterId); + return -1; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + char* data = readFile(pFile, offset, size); + return sendSlowLog(clusterId, data, (type == SLOW_LOG_READ_BEGINNIG ? pFile : NULL), *offset, type, fileName, pInst->pTransporter, &ep); +} + +static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, TdFilePtr pFile, int64_t offset){ int64_t size = getFileSize(*fileName); if(size <= offset){ processFileInTheEnd(pFile, *fileName); tscDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", *fileName); }else{ - char* data = readFile(pFile, &offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, *fileName, pTransporter, epSet); - *fileName = NULL; - } - tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); + int32_t code = monitorReadSend(clusterId, pFile, &offset, size, SLOW_LOG_READ_BEGINNIG, *fileName); + tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log clusterId:%"PRId64",ret:%d", clusterId, code); + *fileName = NULL; } } @@ -500,17 +513,8 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ tscDebug("[monitor] monitorSendSlowLogAtRunning truncate file to 0 file:%p", pClient->pFile); pClient->offset = 0; }else{ - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL){ - tscError("failed to get app instance by clusterId:%" PRId64, clusterId); - return; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); - } - tscDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); + int32_t code = monitorReadSend(clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_RUNNING, NULL); + tscDebug("[monitor] monitorSendSlowLogAtRunning send slow log clusterId:%"PRId64",ret:%d", clusterId, code); } } @@ -532,16 +536,8 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { return true; } }else{ - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL) { - return true; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); - } - tscInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); + int32_t code = monitorReadSend(clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_QUIT, NULL); + tscDebug("[monitor] monitorSendSlowLogAtQuit send slow log clusterId:%"PRId64",ret:%d", clusterId, code); } return false; } @@ -558,16 +554,11 @@ static void monitorSendAllSlowLogAtQuit(){ pClient->pFile = NULL; }else if(pClient->offset == 0){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); - SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); - if(pInst == NULL) { - continue; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL && sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ + int32_t code = monitorReadSend(*clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_QUIT, NULL); + tscDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log clusterId:%"PRId64",ret:%d", *clusterId, code); + if (code == 0){ quitCnt ++; } - tscInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } } } @@ -613,12 +604,8 @@ static void monitorSendAllSlowLog(){ } continue; } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); - } - tscDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); + int32_t code = monitorReadSend(*clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_RUNNING, NULL); + tscDebug("[monitor] monitorSendAllSlowLog send slow log clusterId:%"PRId64",ret:%d", *clusterId, code); } } } @@ -631,7 +618,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ return; } char namePrefix[PATH_MAX] = {0}; - if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, pInst->clusterId) < 0) { + if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, clusterId) < 0) { tscError("failed to generate slow log file name prefix"); return; } @@ -656,7 +643,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || strstr(name, namePrefix) == NULL) { - tscInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); + tscInfo("skip file:%s, for cluster id:%"PRIx64, name, clusterId); continue; } @@ -672,9 +659,8 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ taosCloseFile(&pFile); continue; } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); char *tmp = taosStrdup(filename); - monitorSendSlowLogAtBeginning(pInst->clusterId, &tmp, pFile, 0, pInst->pTransporter, &ep); + monitorSendSlowLogAtBeginning(clusterId, &tmp, pFile, 0); taosMemoryFree(tmp); } @@ -690,28 +676,6 @@ static void* monitorThreadFunc(void *param){ } #endif - char tmpPath[PATH_MAX] = {0}; - if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0){ - return NULL; - } - - if (taosMulModeMkDir(tmpPath, 0777, true) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - printf("failed to create dir:%s since %s", tmpPath, terrstr()); - return NULL; - } - - if (tsem2_init(&monitorSem, 0, 0) != 0) { - tscError("sem init error since %s", terrstr()); - return NULL; - } - - monitorQueue = taosOpenQueue(); - if(monitorQueue == NULL){ - tscError("open queue error since %s", terrstr()); - return NULL; - } - if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { return NULL; } @@ -738,16 +702,12 @@ static void* monitorThreadFunc(void *param){ if (slowLogData != NULL) { if (slowLogData->type == SLOW_LOG_READ_BEGINNIG){ if(slowLogData->pFile != NULL){ - SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); - if(pInst != NULL) { - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorSendSlowLogAtBeginning(slowLogData->clusterId, &(slowLogData->fileName), slowLogData->pFile, slowLogData->offset, pInst->pTransporter, &ep); - } + monitorSendSlowLogAtBeginning(slowLogData->clusterId, &(slowLogData->fileName), slowLogData->pFile, slowLogData->offset); }else{ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); } } else if(slowLogData->type == SLOW_LOG_WRITE){ - monitorWriteSlowLog2File(slowLogData, tmpPath); + monitorWriteSlowLog2File(slowLogData, tmpSlowLogPath); } else if(slowLogData->type == SLOW_LOG_READ_RUNNING){ monitorSendSlowLogAtRunning(slowLogData->clusterId); } else if(slowLogData->type == SLOW_LOG_READ_QUIT){ @@ -799,27 +759,59 @@ static void tscMonitorStop() { } } -void monitorInit() { +int32_t monitorInit() { tscInfo("[monitor] tscMonitor init"); monitorCounterHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorCounterHash == NULL) { tscError("failed to create monitorCounterHash"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } taosHashSetFreeFp(monitorCounterHash, destroyMonitorClient); monitorSlowLogHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorSlowLogHash == NULL) { tscError("failed to create monitorSlowLogHash"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } taosHashSetFreeFp(monitorSlowLogHash, destroySlowLogClient); monitorTimer = taosTmrInit(0, 0, 0, "MONITOR"); if (monitorTimer == NULL) { tscError("failed to create monitor timer"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if (getSlowLogTmpDir(tmpSlowLogPath, sizeof(tmpSlowLogPath)) < 0){ + terrno = TSDB_CODE_TSC_INTERNAL_ERROR; + return -1; + } + + if (taosMulModeMkDir(tmpSlowLogPath, 0777, true) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tscError("failed to create dir:%s since %s", tmpSlowLogPath, terrstr()); + return -1; + } + + if (tsem2_init(&monitorSem, 0, 0) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tscError("sem init error since %s", terrstr()); + return -1; + } + + monitorQueue = taosOpenQueue(); + if(monitorQueue == NULL){ + tscError("open queue error since %s", terrstr()); + return -1; } taosInitRWLatch(&monitorLock); - tscMonitortInit(); + if (tscMonitortInit() != 0){ + return -1; + } + return 0; } void monitorClose() { @@ -844,10 +836,7 @@ int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data){ return -1; } *slowLogData = data; - tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); - while (atomic_load_32(&slowLogFlag) == -1) { - taosMsleep(5); - } + tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%s, data:%s", slowLogData->clusterId, queueTypeStr[slowLogData->type], slowLogData->data); if (taosWriteQitem(monitorQueue, slowLogData) == 0){ tsem2_post(&monitorSem); }else{ diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index e5baa7137e..d587deffc5 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -154,13 +154,14 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { if(taosHashGet(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES) == NULL){ if(taosHashPut(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES, &pTscObj->pAppInfo, POINTER_BYTES) != 0){ tscError("failed to put appInfo into appInfo.pInstMapByClusterId"); + }else{ + MonitorSlowLogData data = {0}; + data.clusterId = pTscObj->pAppInfo->clusterId; + data.type = SLOW_LOG_READ_BEGINNIG; + monitorPutData2MonitorQueue(data); + monitorClientSlowQueryInit(connectRsp.clusterId); + monitorClientSQLReqInit(connectRsp.clusterId); } - MonitorSlowLogData data = {0}; - data.clusterId = pTscObj->pAppInfo->clusterId; - data.type = SLOW_LOG_READ_BEGINNIG; - monitorPutData2MonitorQueue(data); - monitorClientSlowQueryInit(connectRsp.clusterId); - monitorClientSQLReqInit(connectRsp.clusterId); } taosThreadMutexLock(&clientHbMgr.lock); diff --git a/source/client/src/clientStmt.c b/source/client/src/clientStmt.c index e8b76d34c2..17b52521b8 100644 --- a/source/client/src/clientStmt.c +++ b/source/client/src/clientStmt.c @@ -72,6 +72,7 @@ static int32_t stmtCreateRequest(STscStmt* pStmt) { } if (TSDB_CODE_SUCCESS == code) { pStmt->exec.pRequest->syncQuery = true; + pStmt->exec.pRequest->isStmtBind = true; } } @@ -830,6 +831,7 @@ TAOS_STMT* stmtInit(STscObj* taos, int64_t reqid, TAOS_STMT_OPTIONS* pOptions) { pStmt->bInfo.needParse = true; pStmt->sql.status = STMT_INIT; pStmt->reqid = reqid; + pStmt->errCode = TSDB_CODE_SUCCESS; if (NULL != pOptions) { memcpy(&pStmt->options, pOptions, sizeof(pStmt->options)); @@ -882,6 +884,10 @@ int stmtPrepare(TAOS_STMT* stmt, const char* sql, unsigned long length) { STMT_DLOG_E("start to prepare"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (pStmt->sql.status >= STMT_PREPARE) { STMT_ERR_RET(stmtResetStmt(pStmt)); } @@ -953,6 +959,10 @@ int stmtSetTbName(TAOS_STMT* stmt, const char* tbName) { STMT_DLOG("start to set tbName: %s", tbName); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_SETTBNAME)); int32_t insert = 0; @@ -999,8 +1009,18 @@ int stmtSetTbTags(TAOS_STMT* stmt, TAOS_MULTI_BIND* tags) { STMT_DLOG_E("start to set tbTags"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_SETTAGS)); + SBoundColInfo *tags_info = (SBoundColInfo*)pStmt->bInfo.boundTags; + if (tags_info->numOfBound <= 0 || tags_info->numOfCols <= 0) { + tscWarn("no tags bound in sql, will not bound tags"); + return TSDB_CODE_SUCCESS; + } + if (pStmt->bInfo.inExecCache) { return TSDB_CODE_SUCCESS; } @@ -1021,6 +1041,10 @@ int stmtSetTbTags(TAOS_STMT* stmt, TAOS_MULTI_BIND* tags) { } int stmtFetchTagFields(STscStmt* pStmt, int32_t* fieldNum, TAOS_FIELD_E** fields) { + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (STMT_TYPE_QUERY == pStmt->sql.type) { tscError("invalid operation to get query tag fileds"); STMT_ERR_RET(TSDB_CODE_TSC_STMT_API_ERROR); @@ -1039,6 +1063,10 @@ int stmtFetchTagFields(STscStmt* pStmt, int32_t* fieldNum, TAOS_FIELD_E** fields } int stmtFetchColFields(STscStmt* pStmt, int32_t* fieldNum, TAOS_FIELD_E** fields) { + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (STMT_TYPE_QUERY == pStmt->sql.type) { tscError("invalid operation to get query column fileds"); STMT_ERR_RET(TSDB_CODE_TSC_STMT_API_ERROR); @@ -1150,8 +1178,13 @@ int stmtBindBatch(TAOS_STMT* stmt, TAOS_MULTI_BIND* bind, int32_t colIdx) { STMT_DLOG("start to bind stmt data, colIdx: %d", colIdx); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_BIND)); + if (pStmt->bInfo.needParse && pStmt->sql.runTimes && pStmt->sql.type > 0 && STMT_TYPE_MULTI_INSERT != pStmt->sql.type) { pStmt->bInfo.needParse = false; @@ -1307,6 +1340,10 @@ int stmtAddBatch(TAOS_STMT* stmt) { STMT_DLOG_E("start to add batch"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_ADD_BATCH)); if (pStmt->sql.stbInterlaceMode) { @@ -1471,6 +1508,10 @@ int stmtExec(TAOS_STMT* stmt) { STMT_DLOG_E("start to exec"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_EXECUTE)); if (STMT_TYPE_QUERY == pStmt->sql.type) { @@ -1599,6 +1640,10 @@ int stmtGetTagFields(TAOS_STMT* stmt, int* nums, TAOS_FIELD_E** fields) { STMT_DLOG_E("start to get tag fields"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (STMT_TYPE_QUERY == pStmt->sql.type) { STMT_ERRI_JRET(TSDB_CODE_TSC_STMT_API_ERROR); } @@ -1637,6 +1682,10 @@ int stmtGetColFields(TAOS_STMT* stmt, int* nums, TAOS_FIELD_E** fields) { STMT_DLOG_E("start to get col fields"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (STMT_TYPE_QUERY == pStmt->sql.type) { STMT_ERRI_JRET(TSDB_CODE_TSC_STMT_API_ERROR); } @@ -1674,6 +1723,10 @@ int stmtGetParamNum(TAOS_STMT* stmt, int* nums) { STMT_DLOG_E("start to get param num"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + STMT_ERR_RET(stmtSwitchStatus(pStmt, STMT_FETCH_FIELDS)); if (pStmt->bInfo.needParse && pStmt->sql.runTimes && pStmt->sql.type > 0 && @@ -1706,6 +1759,10 @@ int stmtGetParam(TAOS_STMT* stmt, int idx, int* type, int* bytes) { STMT_DLOG_E("start to get param"); + if (pStmt->errCode != TSDB_CODE_SUCCESS) { + return pStmt->errCode; + } + if (STMT_TYPE_QUERY == pStmt->sql.type) { STMT_RET(TSDB_CODE_TSC_STMT_API_ERROR); } diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 1c5d085aaf..3c6ef00bf4 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -27,6 +27,7 @@ #define EMPTY_BLOCK_POLL_IDLE_DURATION 10 #define DEFAULT_AUTO_COMMIT_INTERVAL 5000 #define DEFAULT_HEARTBEAT_INTERVAL 3000 +#define DEFAULT_ASKEP_INTERVAL 1000 struct SMqMgmt { tmr_h timer; @@ -736,35 +737,33 @@ static void generateTimedTask(int64_t refId, int32_t type) { if (tmq == NULL) return; int8_t* pTaskType = taosAllocateQitem(sizeof(int8_t), DEF_QITEM, 0); - if (pTaskType == NULL) return; + if (pTaskType != NULL){ + *pTaskType = type; + if (taosWriteQitem(tmq->delayedTask, pTaskType) == 0){ + tsem2_post(&tmq->rspSem); + } + } - *pTaskType = type; - taosWriteQitem(tmq->delayedTask, pTaskType); - tsem2_post(&tmq->rspSem); taosReleaseRef(tmqMgmt.rsetId, refId); } void tmqAssignAskEpTask(void* param, void* tmrId) { - int64_t refId = *(int64_t*)param; + int64_t refId = (int64_t)param; generateTimedTask(refId, TMQ_DELAYED_TASK__ASK_EP); - taosMemoryFree(param); } void tmqReplayTask(void* param, void* tmrId) { - int64_t refId = *(int64_t*)param; + int64_t refId = (int64_t)param; tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, refId); - if (tmq == NULL) goto END; + if (tmq == NULL) return; tsem2_post(&tmq->rspSem); taosReleaseRef(tmqMgmt.rsetId, refId); -END: - taosMemoryFree(param); } void tmqAssignDelayedCommitTask(void* param, void* tmrId) { - int64_t refId = *(int64_t*)param; + int64_t refId = (int64_t)param; generateTimedTask(refId, TMQ_DELAYED_TASK__COMMIT); - taosMemoryFree(param); } int32_t tmqHbCb(void* param, SDataBuf* pMsg, int32_t code) { @@ -801,11 +800,10 @@ int32_t tmqHbCb(void* param, SDataBuf* pMsg, int32_t code) { } void tmqSendHbReq(void* param, void* tmrId) { - int64_t refId = *(int64_t*)param; + int64_t refId = (int64_t)param; tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, refId); if (tmq == NULL) { - taosMemoryFree(param); return; } @@ -879,7 +877,9 @@ void tmqSendHbReq(void* param, void* tmrId) { OVER: tDestroySMqHbReq(&req); - taosTmrReset(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, param, tmqMgmt.timer, &tmq->hbLiveTimer); + if(tmrId != NULL){ + taosTmrReset(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, param, tmqMgmt.timer, &tmq->hbLiveTimer); + } taosReleaseRef(tmqMgmt.rsetId, refId); } @@ -907,21 +907,14 @@ int32_t tmqHandleAllDelayedTask(tmq_t* pTmq) { if (*pTaskType == TMQ_DELAYED_TASK__ASK_EP) { askEp(pTmq, NULL, false, false); - int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); - *pRefId = pTmq->refId; - tscDebug("consumer:0x%" PRIx64 " retrieve ep from mnode in 1s", pTmq->consumerId); - taosTmrReset(tmqAssignAskEpTask, 1000, pRefId, tmqMgmt.timer, &pTmq->epTimer); + taosTmrReset(tmqAssignAskEpTask, DEFAULT_ASKEP_INTERVAL, (void*)(pTmq->refId), tmqMgmt.timer, &pTmq->epTimer); } else if (*pTaskType == TMQ_DELAYED_TASK__COMMIT) { tmq_commit_cb* pCallbackFn = pTmq->commitCb ? pTmq->commitCb : defaultCommitCbFn; - asyncCommitAllOffsets(pTmq, pCallbackFn, pTmq->commitCbUserParam); - int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); - *pRefId = pTmq->refId; - tscDebug("consumer:0x%" PRIx64 " next commit to vnode(s) in %.2fs", pTmq->consumerId, pTmq->autoCommitInterval / 1000.0); - taosTmrReset(tmqAssignDelayedCommitTask, pTmq->autoCommitInterval, pRefId, tmqMgmt.timer, &pTmq->commitTimer); + taosTmrReset(tmqAssignDelayedCommitTask, pTmq->autoCommitInterval, (void*)(pTmq->refId), tmqMgmt.timer, &pTmq->commitTimer); } else { tscError("consumer:0x%" PRIx64 " invalid task type:%d", pTmq->consumerId, *pTaskType); } @@ -1192,9 +1185,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { goto _failed; } - int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); - *pRefId = pTmq->refId; - pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, pRefId, tmqMgmt.timer); + pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, (void*)pTmq->refId, tmqMgmt.timer); char buf[TSDB_OFFSET_LEN] = {0}; STqOffsetVal offset = {.type = pTmq->resetOffsetCfg}; @@ -1322,18 +1313,9 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { } // init ep timer - if (tmq->epTimer == NULL) { - int64_t* pRefId1 = taosMemoryMalloc(sizeof(int64_t)); - *pRefId1 = tmq->refId; - tmq->epTimer = taosTmrStart(tmqAssignAskEpTask, 1000, pRefId1, tmqMgmt.timer); - } - + tmq->epTimer = taosTmrStart(tmqAssignAskEpTask, DEFAULT_ASKEP_INTERVAL, (void*)(tmq->refId), tmqMgmt.timer); // init auto commit timer - if (tmq->autoCommit && tmq->commitTimer == NULL) { - int64_t* pRefId2 = taosMemoryMalloc(sizeof(int64_t)); - *pRefId2 = tmq->refId; - tmq->commitTimer = taosTmrStart(tmqAssignDelayedCommitTask, tmq->autoCommitInterval, pRefId2, tmqMgmt.timer); - } + tmq->commitTimer = taosTmrStart(tmqAssignDelayedCommitTask, tmq->autoCommitInterval, (void*)(tmq->refId), tmqMgmt.timer); FAIL: taosArrayDestroyP(req.topicNames, taosMemoryFree); @@ -2036,9 +2018,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout) { pVg->blockReceiveTs = taosGetTimestampMs(); pVg->blockSleepForReplay = pRsp->rsp.sleepTime; if (pVg->blockSleepForReplay > 0) { - int64_t* pRefId1 = taosMemoryMalloc(sizeof(int64_t)); - *pRefId1 = tmq->refId; - taosTmrStart(tmqReplayTask, pVg->blockSleepForReplay, pRefId1, tmqMgmt.timer); + taosTmrStart(tmqReplayTask, pVg->blockSleepForReplay, (void*)(tmq->refId), tmqMgmt.timer); } } tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d, rows:%" PRId64 @@ -2295,7 +2275,7 @@ int32_t tmq_consumer_close(tmq_t* tmq) { return code; } } - taosSsleep(2); // sleep 2s for hb to send offset and rows to server + tmqSendHbReq((void*)(tmq->refId), NULL); tmq_list_t* lst = tmq_list_new(); int32_t code = tmq_subscribe(tmq, lst); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 3372c7b1cc..84d4e7b7e7 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -111,7 +111,6 @@ uint16_t tsMonitorPort = 6043; int32_t tsMonitorMaxLogs = 100; bool tsMonitorComp = false; bool tsMonitorLogProtocol = false; -int32_t tsMonitorIntervalForBasic = 30; bool tsMonitorForceV2 = true; // audit @@ -712,7 +711,6 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorLogProtocol", tsMonitorLogProtocol, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorIntervalForBasic", tsMonitorIntervalForBasic, 1, 200000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorForceV2", tsMonitorForceV2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "audit", tsEnableAudit, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -1165,7 +1163,6 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsMonitorComp = cfgGetItem(pCfg, "monitorComp")->bval; tsQueryRspPolicy = cfgGetItem(pCfg, "queryRspPolicy")->i32; tsMonitorLogProtocol = cfgGetItem(pCfg, "monitorLogProtocol")->bval; - tsMonitorIntervalForBasic = cfgGetItem(pCfg, "monitorIntervalForBasic")->i32; tsMonitorForceV2 = cfgGetItem(pCfg, "monitorForceV2")->i32; tsEnableAudit = cfgGetItem(pCfg, "audit")->bval; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 10719674f5..7e89753241 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -69,7 +69,7 @@ pReq->sql = NULL; \ } while (0) -static int32_t tSerializeSMonitorParas(SEncoder *encoder, const SMonitorParas* pMonitorParas) { +static int32_t tSerializeSMonitorParas(SEncoder *encoder, const SMonitorParas *pMonitorParas) { if (tEncodeI8(encoder, pMonitorParas->tsEnableMonitor) < 0) return -1; if (tEncodeI32(encoder, pMonitorParas->tsMonitorInterval) < 0) return -1; if (tEncodeI32(encoder, pMonitorParas->tsSlowLogScope) < 0) return -1; @@ -80,7 +80,7 @@ static int32_t tSerializeSMonitorParas(SEncoder *encoder, const SMonitorParas* p return 0; } -static int32_t tDeserializeSMonitorParas(SDecoder *decoder, SMonitorParas* pMonitorParas){ +static int32_t tDeserializeSMonitorParas(SDecoder *decoder, SMonitorParas *pMonitorParas) { if (tDecodeI8(decoder, (int8_t *)&pMonitorParas->tsEnableMonitor) < 0) return -1; if (tDecodeI32(decoder, &pMonitorParas->tsMonitorInterval) < 0) return -1; if (tDecodeI32(decoder, &pMonitorParas->tsSlowLogScope) < 0) return -1; @@ -1577,7 +1577,7 @@ int32_t tDeserializeSStatisReq(void *buf, int32_t bufLen, SStatisReq *pReq) { if (tDecodeCStrTo(&decoder, pReq->pCont) < 0) return -1; } if (!tDecodeIsEnd(&decoder)) { - if (tDecodeI8(&decoder, (int8_t*)&pReq->type) < 0) return -1; + if (tDecodeI8(&decoder, (int8_t *)&pReq->type) < 0) return -1; } tEndDecode(&decoder); tDecoderClear(&decoder); @@ -5737,65 +5737,74 @@ _exit: } int32_t tSerializeSAlterVnodeConfigReq(void *buf, int32_t bufLen, SAlterVnodeConfigReq *pReq) { + int32_t tlen; SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); - if (tStartEncode(&encoder) < 0) return -1; - if (tEncodeI32(&encoder, pReq->vgVersion) < 0) return -1; - if (tEncodeI32(&encoder, pReq->buffer) < 0) return -1; - if (tEncodeI32(&encoder, pReq->pageSize) < 0) return -1; - if (tEncodeI32(&encoder, pReq->pages) < 0) return -1; - if (tEncodeI32(&encoder, pReq->cacheLastSize) < 0) return -1; - if (tEncodeI32(&encoder, pReq->daysPerFile) < 0) return -1; - if (tEncodeI32(&encoder, pReq->daysToKeep0) < 0) return -1; - if (tEncodeI32(&encoder, pReq->daysToKeep1) < 0) return -1; - if (tEncodeI32(&encoder, pReq->daysToKeep2) < 0) return -1; - if (tEncodeI32(&encoder, pReq->walFsyncPeriod) < 0) return -1; - if (tEncodeI8(&encoder, pReq->walLevel) < 0) return -1; - if (tEncodeI8(&encoder, pReq->strict) < 0) return -1; - if (tEncodeI8(&encoder, pReq->cacheLast) < 0) return -1; + TAOS_CHECK_ERRNO(tStartEncode(&encoder)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->vgVersion)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->buffer)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->pageSize)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->pages)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->cacheLastSize)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->daysPerFile)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->daysToKeep0)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->daysToKeep1)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->daysToKeep2)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->walFsyncPeriod)); + TAOS_CHECK_ERRNO(tEncodeI8(&encoder, pReq->walLevel)); + TAOS_CHECK_ERRNO(tEncodeI8(&encoder, pReq->strict)); + TAOS_CHECK_ERRNO(tEncodeI8(&encoder, pReq->cacheLast)); for (int32_t i = 0; i < 7; ++i) { - if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1; + TAOS_CHECK_ERRNO(tEncodeI64(&encoder, pReq->reserved[i])); } // 1st modification - if (tEncodeI16(&encoder, pReq->sttTrigger) < 0) return -1; - if (tEncodeI32(&encoder, pReq->minRows) < 0) return -1; + TAOS_CHECK_ERRNO(tEncodeI16(&encoder, pReq->sttTrigger)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->minRows)); // 2nd modification - if (tEncodeI32(&encoder, pReq->walRetentionPeriod) < 0) return -1; - if (tEncodeI32(&encoder, pReq->walRetentionSize) < 0) return -1; - if (tEncodeI32(&encoder, pReq->keepTimeOffset) < 0) return -1; + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->walRetentionPeriod)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->walRetentionSize)); + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->keepTimeOffset)); - if (tEncodeI32(&encoder, pReq->s3KeepLocal) < 0) return -1; - if (tEncodeI8(&encoder, pReq->s3Compact) < 0) return -1; + TAOS_CHECK_ERRNO(tEncodeI32(&encoder, pReq->s3KeepLocal)); + TAOS_CHECK_ERRNO(tEncodeI8(&encoder, pReq->s3Compact)); tEndEncode(&encoder); - int32_t tlen = encoder.pos; +_exit: + if (terrno) { + uError("%s failed at line %d since %s", __func__, terrln, terrstr()); + tlen = -1; + } else { + tlen = encoder.pos; + } tEncoderClear(&encoder); return tlen; } int32_t tDeserializeSAlterVnodeConfigReq(void *buf, int32_t bufLen, SAlterVnodeConfigReq *pReq) { SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); - if (tStartDecode(&decoder) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->vgVersion) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->buffer) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->pageSize) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->pages) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->cacheLastSize) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->daysPerFile) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->daysToKeep0) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->daysToKeep1) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->daysToKeep2) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->walFsyncPeriod) < 0) return -1; - if (tDecodeI8(&decoder, &pReq->walLevel) < 0) return -1; - if (tDecodeI8(&decoder, &pReq->strict) < 0) return -1; - if (tDecodeI8(&decoder, &pReq->cacheLast) < 0) return -1; + TAOS_CHECK_ERRNO(tStartDecode(&decoder)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->vgVersion)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->buffer)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->pageSize)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->pages)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->cacheLastSize)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->daysPerFile)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->daysToKeep0)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->daysToKeep1)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->daysToKeep2)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->walFsyncPeriod)); + TAOS_CHECK_ERRNO(tDecodeI8(&decoder, &pReq->walLevel)); + TAOS_CHECK_ERRNO(tDecodeI8(&decoder, &pReq->strict)); + TAOS_CHECK_ERRNO(tDecodeI8(&decoder, &pReq->cacheLast)); for (int32_t i = 0; i < 7; ++i) { - if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1; + TAOS_CHECK_ERRNO(tDecodeI64(&decoder, &pReq->reserved[i])); } // 1st modification @@ -5803,8 +5812,8 @@ int32_t tDeserializeSAlterVnodeConfigReq(void *buf, int32_t bufLen, SAlterVnodeC pReq->sttTrigger = -1; pReq->minRows = -1; } else { - if (tDecodeI16(&decoder, &pReq->sttTrigger) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->minRows) < 0) return -1; + TAOS_CHECK_ERRNO(tDecodeI16(&decoder, &pReq->sttTrigger)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->minRows)); } // 2n modification @@ -5812,24 +5821,29 @@ int32_t tDeserializeSAlterVnodeConfigReq(void *buf, int32_t bufLen, SAlterVnodeC pReq->walRetentionPeriod = -1; pReq->walRetentionSize = -1; } else { - if (tDecodeI32(&decoder, &pReq->walRetentionPeriod) < 0) return -1; - if (tDecodeI32(&decoder, &pReq->walRetentionSize) < 0) return -1; + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->walRetentionPeriod)); + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->walRetentionSize)); } pReq->keepTimeOffset = TSDB_DEFAULT_KEEP_TIME_OFFSET; if (!tDecodeIsEnd(&decoder)) { - if (tDecodeI32(&decoder, &pReq->keepTimeOffset) < 0) return -1; + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->keepTimeOffset)); } pReq->s3KeepLocal = TSDB_DEFAULT_S3_KEEP_LOCAL; pReq->s3Compact = TSDB_DEFAULT_S3_COMPACT; if (!tDecodeIsEnd(&decoder)) { - if (tDecodeI32(&decoder, &pReq->s3KeepLocal) < 0) return -1; - if (tDecodeI8(&decoder, &pReq->s3Compact) < 0) return -1; + TAOS_CHECK_ERRNO(tDecodeI32(&decoder, &pReq->s3KeepLocal) < 0); + TAOS_CHECK_ERRNO(tDecodeI8(&decoder, &pReq->s3Compact) < 0); } tEndDecode(&decoder); + +_exit: tDecoderClear(&decoder); - return 0; + if (terrno) { + uError("%s failed at line %d since %s", __func__, terrln, terrstr()); + } + return terrno; } int32_t tSerializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnodeReplicaReq *pReq) { @@ -9296,7 +9310,7 @@ int32_t tDecodeSTqCheckInfo(SDecoder *pDecoder, STqCheckInfo *pInfo) { } void tDeleteSTqCheckInfo(STqCheckInfo *pInfo) { taosArrayDestroy(pInfo->colIdList); } -int32_t tEncodeSMqRebVgReq(SEncoder* pCoder, const SMqRebVgReq* pReq) { +int32_t tEncodeSMqRebVgReq(SEncoder *pCoder, const SMqRebVgReq *pReq) { if (tStartEncode(pCoder) < 0) return -1; if (tEncodeI64(pCoder, pReq->leftForVer) < 0) return -1; if (tEncodeI32(pCoder, pReq->vgId) < 0) return -1; @@ -9316,7 +9330,7 @@ int32_t tEncodeSMqRebVgReq(SEncoder* pCoder, const SMqRebVgReq* pReq) { return 0; } -int32_t tDecodeSMqRebVgReq(SDecoder* pCoder, SMqRebVgReq* pReq) { +int32_t tDecodeSMqRebVgReq(SDecoder *pCoder, SMqRebVgReq *pReq) { if (tStartDecode(pCoder) < 0) return -1; if (tDecodeI64(pCoder, &pReq->leftForVer) < 0) return -1; @@ -9341,7 +9355,6 @@ int32_t tDecodeSMqRebVgReq(SDecoder* pCoder, SMqRebVgReq* pReq) { return 0; } - int32_t tEncodeDeleteRes(SEncoder *pCoder, const SDeleteRes *pRes) { int32_t nUid = taosArrayGetSize(pRes->uidList); diff --git a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h index 46f8dd06d4..be9ff56674 100644 --- a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h +++ b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h @@ -43,7 +43,6 @@ typedef struct SDnodeMgmt { GetMnodeLoadsFp getMnodeLoadsFp; GetQnodeLoadsFp getQnodeLoadsFp; int32_t statusSeq; - SendMonitorReportFp sendMonitorReportFpBasic; } SDnodeMgmt; // dmHandle.c diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c index a651fbf060..b9dd45f1c0 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c @@ -65,7 +65,6 @@ static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { pMgmt->processDropNodeFp = pInput->processDropNodeFp; pMgmt->sendMonitorReportFp = pInput->sendMonitorReportFp; pMgmt->sendAuditRecordsFp = pInput->sendAuditRecordFp; - pMgmt->sendMonitorReportFpBasic = pInput->sendMonitorReportFpBasic; pMgmt->getVnodeLoadsFp = pInput->getVnodeLoadsFp; pMgmt->getVnodeLoadsLiteFp = pInput->getVnodeLoadsLiteFp; pMgmt->getMnodeLoadsFp = pInput->getMnodeLoadsFp; diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index c48b614f96..eafa10aa32 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -175,15 +175,6 @@ static void *dmMonitorThreadFp(void *param) { taosMemoryTrim(0); } } - - if(tsMonitorForceV2){ - if (curTime < lastTimeForBasic) lastTimeForBasic = curTime; - float intervalForBasic = (curTime - lastTimeForBasic) / 1000.0f; - if (intervalForBasic >= tsMonitorIntervalForBasic) { - (*pMgmt->sendMonitorReportFpBasic)(); - lastTimeForBasic = curTime; - } - } } return NULL; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 677e19d4c1..9b987b3237 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -233,6 +233,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_CONSEN_CHKPT_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_CREATE, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_DROP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_CREATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -242,7 +243,6 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_REPORT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_CONSEN, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_KILL_COMPACT_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 7a0189b7c1..5c2f54fd10 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -76,6 +76,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_CONSEN_CHKPT, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; @@ -96,7 +97,6 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_REPORT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_CONSEN_RSP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 001696aecc..fbe1925e3f 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -972,10 +972,10 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_REPORT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_CONSEN_RSP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_GET_STREAM_PROGRESS, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_CONSEN_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 45d1486912..8c1b33cb14 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -287,7 +287,8 @@ int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { return -1; } - SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg), RPC_QITEM, pRpc->contLen); + EQItype itype = APPLY_QUEUE == qtype ? DEF_QITEM : RPC_QITEM; + SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg), itype, pRpc->contLen); if (pMsg == NULL) { rpcFreeCont(pRpc->pCont); pRpc->pCont = NULL; diff --git a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h index 90e44e5acc..bc6a4652e7 100644 --- a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h +++ b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h @@ -128,7 +128,6 @@ int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg); // dmMonitor.c void dmSendMonitorReport(); void dmSendAuditRecords(); -void dmSendMonitorReportBasic(); void dmGetVnodeLoads(SMonVloadInfo *pInfo); void dmGetVnodeLoadsLite(SMonVloadInfo *pInfo); void dmGetMnodeLoads(SMonMloadInfo *pInfo); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 54a118b666..4be1af30b5 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -394,7 +394,6 @@ SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper) { .processDropNodeFp = dmProcessDropNodeReq, .sendMonitorReportFp = dmSendMonitorReport, .sendAuditRecordFp = auditSendRecordsInBatch, - .sendMonitorReportFpBasic = dmSendMonitorReportBasic, .getVnodeLoadsFp = dmGetVnodeLoads, .getVnodeLoadsLiteFp = dmGetVnodeLoadsLite, .getMnodeLoadsFp = dmGetMnodeLoads, diff --git a/source/dnode/mgmt/node_mgmt/src/dmMonitor.c b/source/dnode/mgmt/node_mgmt/src/dmMonitor.c index 21e25f5535..d3197282b6 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmMonitor.c +++ b/source/dnode/mgmt/node_mgmt/src/dmMonitor.c @@ -123,16 +123,6 @@ void dmSendMonitorReport() { monGenAndSendReport(); } -void dmSendMonitorReportBasic() { - if (!tsEnableMonitor || tsMonitorFqdn[0] == 0 || tsMonitorPort == 0) return; - dTrace("send monitor report to %s:%u", tsMonitorFqdn, tsMonitorPort); - - SDnode *pDnode = dmInstance(); - dmGetDmMonitorInfoBasic(pDnode); - dmGetMmMonitorInfo(pDnode); - monGenAndSendReportBasic(); -} - //Todo: put this in seperate file in the future void dmSendAuditRecords() { auditSendRecordsInBatch(); diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 74bf1f964c..99d641ff3f 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -208,7 +208,9 @@ static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { } pRpc->info.wrapper = pWrapper; - pMsg = taosAllocateQitem(sizeof(SRpcMsg), RPC_QITEM, pRpc->contLen); + + EQItype itype = IsReq(pRpc) ? RPC_QITEM : DEF_QITEM; // rsp msg is not restricted by tsRpcQueueMemoryUsed + pMsg = taosAllocateQitem(sizeof(SRpcMsg), itype, pRpc->contLen); if (pMsg == NULL) goto _OVER; memcpy(pMsg, pRpc, sizeof(SRpcMsg)); diff --git a/source/dnode/mgmt/node_util/inc/dmUtil.h b/source/dnode/mgmt/node_util/inc/dmUtil.h index aea3286d76..d316a82af2 100644 --- a/source/dnode/mgmt/node_util/inc/dmUtil.h +++ b/source/dnode/mgmt/node_util/inc/dmUtil.h @@ -155,7 +155,6 @@ typedef struct { ProcessDropNodeFp processDropNodeFp; SendMonitorReportFp sendMonitorReportFp; SendAuditRecordsFp sendAuditRecordFp; - SendMonitorReportFp sendMonitorReportFpBasic; GetVnodeLoadsFp getVnodeLoadsFp; GetVnodeLoadsFp getVnodeLoadsLiteFp; GetMnodeLoadsFp getMnodeLoadsFp; diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index b261f89057..0b6b6a9ef2 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -83,7 +83,7 @@ typedef struct SOrphanTask { typedef struct { SMsgHead head; -} SMStreamReqCheckpointRsp, SMStreamUpdateChkptRsp; +} SMStreamReqCheckpointRsp, SMStreamUpdateChkptRsp, SMStreamReqConsensChkptRsp; typedef struct STaskChkptInfo { int32_t nodeId; @@ -133,8 +133,8 @@ int32_t mndCreateStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) int32_t mndStreamSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SArray *pChkptInfoList); int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq); -int32_t mndSendConsensusCheckpointIdRsp(SArray* pList, int64_t checkpointId); - +int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId, + int64_t ts); void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo *pExecInfo); SStreamTaskIter *createStreamTaskIter(SStreamObj *pStream); @@ -146,14 +146,10 @@ void mndInitStreamExecInfo(SMnode *pMnode, SStreamExecInfo *pExecInf int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot); void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -SCheckpointConsensusInfo *mndGetConsensusInfo(SHashObj *pHash, int64_t streamId); -void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo, SRpcMsg *pMsg); -int64_t mndGetConsensusCheckpointId(SCheckpointConsensusInfo *pInfo, SStreamObj *pStream); -bool mndAllTaskSendCheckpointId(SCheckpointConsensusInfo *pInfo, int32_t numOfTasks, int32_t* pTotal); +SCheckpointConsensusInfo *mndGetConsensusInfo(SHashObj *pHash, int64_t streamId, int32_t numOfTasks); +void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo); void mndClearConsensusRspEntry(SCheckpointConsensusInfo *pInfo); -int32_t doSendConsensusCheckpointRsp(SRestoreCheckpointInfo *pInfo, SRpcMsg *pMsg, int64_t checkpointId); int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId); -int32_t mndRegisterConsensusChkptId(SHashObj* pHash, int64_t streamId); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 398ea5d589..723e3701a1 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -177,6 +177,15 @@ static void mndStreamCheckNode(SMnode *pMnode) { } } +static void mndStreamConsensusChkpt(SMnode *pMnode) { + int32_t contLen = 0; + void *pReq = mndBuildTimerMsg(&contLen); + if (pReq != NULL) { + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CONSEN_TIMER, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + } +} + static void mndPullupTelem(SMnode *pMnode) { mTrace("pullup telem msg"); int32_t contLen = 0; @@ -308,7 +317,6 @@ static int32_t minCronTime() { min = TMIN(min, tsCompactPullupInterval); min = TMIN(min, tsMqRebalanceInterval); min = TMIN(min, tsStreamCheckpointInterval); - min = TMIN(min, 6); // checkpointRemain min = TMIN(min, tsStreamNodeCheckInterval); min = TMIN(min, tsArbHeartBeatIntervalSec); min = TMIN(min, tsArbCheckSyncIntervalSec); @@ -353,6 +361,10 @@ void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) { mndStreamCheckNode(pMnode); } + if (sec % 5 == 0) { + mndStreamConsensusChkpt(pMnode); + } + if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) { mndPullupTelem(pMnode); } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index a137c10ed5..415d1ff9f0 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -59,7 +59,9 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); static int32_t extractNodeListFromStream(SMnode *pMnode, SArray *pNodeList); static int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq); static int32_t mndProcessCheckpointReport(SRpcMsg *pReq); -static int32_t mndProcessConsensusCheckpointId(SRpcMsg *pReq); +//static int32_t mndProcessConsensusCheckpointId(SRpcMsg *pMsg); +static int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg); +static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, int32_t code); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); @@ -106,6 +108,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_UPDATE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_RESET_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_UPDATE_CHKPT_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_STREAM_CONSEN_CHKPT_RSP, mndTransProcessRsp); // for msgs inside mnode // TODO change the name @@ -118,11 +121,11 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_REQ_CHKPT, mndProcessStreamReqCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHKPT_REPORT, mndProcessCheckpointReport); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHKPT_CONSEN, mndProcessConsensusCheckpointId); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_UPDATE_CHKPT_EVT, mndScanCheckpointReportInfo); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CONSEN_TIMER, mndProcessConsensusInTmr); mndSetMsgHandle(pMnode, TDMT_MND_PAUSE_STREAM, mndProcessPauseStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_RESUME_STREAM, mndProcessResumeStreamReq); @@ -803,7 +806,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { taosThreadMutexLock(&execInfo.lock); mDebug("stream stream:%s start to register tasks into task nodeList and set initial checkpointId", createReq.name); saveTaskAndNodeInfoIntoBuf(&streamObj, &execInfo); - mndRegisterConsensusChkptId(execInfo.pStreamConsensus, streamObj.uid); +// mndRegisterConsensusChkptId(execInfo.pStreamConsensus, streamObj.uid); taosThreadMutexUnlock(&execInfo.lock); // execute creation @@ -1134,6 +1137,7 @@ static int32_t mndCheckTaskAndNodeStatus(SMnode *pMnode) { mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s, checkpoint not issued", pEntry->id.streamId, (int32_t)pEntry->id.taskId, pEntry->nodeId, streamTaskGetStatusStr(pEntry->status)); ready = false; + break; } if (pEntry->hTaskId != 0) { @@ -1153,6 +1157,27 @@ static int32_t mndCheckTaskAndNodeStatus(SMnode *pMnode) { return ready ? 0 : -1; } +int64_t getStreamTaskLastReadyState(SArray *pTaskList, int64_t streamId) { + int64_t ts = -1; + int32_t taskId = -1; + + for (int32_t i = 0; i < taosArrayGetSize(pTaskList); ++i) { + STaskId *p = taosArrayGet(pTaskList, i); + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); + if (pEntry == NULL || pEntry->id.streamId != streamId) { + continue; + } + + if (pEntry->status == TASK_STATUS__READY && ts < pEntry->startTime) { + ts = pEntry->startTime; + taskId = pEntry->id.taskId; + } + } + + mDebug("stream:0x%" PRIx64 " last ready ts:%" PRId64 " s-task:0x%x", streamId, ts, taskId); + return ts; +} + typedef struct { int64_t streamId; int64_t duration; @@ -1191,6 +1216,15 @@ static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { continue; } + taosThreadMutexLock(&execInfo.lock); + int64_t startTs = getStreamTaskLastReadyState(execInfo.pTaskList, pStream->uid); + if (startTs != -1 && (now - startTs) < tsStreamCheckpointInterval * 1000) { + taosThreadMutexUnlock(&execInfo.lock); + sdbRelease(pSdb, pStream); + continue; + } + taosThreadMutexUnlock(&execInfo.lock); + SCheckpointInterval in = {.streamId = pStream->uid, .duration = duration}; taosArrayPush(pList, &in); @@ -1237,9 +1271,6 @@ static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { code = mndProcessStreamCheckpointTrans(pMnode, p, checkpointId, 1, true); sdbRelease(pSdb, p); - // clear the consensus checkpoint info - mndClearConsensusCheckpointId(execInfo.pStreamConsensus, p->uid); - if (code != -1) { started += 1; @@ -2309,7 +2340,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { taosThreadMutexUnlock(&execInfo.lock); if (numOfNodes == 0) { - mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + mDebug("end to do stream task(s) node change checking, no stream tasks exist, do nothing"); execInfo.ts = ts; atomic_store_32(&mndNodeCheckSentinel, 0); return 0; @@ -2612,113 +2643,238 @@ int32_t mndProcessCheckpointReport(SRpcMsg *pReq) { taosThreadMutexUnlock(&execInfo.lock); - { - SRpcMsg rsp = {.code = 0, .info = pReq->info, .contLen = sizeof(SMStreamUpdateChkptRsp)}; - rsp.pCont = rpcMallocCont(rsp.contLen); - SMsgHead *pHead = rsp.pCont; - pHead->vgId = htonl(req.nodeId); - - tmsgSendRsp(&rsp); - pReq->info.handle = NULL; // disable auto rsp - } - + doSendQuickRsp(&pReq->info, sizeof(SMStreamUpdateChkptRsp), req.nodeId, TSDB_CODE_SUCCESS); return 0; } -static int32_t mndProcessConsensusCheckpointId(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SDecoder decoder = {0}; +static int64_t getConsensusId(int64_t streamId, int32_t numOfTasks, int32_t* pExistedTasks, bool *pAllSame) { + int32_t num = 0; + int64_t chkId = INT64_MAX; + *pExistedTasks = 0; + *pAllSame = true; - SRestoreCheckpointInfo req = {0}; - tDecoderInit(&decoder, pReq->pCont, pReq->contLen); + for(int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { + STaskId* p = taosArrayGet(execInfo.pTaskList, i); + if (p->streamId != streamId) { + continue; + } - if (tDecodeRestoreCheckpointInfo(&decoder, &req)) { - tDecoderClear(&decoder); - terrno = TSDB_CODE_INVALID_MSG; - mError("invalid task consensus-checkpoint msg received"); + num += 1; + STaskStatusEntry* pe = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); + if (chkId > pe->checkpointInfo.latestId) { + if (chkId != INT64_MAX) { + *pAllSame = false; + } + chkId = pe->checkpointInfo.latestId; + } + } + + *pExistedTasks = num; + if (num < numOfTasks) { // not all task send info to mnode through hbMsg, no valid checkpoint Id return -1; } - tDecoderClear(&decoder); - mDebug("receive stream task consensus-checkpoint msg, vgId:%d, s-task:0x%" PRIx64 "-0x%x, checkpointId:%" PRId64, - req.nodeId, req.streamId, req.taskId, req.checkpointId); + return chkId; +} + +static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, int32_t code) { + SRpcMsg rsp = {.code = code, .info = *pInfo, .contLen = msgSize}; + rsp.pCont = rpcMallocCont(rsp.contLen); + SMsgHead *pHead = rsp.pCont; + pHead->vgId = htonl(vgId); + + tmsgSendRsp(&rsp); + pInfo->handle = NULL; // disable auto rsp +} + +//static int32_t mndProcessConsensusCheckpointId(SRpcMsg *pMsg) { +// SMnode *pMnode = pMsg->info.node; +// SDecoder decoder = {0}; +// +// SRestoreCheckpointInfo req = {0}; +// tDecoderInit(&decoder, pMsg->pCont, pMsg->contLen); +// +// if (tDecodeRestoreCheckpointInfo(&decoder, &req)) { +// tDecoderClear(&decoder); +// terrno = TSDB_CODE_INVALID_MSG; +// mError("invalid task consensus-checkpoint msg received"); +// return -1; +// } +// tDecoderClear(&decoder); +// +// mDebug("receive stream task consensus-checkpoint msg, vgId:%d, s-task:0x%" PRIx64 "-0x%x, checkpointId:%" PRId64, +// req.nodeId, req.streamId, req.taskId, req.checkpointId); +// +// // register to the stream task done map, if all tasks has sent this kinds of message, start the checkpoint trans. +// taosThreadMutexLock(&execInfo.lock); +// +// // mnode handle the create stream transaction too slow may cause this problem +// SStreamObj *pStream = mndGetStreamObj(pMnode, req.streamId); +// if (pStream == NULL) { +// mWarn("failed to find the stream:0x%" PRIx64 ", not handle consensus-checkpointId", req.streamId); +// +// // not in meta-store yet, try to acquire the task in exec buffer +// // the checkpoint req arrives too soon before the completion of the create stream trans. +// STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; +// void *p = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); +// if (p == NULL) { +// mError("failed to find the stream:0x%" PRIx64 " in buf, not handle consensus-checkpointId", req.streamId); +// terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; +// taosThreadMutexUnlock(&execInfo.lock); +// +// doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); +// return -1; +// } else { +// mDebug("s-task:0x%" PRIx64 "-0x%x in buf not in mnode/meta, create stream trans may not complete yet", +// req.streamId, req.taskId); +// // todo wait for stream is created +// } +// } +// +// mInfo("vgId:%d stream:0x%" PRIx64 " %s meta-stored checkpointId:%" PRId64, req.nodeId, req.streamId, pStream->name, +// pStream->checkpointId); +// +// int32_t numOfTasks = (pStream == NULL) ? 0 : mndGetNumOfStreamTasks(pStream); +// if ((pStream != NULL) && (pStream->checkpointId == 0)) { // not generated checkpoint yet, return 0 directly +// taosThreadMutexUnlock(&execInfo.lock); +// mndCreateSetConsensusChkptIdTrans(pMnode, pStream, req.taskId, 0, req.startTs); +// +// doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); +// return TSDB_CODE_SUCCESS; +// } +// +// int32_t num = 0; +// int64_t chkId = getConsensusId(req.streamId, numOfTasks, &num); +// +// // some tasks not send hbMsg to mnode yet, wait for 5s. +// if (chkId == -1) { +// mDebug("not all(%d/%d) task(s) send hbMsg yet, wait for a while and check again, s-task:0x%x", req.taskId, num, +// numOfTasks); +// SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, req.streamId, numOfTasks); +// mndAddConsensusTasks(pInfo, &req); +// +// taosThreadMutexUnlock(&execInfo.lock); +// doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); +// return 0; +// } +// +// if (chkId == req.checkpointId) { +// mDebug("vgId:%d stream:0x%" PRIx64 " %s consensus-checkpointId is:%" PRId64 ", meta-stored checkpointId:%" PRId64, +// req.nodeId, req.streamId, pStream->name, chkId, pStream->checkpointId); +// mndCreateSetConsensusChkptIdTrans(pMnode, pStream, req.taskId, chkId, req.startTs); +// +// taosThreadMutexUnlock(&execInfo.lock); +// doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); +// return 0; +// } +// +// // wait for 5s and check again +// SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, req.streamId, numOfTasks); +// mndAddConsensusTasks(pInfo, &req); +// +// if (pStream != NULL) { +// mndReleaseStream(pMnode, pStream); +// } +// +// taosThreadMutexUnlock(&execInfo.lock); +// doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); +// return 0; +//} + +int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { + SMnode *pMnode = pMsg->info.node; + int64_t now = taosGetTimestampMs(); + SArray *pStreamList = taosArrayInit(4, sizeof(int64_t)); + + mDebug("start to process consensus-checkpointId in tmr"); + + bool allReady = true; + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + taosArrayDestroy(pNodeSnapshot); + if (!allReady) { + mWarn("not all vnodes are ready, end to process the consensus-checkpointId in tmr process"); + taosArrayDestroy(pStreamList); + return 0; + } - // register to the stream task done map, if all tasks has sent this kinds of message, start the checkpoint trans. taosThreadMutexLock(&execInfo.lock); - SStreamObj *pStream = mndGetStreamObj(pMnode, req.streamId); - if (pStream == NULL) { - mWarn("failed to find the stream:0x%" PRIx64 ", not handle checkpoint-report, try to acquire in buf", req.streamId); + void *pIter = NULL; + while ((pIter = taosHashIterate(execInfo.pStreamConsensus, pIter)) != NULL) { + SCheckpointConsensusInfo *pInfo = (SCheckpointConsensusInfo *)pIter; - // not in meta-store yet, try to acquire the task in exec buffer - // the checkpoint req arrives too soon before the completion of the create stream trans. - STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; - void *p = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); - if (p == NULL) { - mError("failed to find the stream:0x%" PRIx64 " in buf, not handle the checkpoint-report", req.streamId); - terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - taosThreadMutexUnlock(&execInfo.lock); - return -1; - } else { - mDebug("s-task:0x%" PRIx64 "-0x%x in buf not in mnode/meta, create stream trans may not complete yet", - req.streamId, req.taskId); + int64_t streamId = -1; + int32_t num = taosArrayGetSize(pInfo->pTaskList); + SArray *pList = taosArrayInit(4, sizeof(int32_t)); + + SStreamObj *pStream = mndGetStreamObj(pMnode, pInfo->streamId); + if (pStream == NULL) { // stream has been dropped already + mDebug("stream:0x%" PRIx64 " dropped already, continue", pInfo->streamId); + taosArrayDestroy(pList); + continue; } - } - int32_t numOfTasks = (pStream == NULL) ? 0 : mndGetNumOfStreamTasks(pStream); + for (int32_t j = 0; j < num; ++j) { + SCheckpointConsensusEntry *pe = taosArrayGet(pInfo->pTaskList, j); + streamId = pe->req.streamId; - SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, req.streamId); + int32_t existed = 0; + bool allSame = true; + int64_t chkId = getConsensusId(pe->req.streamId, pInfo->numOfTasks, &existed, &allSame); + if (chkId == -1) { + mDebug("not all(%d/%d) task(s) send hbMsg yet, wait for a while and check again, s-task:0x%x", existed, + pInfo->numOfTasks, pe->req.taskId); + break; + } - int64_t ckId = mndGetConsensusCheckpointId(pInfo, pStream); - if (ckId != -1) { // consensus checkpoint id already exist - SRpcMsg rsp = {0}; - rsp.code = 0; - rsp.info = pReq->info; - rsp.contLen = sizeof(SRestoreCheckpointInfoRsp) + sizeof(SMsgHead); - rsp.pCont = rpcMallocCont(rsp.contLen); + if (((now - pe->ts) >= 10 * 1000) || allSame) { + mDebug("s-task:0x%x sendTs:%" PRId64 " wait %.2fs and all tasks have same checkpointId", pe->req.taskId, + pe->req.startTs, (now - pe->ts) / 1000.0); + ASSERT(chkId <= pe->req.checkpointId); + mndCreateSetConsensusChkptIdTrans(pMnode, pStream, pe->req.taskId, chkId, pe->req.startTs); - SMsgHead *pHead = rsp.pCont; - pHead->vgId = htonl(req.nodeId); - - mDebug("stream:0x%" PRIx64 " consensus-checkpointId:%" PRId64 " exists, return directly", req.streamId, ckId); - doSendConsensusCheckpointRsp(&req, &rsp, ckId); - - taosThreadMutexUnlock(&execInfo.lock); - pReq->info.handle = NULL; // disable auto rsp - - return TSDB_CODE_SUCCESS; - } - - mndAddConsensusTasks(pInfo, &req, pReq); - - int32_t total = 0; - if (mndAllTaskSendCheckpointId(pInfo, numOfTasks, &total)) { // all tasks has send the reqs - // start transaction to set the checkpoint id - int64_t checkpointId = mndGetConsensusCheckpointId(pInfo, pStream); - mInfo("stream:0x%" PRIx64 " %s all %d tasks send latest checkpointId, the consensus-checkpointId is:%" PRId64 - " will be issued soon", - req.streamId, pStream->name, numOfTasks, checkpointId); - - // start the checkpoint consensus trans - int32_t code = mndSendConsensusCheckpointIdRsp(pInfo->pTaskList, checkpointId); - if (code == TSDB_CODE_SUCCESS) { - mndClearConsensusRspEntry(pInfo); - mDebug("clear all waiting for rsp entry for stream:0x%" PRIx64, req.streamId); - } else { - mDebug("stream:0x%" PRIx64 " not start send consensus-checkpointId msg, due to not all task ready", req.streamId); + taosArrayPush(pList, &pe->req.taskId); + streamId = pe->req.streamId; + } else { + mDebug("s-task:0x%x sendTs:%" PRId64 " wait %.2fs already, wait for next round to check", pe->req.taskId, + pe->req.startTs, (now - pe->ts) / 1000.0); + } } - } else { - mDebug("stream:0x%" PRIx64 " %d/%d tasks send consensus-checkpointId info", req.streamId, total, numOfTasks); - } - if (pStream != NULL) { mndReleaseStream(pMnode, pStream); + + if (taosArrayGetSize(pList) > 0) { + for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + int32_t *taskId = taosArrayGet(pList, i); + for (int32_t k = 0; k < taosArrayGetSize(pInfo->pTaskList); ++k) { + SCheckpointConsensusEntry *pe = taosArrayGet(pInfo->pTaskList, k); + if (pe->req.taskId == *taskId) { + taosArrayRemove(pInfo->pTaskList, k); + break; + } + } + } + } + + taosArrayDestroy(pList); + + if (taosArrayGetSize(pInfo->pTaskList) == 0) { + mndClearConsensusRspEntry(pInfo); + ASSERT(streamId != -1); + taosArrayPush(pStreamList, &streamId); + } + } + + for (int32_t i = 0; i < taosArrayGetSize(pStreamList); ++i) { + int64_t *pStreamId = (int64_t *)taosArrayGet(pStreamList, i); + mndClearConsensusCheckpointId(execInfo.pStreamConsensus, *pStreamId); } taosThreadMutexUnlock(&execInfo.lock); - pReq->info.handle = NULL; // disable auto rsp - return 0; + taosArrayDestroy(pStreamList); + mDebug("end to process consensus-checkpointId in tmr"); + return TSDB_CODE_SUCCESS; } static int32_t mndProcessCreateStreamReqFromMNode(SRpcMsg *pReq) { diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index c7f97b4a62..1452ac77d2 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -246,7 +246,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } tDecoderClear(&decoder); - mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d, msgId:%d", req.vgId, req.numOfTasks, req.msgId); + mDebug("receive stream-meta hb from vgId:%d, active numOfTasks:%d, msgId:%d", req.vgId, req.numOfTasks, req.msgId); pFailedChkpt = taosArrayInit(4, sizeof(SFailedCheckpointInfo)); pOrphanTasks = taosArrayInit(4, sizeof(SOrphanTask)); @@ -284,6 +284,23 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { continue; } + STaskCkptInfo *pChkInfo = &p->checkpointInfo; + if (pChkInfo->consensusChkptId != 0) { + SRestoreCheckpointInfo cp = { + .streamId = p->id.streamId, + .taskId = p->id.taskId, + .checkpointId = p->checkpointInfo.latestId, + .startTs = pChkInfo->consensusTs, + }; + + SStreamObj *pStream = mndGetStreamObj(pMnode, p->id.streamId); + int32_t numOfTasks = mndGetNumOfStreamTasks(pStream); + + SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, p->id.streamId, numOfTasks); + mndAddConsensusTasks(pInfo, &cp); + mndReleaseStream(pMnode, pStream); + } + if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { updateStageInfo(pTaskEntry, p->stage); if (pTaskEntry->nodeId == SNODE_HANDLE) { @@ -292,7 +309,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } else { streamTaskStatusCopy(pTaskEntry, p); - STaskCkptInfo *pChkInfo = &p->checkpointInfo; if ((pChkInfo->activeId != 0) && pChkInfo->failed) { mError("stream task:0x%" PRIx64 " checkpointId:%" PRIx64 " transId:%d failed, kill it", p->id.taskId, pChkInfo->activeId, pChkInfo->activeTransId); diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index 8fb5bc8a99..c4adbd0fc3 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -84,9 +84,10 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; SVgObj *pVgroup = NULL; + int32_t replica = -1; // do the replica check *allReady = true; - SArray *pVgroupListSnapshot = taosArrayInit(4, sizeof(SNodeEntry)); + SArray *pVgroupList = taosArrayInit(4, sizeof(SNodeEntry)); while (1) { pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); @@ -97,6 +98,17 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime}; entry.epset = mndGetVgroupEpset(pMnode, pVgroup); + if (replica == -1) { + replica = pVgroup->replica; + } else { + if (replica != pVgroup->replica) { + mInfo("vgId:%d replica:%d inconsistent with other vgroups replica:%d, not ready for stream operations", + pVgroup->vgId, pVgroup->replica, replica); + *allReady = false; + break; + } + } + // if not all ready till now, no need to check the remaining vgroups. if (*allReady) { for (int32_t i = 0; i < pVgroup->replica; ++i) { @@ -107,8 +119,10 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { } ESyncState state = pVgroup->vnodeGid[i].syncState; - if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) { - mInfo("vgId:%d offline/err, not ready for checkpoint or other operations", pVgroup->vgId); + if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR || state == TAOS_SYNC_STATE_LEARNER || + state == TAOS_SYNC_STATE_CANDIDATE) { + mInfo("vgId:%d state:%d , not ready for checkpoint or other operations, not check other vgroups", + pVgroup->vgId, state); *allReady = false; break; } @@ -119,7 +133,7 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { epsetToStr(&entry.epset, buf, tListLen(buf)); mDebug("take node snapshot, nodeId:%d %s", entry.nodeId, buf); - taosArrayPush(pVgroupListSnapshot, &entry); + taosArrayPush(pVgroupList, &entry); sdbRelease(pSdb, pVgroup); } @@ -138,11 +152,11 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { epsetToStr(&entry.epset, buf, tListLen(buf)); mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); - taosArrayPush(pVgroupListSnapshot, &entry); + taosArrayPush(pVgroupList, &entry); sdbRelease(pSdb, pObj); } - return pVgroupListSnapshot; + return pVgroupList; } SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) { @@ -637,6 +651,7 @@ void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo* pExecInfo) { void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { taosThreadMutexLock(&pExecNode->lock); + // 1. remove task entries SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { SStreamTask *pTask = streamTaskIterGetCurrent(pIter); @@ -646,8 +661,11 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); - taosThreadMutexUnlock(&pExecNode->lock); + // 2. remove stream entry in consensus hash table + mndClearConsensusCheckpointId(execInfo.pStreamConsensus, pStream->uid); + + taosThreadMutexUnlock(&pExecNode->lock); destroyStreamTaskIter(pIter); } @@ -821,50 +839,113 @@ int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { return TSDB_CODE_SUCCESS; } -int32_t doSendConsensusCheckpointRsp(SRestoreCheckpointInfo* pInfo, SRpcMsg* pMsg, int64_t checkpointId) { +static int32_t mndStreamSetChkptIdAction(SMnode *pMnode, STrans *pTrans, SStreamTask* pTask, int64_t checkpointId, int64_t ts) { + SRestoreCheckpointInfo req = { + .taskId = pTask->id.taskId, + .streamId = pTask->id.streamId, + .checkpointId = checkpointId, + .startTs = ts, + .nodeId = pTask->info.nodeId, + .transId = pTrans->id, + }; + int32_t code = 0; int32_t blen; - - SRestoreCheckpointInfoRsp req = { - .streamId = pInfo->streamId, .taskId = pInfo->taskId, .checkpointId = checkpointId, .startTs = pInfo->startTs}; - - tEncodeSize(tEncodeRestoreCheckpointInfoRsp, &req, blen, code); + tEncodeSize(tEncodeRestoreCheckpointInfo, &req, blen, code); if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } int32_t tlen = sizeof(SMsgHead) + blen; - void *abuf = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + + void *pBuf = taosMemoryMalloc(tlen); + if (pBuf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + void *abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - tEncodeRestoreCheckpointInfoRsp(&encoder, &req); + tEncodeRestoreCheckpointInfo(&encoder, &req); - SMsgHead *pMsgHead = (SMsgHead *)pMsg->pCont; + SMsgHead *pMsgHead = (SMsgHead *)pBuf; pMsgHead->contLen = htonl(tlen); - pMsgHead->vgId = htonl(pInfo->nodeId); + pMsgHead->vgId = htonl(pTask->info.nodeId); + tEncoderClear(&encoder); - tmsgSendRsp(pMsg); + SEpSet epset = {0}; + bool hasEpset = false; + code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS || !hasEpset) { + taosMemoryFree(pBuf); + return code; + } + + code = setTransAction(pTrans, pBuf, tlen, TDMT_STREAM_CONSEN_CHKPT, &epset, 0, TSDB_CODE_VND_INVALID_VGROUP_ID); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFree(pBuf); + } + return code; } -int32_t mndSendConsensusCheckpointIdRsp(SArray* pInfoList, int64_t checkpointId) { - for(int32_t i = 0; i < taosArrayGetSize(pInfoList); ++i) { - SCheckpointConsensusEntry* pInfo = taosArrayGet(pInfoList, i); - doSendConsensusCheckpointRsp(&pInfo->req, &pInfo->rsp, checkpointId); +int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId, + int64_t ts) { + char msg[128] = {0}; + snprintf(msg, tListLen(msg), "set consen-chkpt-id for task:0x%x", taskId); + + STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_CONSEN_NAME, msg); + if (pTrans == NULL) { + return terrno; } - return 0; + + STaskId id = {.streamId = pStream->uid, .taskId = taskId}; + SStreamTask *pTask = mndGetStreamTask(&id, pStream); + ASSERT(pTask); + + /*int32_t code = */ mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_CONSEN_NAME, pStream->uid); + int32_t code = mndStreamSetChkptIdAction(pMnode, pTrans, pTask, checkpointId, ts); + if (code != 0) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return code; + } + + code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare set consensus-chkptId trans since %s", pTrans->id, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + + return TSDB_CODE_ACTION_IN_PROGRESS; } -SCheckpointConsensusInfo* mndGetConsensusInfo(SHashObj* pHash, int64_t streamId) { +SCheckpointConsensusInfo* mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, int32_t numOfTasks) { void* pInfo = taosHashGet(pHash, &streamId, sizeof(streamId)); if (pInfo != NULL) { return (SCheckpointConsensusInfo*)pInfo; } SCheckpointConsensusInfo p = { - .genTs = -1, .checkpointId = -1, .pTaskList = taosArrayInit(4, sizeof(SCheckpointConsensusEntry))}; + .pTaskList = taosArrayInit(4, sizeof(SCheckpointConsensusEntry)), + .numOfTasks = numOfTasks, + .streamId = streamId, + }; + taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); void* pChkptInfo = (SCheckpointConsensusInfo*)taosHashGet(pHash, &streamId, sizeof(streamId)); @@ -873,87 +954,27 @@ SCheckpointConsensusInfo* mndGetConsensusInfo(SHashObj* pHash, int64_t streamId) // no matter existed or not, add the request into info list anyway, since we need to send rsp mannually // discard the msg may lead to the lost of connections. -void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo, SRpcMsg *pMsg) { - SCheckpointConsensusEntry info = {0}; +void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo) { + SCheckpointConsensusEntry info = {.ts = taosGetTimestampMs()}; memcpy(&info.req, pRestoreInfo, sizeof(info.req)); - info.rsp.code = 0; - info.rsp.info = pMsg->info; - info.rsp.contLen = sizeof(SRestoreCheckpointInfoRsp) + sizeof(SMsgHead); - info.rsp.pCont = rpcMallocCont(info.rsp.contLen); - - SMsgHead *pHead = info.rsp.pCont; - pHead->vgId = htonl(pRestoreInfo->nodeId); + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pTaskList); ++i) { + SCheckpointConsensusEntry *p = taosArrayGet(pInfo->pTaskList, i); + if (p->req.taskId == info.req.taskId) { + mDebug("s-task:0x%x already in consensus-checkpointId list for stream:0x%" PRIx64 ", update ts %" PRId64 + "->%" PRId64 " total existed:%d", + pRestoreInfo->taskId, pRestoreInfo->streamId, p->req.startTs, info.req.startTs, + (int32_t)taosArrayGetSize(pInfo->pTaskList)); + p->req.startTs = info.req.startTs; + return; + } + } taosArrayPush(pInfo->pTaskList, &info); -} - -static int32_t entryComparFn(const void* p1, const void* p2) { - const SCheckpointConsensusEntry* pe1 = p1; - const SCheckpointConsensusEntry* pe2 = p2; - - if (pe1->req.taskId == pe2->req.taskId) { - return 0; - } - - return pe1->req.taskId < pe2->req.taskId? -1:1; -} - -bool mndAllTaskSendCheckpointId(SCheckpointConsensusInfo* pInfo, int32_t numOfTasks, int32_t* pTotal) { - int32_t numOfExisted = taosArrayGetSize(pInfo->pTaskList); - if (numOfExisted < numOfTasks) { - if (pTotal != NULL) { - *pTotal = numOfExisted; - } - return false; - } - - taosArraySort(pInfo->pTaskList, entryComparFn); - - int32_t num = 1; - int32_t taskId = ((SCheckpointConsensusEntry*)taosArrayGet(pInfo->pTaskList, 0))->req.taskId; - for(int32_t i = 1; i < taosArrayGetSize(pInfo->pTaskList); ++i) { - SCheckpointConsensusEntry* pe = taosArrayGet(pInfo->pTaskList, i); - if (pe->req.taskId != taskId) { - num += 1; - taskId = pe->req.taskId; - } - } - - if (pTotal != NULL) { - *pTotal = num; - } - - ASSERT(num <= numOfTasks); - return num == numOfTasks; -} - -int64_t mndGetConsensusCheckpointId(SCheckpointConsensusInfo* pInfo, SStreamObj* pStream) { - if (pInfo->genTs > 0) { // there is no checkpoint ever generated if the checkpointId is 0. - mDebug("existed consensus-checkpointId:%" PRId64 " for stream:0x%" PRIx64 " %s exist, and return", - pInfo->checkpointId, pStream->uid, pStream->name); - return pInfo->checkpointId; - } - - int32_t numOfTasks = mndGetNumOfStreamTasks(pStream); - if (!mndAllTaskSendCheckpointId(pInfo, numOfTasks, NULL)) { - return -1; - } - - int64_t checkpointId = INT64_MAX; - - for (int32_t i = 0; i < taosArrayGetSize(pInfo->pTaskList); ++i) { - SCheckpointConsensusEntry *pEntry = taosArrayGet(pInfo->pTaskList, i); - if (pEntry->req.checkpointId < checkpointId) { - checkpointId = pEntry->req.checkpointId; - mTrace("stream:0x%" PRIx64 " %s task:0x%x vgId:%d latest checkpointId:%" PRId64, pStream->uid, pStream->name, - pEntry->req.taskId, pEntry->req.nodeId, pEntry->req.checkpointId); - } - } - - pInfo->checkpointId = checkpointId; - pInfo->genTs = taosGetTimestampMs(); - return checkpointId; + int32_t num = taosArrayGetSize(pInfo->pTaskList); + mDebug("s-task:0x%x checkpointId:%" PRId64 " added into consensus-checkpointId list, stream:0x%" PRIx64 + " waiting tasks:%d", + pRestoreInfo->taskId, pRestoreInfo->checkpointId, pRestoreInfo->streamId, num); } void mndClearConsensusRspEntry(SCheckpointConsensusInfo* pInfo) { @@ -968,15 +989,15 @@ int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId) { return TSDB_CODE_SUCCESS; } -int32_t mndRegisterConsensusChkptId(SHashObj* pHash, int64_t streamId) { - void* pInfo = taosHashGet(pHash, &streamId, sizeof(streamId)); - ASSERT(pInfo == NULL); - - SCheckpointConsensusInfo p = {.genTs = taosGetTimestampMs(), .checkpointId = 0, .pTaskList = NULL}; - taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); - - SCheckpointConsensusInfo* pChkptInfo = (SCheckpointConsensusInfo*)taosHashGet(pHash, &streamId, sizeof(streamId)); - ASSERT(pChkptInfo->genTs > 0 && pChkptInfo->checkpointId == 0); - mDebug("s-task:0x%" PRIx64 " set the initial consensus-checkpointId:0", streamId); - return TSDB_CODE_SUCCESS; -} \ No newline at end of file +//int32_t mndRegisterConsensusChkptId(SHashObj* pHash, int64_t streamId) { +// void* pInfo = taosHashGet(pHash, &streamId, sizeof(streamId)); +// ASSERT(pInfo == NULL); +// +// SCheckpointConsensusInfo p = {.genTs = taosGetTimestampMs(), .checkpointId = 0, .pTaskList = NULL}; +// taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); +// +// SCheckpointConsensusInfo* pChkptInfo = (SCheckpointConsensusInfo*)taosHashGet(pHash, &streamId, sizeof(streamId)); +// ASSERT(pChkptInfo->genTs > 0 && pChkptInfo->checkpointId == 0); +// mDebug("s-task:0x%" PRIx64 " set the initial consensus-checkpointId:0", streamId); +// return TSDB_CODE_SUCCESS; +//} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 4669fdfb38..58dab20859 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1241,6 +1241,7 @@ static void mndTransResetActions(SMnode *pMnode, STrans *pTrans, SArray *pArray) } } +// execute at bottom half static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) { if (pAction->rawWritten) return 0; if (topHalf) { @@ -1267,6 +1268,7 @@ static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransActi return code; } +// execute at top half static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) { if (pAction->msgSent) return 0; if (mndCannotExecuteTransAction(pMnode, topHalf)) { @@ -1644,6 +1646,11 @@ static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans, boo pTrans->stage = TRN_STAGE_FINISH; // TRN_STAGE_PRE_FINISH is not necessary mInfo("trans:%d, stage from commitAction to finished", pTrans->id); continueExec = true; + } else if (code == TSDB_CODE_MND_TRANS_CTX_SWITCH && topHalf) { + pTrans->code = 0; + pTrans->stage = TRN_STAGE_COMMIT; + mInfo("trans:%d, back to commit stage", pTrans->id); + continueExec = true; } else { pTrans->code = terrno; pTrans->failedTimes++; @@ -1783,11 +1790,13 @@ void mndTransExecuteImp(SMnode *pMnode, STrans *pTrans, bool topHalf) { mndTransSendRpcRsp(pMnode, pTrans); } +// start trans, pullup, receive rsp, kill void mndTransExecute(SMnode *pMnode, STrans *pTrans) { bool topHalf = true; return mndTransExecuteImp(pMnode, pTrans, topHalf); } +// update trans void mndTransRefresh(SMnode *pMnode, STrans *pTrans) { bool topHalf = false; return mndTransExecuteImp(pMnode, pTrans, topHalf); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 9686fd3789..cfa24b2430 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -43,14 +43,14 @@ int32_t sndBuildStreamTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProce char *p = streamTaskGetStatus(pTask)->name; if (pTask->info.fillHistory) { - sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + sndInfo("vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, related stream task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam); } else { - sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + sndInfo("vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, @@ -149,15 +149,15 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { case TDMT_VND_STREAM_TASK_UPDATE: return tqStreamTaskProcessUpdateReq(pSnode->pMeta, &pSnode->msgCb, pMsg, true); case TDMT_VND_STREAM_TASK_RESET: - return tqStreamTaskProcessTaskResetReq(pSnode->pMeta, pMsg); + return tqStreamTaskProcessTaskResetReq(pSnode->pMeta, pMsg->pCont); case TDMT_STREAM_TASK_PAUSE: return tqStreamTaskProcessTaskPauseReq(pSnode->pMeta, pMsg->pCont); case TDMT_STREAM_TASK_RESUME: return tqStreamTaskProcessTaskResumeReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pMsg->pCont, false); case TDMT_STREAM_TASK_UPDATE_CHKPT: - return tqStreamTaskProcessUpdateCheckpointReq(pSnode->pMeta, true, pMsg->pCont, pMsg->contLen); - case TDMT_MND_STREAM_CHKPT_CONSEN_RSP: - return tqStreamProcessConsensusChkptRsp(pSnode->pMeta, pMsg); + return tqStreamTaskProcessUpdateCheckpointReq(pSnode->pMeta, true, pMsg->pCont); + case TDMT_STREAM_CONSEN_CHKPT: + return tqStreamTaskProcessConsenChkptIdReq(pSnode->pMeta, pMsg); default: ASSERT(0); } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 1bec226489..4a47e08730 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -298,6 +298,7 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen); +int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg); // sma int32_t smaInit(); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 0a64b9c165..ac57a003c5 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1016,7 +1016,11 @@ int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { } int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) { - return tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, pTq->pVnode->restored, msg, msgLen); + return tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, pTq->pVnode->restored, msg); +} + +int32_t tqProcessTaskConsenChkptIdReq(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessConsenChkptIdReq(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { @@ -1239,7 +1243,7 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { - return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg); + return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg->pCont); } int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) { @@ -1277,5 +1281,5 @@ int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskConsensusChkptRsp(STQ* pTq, SRpcMsg* pMsg) { - return tqStreamProcessConsensusChkptRsp(pTq->pStreamMeta, pMsg); + return tqStreamProcessConsensusChkptRsp2(pTq->pStreamMeta, pMsg); } diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 290266d94a..c79fc66a06 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -51,10 +51,9 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS SStreamSnapReader* pSnapReader = NULL; - if (streamSnapReaderOpen(meta, sver, chkpId, meta->path, &pSnapReader) == 0) { + if ((code = streamSnapReaderOpen(meta, sver, chkpId, meta->path, &pSnapReader)) == 0) { pReader->complete = 1; } else { - code = -1; taosMemoryFree(pReader); goto _err; } @@ -75,7 +74,7 @@ _err: int32_t streamStateSnapReaderClose(SStreamStateReader* pReader) { int32_t code = 0; tqDebug("vgId:%d, vnode %s snapshot reader closed", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); - streamSnapReaderClose(pReader->pReaderImpl); + code = streamSnapReaderClose(pReader->pReaderImpl); taosMemoryFree(pReader); return code; } @@ -138,32 +137,36 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->sver = sver; pWriter->ever = ever; - taosMkDir(pTq->pStreamMeta->path); - - SStreamSnapWriter* pSnapWriter = NULL; - if (streamSnapWriterOpen(pTq, sver, ever, pTq->pStreamMeta->path, &pSnapWriter) < 0) { + if (taosMkDir(pTq->pStreamMeta->path) != 0) { + code = TAOS_SYSTEM_ERROR(errno); + tqError("vgId:%d, vnode %s snapshot writer failed to create directory %s since %s", TD_VID(pTq->pVnode), + STREAM_STATE_TRANSFER, pTq->pStreamMeta->path, tstrerror(code)); goto _err; } - tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, pTq->pStreamMeta->path); + SStreamSnapWriter* pSnapWriter = NULL; + if ((code = streamSnapWriterOpen(pTq, sver, ever, pTq->pStreamMeta->path, &pSnapWriter)) < 0) { + goto _err; + } + + tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + pTq->pStreamMeta->path); pWriter->pWriterImpl = pSnapWriter; *ppWriter = pWriter; - return code; + return 0; + _err: tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tstrerror(code)); taosMemoryFree(pWriter); *ppWriter = NULL; - return -1; + return code; } int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) { - int32_t code = 0; tqDebug("vgId:%d, vnode %s snapshot writer closed", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - code = streamSnapWriterClose(pWriter->pWriterImpl, rollback); - - return code; + return streamSnapWriterClose(pWriter->pWriterImpl, rollback); } int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 668e178d2d..1f3c049211 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -228,6 +228,9 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM } updated = streamTaskUpdateEpsetInfo(pTask, req.pNodeList); + + // send the checkpoint-source-rsp for source task to end the checkpoint trans in mnode + streamTaskSendPreparedCheckpointsourceRsp(pTask); streamTaskResetStatus(pTask); streamTaskStopMonitorCheckRsp(&pTask->taskCheckInfo, pTask->id.idStr); @@ -264,7 +267,6 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM tqDebug("s-task:%s vgId:%d not save task since not update epset actually, stop task", idstr, vgId); } - // stop streamTaskStop(pTask); if (ppHTask != NULL) { streamTaskStop(*ppHTask); @@ -279,7 +281,10 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); - pMeta->startInfo.tasksWillRestart = 1; + if (restored) { + tqDebug("vgId:%d s-task:0x%x update epset transId:%d, set the restart flag", vgId, req.taskId, req.transId); + pMeta->startInfo.tasksWillRestart = 1; + } if (updateTasks < numOfTasks) { tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, @@ -292,8 +297,7 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM streamMetaClearUpdateTaskList(pMeta); if (!restored) { - tqDebug("vgId:%d vnode restore not completed, not start the tasks, clear the start after nodeUpdate flag", vgId); - pMeta->startInfo.tasksWillRestart = 0; + tqDebug("vgId:%d vnode restore not completed, not start all tasks", vgId); } else { tqDebug("vgId:%d all %d task(s) nodeEp updated and closed, transId:%d", vgId, numOfTasks, req.transId); #if 0 @@ -666,7 +670,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen return 0; } -int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored, char* msg, int32_t msgLen) { +int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored, char* msg) { SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; int32_t vgId = pMeta->vgId; @@ -738,6 +742,7 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { streamMetaStartAllTasks(pMeta); } else { streamMetaResetStartInfo(&pMeta->startInfo, pMeta->vgId); + pMeta->startInfo.restartCount = 0; streamMetaWUnLock(pMeta); tqInfo("vgId:%d, follower node not start stream tasks or stream is disabled", vgId); } @@ -854,8 +859,8 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { return TSDB_CODE_SUCCESS; } -int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { - SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont; +int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { + SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); if (pTask == NULL) { @@ -867,7 +872,6 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr); taosThreadMutexLock(&pTask->lock); - streamTaskClearCheckInfo(pTask, true); // clear flag set during do checkpoint, and open inputQ for all upstream tasks @@ -882,9 +886,10 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { streamTaskSetStatusReady(pTask); } else if (pState->state == TASK_STATUS__UNINIT) { - tqDebug("s-task:%s start task by checking downstream tasks", pTask->id.idStr); - ASSERT(pTask->status.downstreamReady == 0); - tqStreamTaskRestoreCheckpoint(pMeta, pTask->id.streamId, pTask->id.taskId); +// tqDebug("s-task:%s start task by checking downstream tasks", pTask->id.idStr); +// ASSERT(pTask->status.downstreamReady == 0); +// tqStreamTaskRestoreCheckpoint(pMeta, pTask->id.streamId, pTask->id.taskId); + tqDebug("s-task:%s status:%s do nothing after receiving reset-task from mnode", pTask->id.idStr, pState->name); } else { tqDebug("s-task:%s status:%s do nothing after receiving reset-task from mnode", pTask->id.idStr, pState->name); } @@ -1111,6 +1116,8 @@ int32_t tqStreamProcessReqCheckpointRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { ret int32_t tqStreamProcessChkptReportRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return doProcessDummyRspMsg(pMeta, pMsg); } +int32_t tqStreamProcessConsensusChkptRsp2(SStreamMeta* pMeta, SRpcMsg* pMsg) { return doProcessDummyRspMsg(pMeta, pMsg); } + int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { SMStreamCheckpointReadyRspMsg* pRsp = pMsg->pCont; @@ -1126,22 +1133,21 @@ int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return TSDB_CODE_SUCCESS; } -int32_t tqStreamProcessConsensusChkptRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { +int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { int32_t vgId = pMeta->vgId; + int32_t code = 0; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); - SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; int64_t now = taosGetTimestampMs(); - SRestoreCheckpointInfoRsp req = {0}; + SRestoreCheckpointInfo req = {0}; SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, len); - rsp.info.handle = NULL; - if (tDecodeRestoreCheckpointInfoRsp(&decoder, &req) < 0) { - // rsp.code = TSDB_CODE_MSG_DECODE_ERROR; // disable it temporarily - tqError("vgId:%d failed to decode restore task checkpointId, code:%s", vgId, tstrerror(rsp.code)); + if (tDecodeRestoreCheckpointInfo(&decoder, &req) < 0) { + tqError("vgId:%d failed to decode set consensus checkpointId req, code:%s", vgId, tstrerror(code)); tDecoderClear(&decoder); return TSDB_CODE_SUCCESS; } @@ -1150,7 +1156,7 @@ int32_t tqStreamProcessConsensusChkptRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); if (pTask == NULL) { - tqError("vgId:%d process restore checkpointId req, failed to acquire task:0x%x, it may have been dropped already", + tqError("vgId:%d process set consensus checkpointId req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, req.taskId); streamMetaAddFailedTask(pMeta, req.streamId, req.taskId); return TSDB_CODE_SUCCESS; @@ -1172,16 +1178,25 @@ int32_t tqStreamProcessConsensusChkptRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { taosThreadMutexLock(&pTask->lock); ASSERT(pTask->chkInfo.checkpointId >= req.checkpointId); + if (pTask->chkInfo.consensusTransId >= req.transId) { + tqDebug("s-task:%s vgId:%d latest consensus transId:%d, expired consensus trans:%d, discard", + pTask->id.idStr, vgId, pTask->chkInfo.consensusTransId, req.transId); + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; + } + if (pTask->chkInfo.checkpointId != req.checkpointId) { - tqDebug("s-task:%s vgId:%d update the checkpoint from %" PRId64 " to %" PRId64, pTask->id.idStr, vgId, - pTask->chkInfo.checkpointId, req.checkpointId); + tqDebug("s-task:%s vgId:%d update the checkpoint from %" PRId64 " to %" PRId64" transId:%d", pTask->id.idStr, vgId, + pTask->chkInfo.checkpointId, req.checkpointId, req.transId); pTask->chkInfo.checkpointId = req.checkpointId; tqSetRestoreVersionInfo(pTask); } else { - tqDebug("s-task:%s vgId:%d consensus-checkpointId:%" PRId64 " equals to current checkpointId, no need to update", - pTask->id.idStr, vgId, req.checkpointId); + tqDebug("s-task:%s vgId:%d consensus-checkpointId:%" PRId64 " equals to current id, transId:%d not update", + pTask->id.idStr, vgId, req.checkpointId, req.transId); } + pTask->chkInfo.consensusTransId = req.transId; taosThreadMutexUnlock(&pTask->lock); if (pMeta->role == NODE_ROLE_LEADER) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 9006d36e65..fb09a46e75 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -630,6 +630,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } } break; + case TDMT_STREAM_CONSEN_CHKPT: { + if (pVnode->restored) { + tqProcessTaskConsenChkptIdReq(pVnode->pTq, pMsg); + } + } break; case TDMT_STREAM_TASK_PAUSE: { if (pVnode->restored && vnodeIsLeader(pVnode) && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { @@ -647,11 +652,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg tqProcessTaskResetReq(pVnode->pTq, pMsg); } } break; - case TDMT_MND_STREAM_CHKPT_CONSEN_RSP: { - if (pVnode->restored) { - tqProcessTaskConsensusChkptRsp(pVnode->pTq, pMsg); - } - } break; case TDMT_VND_ALTER_CONFIRM: needCommit = pVnode->config.hashChange; if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { @@ -738,7 +738,7 @@ int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { return qWorkerPreprocessQueryMsg(pVnode->pQuery, pMsg, TDMT_SCH_QUERY == pMsg->msgType); } -int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo* pInfo) { +int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { vTrace("message in vnode query queue is processing"); if ((pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_VND_TMQ_CONSUME || pMsg->msgType == TDMT_VND_TMQ_CONSUME_PUSH) && @@ -1978,6 +1978,9 @@ _exit: return code; } +extern int32_t tsdbDisableAndCancelAllBgTask(STsdb *pTsdb); +extern int32_t tsdbEnableBgTask(STsdb *pTsdb); + static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { bool walChanged = false; bool tsdbChanged = false; @@ -2075,7 +2078,14 @@ static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t ver, void *pRe } if (req.sttTrigger != -1 && req.sttTrigger != pVnode->config.sttTrigger) { - pVnode->config.sttTrigger = req.sttTrigger; + if (req.sttTrigger > 1 && pVnode->config.sttTrigger > 1) { + pVnode->config.sttTrigger = req.sttTrigger; + } else { + vnodeAWait(&pVnode->commitTask); + tsdbDisableAndCancelAllBgTask(pVnode->pTsdb); + pVnode->config.sttTrigger = req.sttTrigger; + tsdbEnableBgTask(pVnode->pTsdb); + } } if (req.minRows != -1 && req.minRows != pVnode->config.tsdbCfg.minRows) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 490f6b86fa..ce4915ca4d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2187,6 +2187,17 @@ static void rebuildDeleteBlockData(SSDataBlock* pBlock, STimeWindow* pWindow, co taosMemoryFree(p); } +static int32_t colIdComparFn(const void* param1, const void * param2) { + int32_t p1 = *(int32_t*) param1; + int32_t p2 = *(int32_t*) param2; + + if (p1 == p2) { + return 0; + } else { + return (p1 < p2)? -1:1; + } +} + static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock, STimeWindow* pTimeWindow, bool filter) { SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; SOperatorInfo* pOperator = pInfo->pStreamScanOp; @@ -2203,6 +2214,8 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; pBlockInfo->id.groupId = tableListGetTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid); + SArray* pColList = taosArrayInit(4, sizeof(int32_t)); + // todo extract method for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) { SColMatchItem* pColMatchInfo = taosArrayGet(pInfo->matchInfo.pList, i); @@ -2217,6 +2230,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock SColumnInfoData* pDst = taosArrayGet(pInfo->pRes->pDataBlock, pColMatchInfo->dstSlotId); colDataAssign(pDst, pResCol, pBlock->info.rows, &pInfo->pRes->info); colExists = true; + taosArrayPush(pColList, &pColMatchInfo->dstSlotId); break; } } @@ -2225,6 +2239,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock if (!colExists) { SColumnInfoData* pDst = taosArrayGet(pInfo->pRes->pDataBlock, pColMatchInfo->dstSlotId); colDataSetNNULL(pDst, 0, pBlockInfo->rows); + taosArrayPush(pColList, &pColMatchInfo->dstSlotId); } } @@ -2240,8 +2255,35 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock // reset the error code. terrno = 0; + + for(int32_t i = 0; i < pInfo->numOfPseudoExpr; ++i) { + taosArrayPush(pColList, &pInfo->pPseudoExpr[i].base.resSchema.slotId); + } } + taosArraySort(pColList, colIdComparFn); + + int32_t i = 0, j = 0; + while(i < taosArrayGetSize(pColList)) { + int32_t slot1 = *(int32_t*)taosArrayGet(pColList, i); + if (slot1 > j) { + SColumnInfoData* pDst = taosArrayGet(pInfo->pRes->pDataBlock, j); + colDataSetNNULL(pDst, 0, pBlockInfo->rows); + j += 1; + } else { + i += 1; + j += 1; + } + } + + while(j < taosArrayGetSize(pInfo->pRes->pDataBlock)) { + SColumnInfoData* pDst = taosArrayGet(pInfo->pRes->pDataBlock, j); + colDataSetNNULL(pDst, 0, pBlockInfo->rows); + j += 1; + } + + taosArrayDestroy(pColList); + if (filter) { doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); } diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index 21c196872c..3389780916 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -568,6 +568,25 @@ void monSendReport(SMonInfo *pMonitor){ } } +void monSendReportBasic(SMonInfo *pMonitor) { + char *pCont = tjsonToString(pMonitor->pJson); + if (tsMonitorLogProtocol) { + if (pCont != NULL) { + uInfoL("report cont basic:\n%s", pCont); + } else { + uInfo("report cont basic is null"); + } + } + if (pCont != NULL) { + EHttpCompFlag flag = tsMonitor.cfg.comp ? HTTP_GZIP : HTTP_FLAT; + if (taosSendHttpReport(tsMonitor.cfg.server, tsMonFwBasicUri, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != + 0) { + uError("failed to send monitor msg"); + } + taosMemoryFree(pCont); + } +} + void monGenAndSendReport() { SMonInfo *pMonitor = monCreateMonitorInfo(); if (pMonitor == NULL) return; @@ -595,38 +614,11 @@ void monGenAndSendReport() { monGenVnodeRoleTable(pMonitor); monSendPromReport(); - } - - monCleanupMonitorInfo(pMonitor); -} - -void monSendReportBasic(SMonInfo *pMonitor){ - char *pCont = tjsonToString(pMonitor->pJson); - if(tsMonitorLogProtocol){ - if(pCont != NULL){ - uInfoL("report cont basic:\n%s", pCont); + if (pMonitor->mmInfo.cluster.first_ep_dnode_id != 0) { + monGenBasicJsonBasic(pMonitor); + monGenClusterJsonBasic(pMonitor); + monSendReportBasic(pMonitor); } - else{ - uInfo("report cont basic is null"); - } - } - if (pCont != NULL) { - EHttpCompFlag flag = tsMonitor.cfg.comp ? HTTP_GZIP : HTTP_FLAT; - if (taosSendHttpReport(tsMonitor.cfg.server, tsMonFwBasicUri, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { - uError("failed to send monitor msg"); - } - taosMemoryFree(pCont); - } -} - -void monGenAndSendReportBasic() { - SMonInfo *pMonitor = monCreateMonitorInfo(); - - monGenBasicJsonBasic(pMonitor); - monGenClusterJsonBasic(pMonitor); - - if (pMonitor->mmInfo.cluster.first_ep_dnode_id != 0) { - monSendReportBasic(pMonitor); } monCleanupMonitorInfo(pMonitor); diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c index 0289469221..313d9449d2 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ -30,6 +30,7 @@ typedef struct SInsertParseContext { bool forceUpdate; bool needTableTagVal; bool needRequest; // whether or not request server + bool isStmtBind; // whether is stmt bind } SInsertParseContext; typedef int32_t (*_row_append_fn_t)(SMsgBuf* pMsgBuf, const void* value, int32_t len, void* param); @@ -1978,7 +1979,6 @@ static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pSt static int parseOneRow(SInsertParseContext* pCxt, const char** pSql, STableDataCxt* pTableCxt, bool* pGotRow, SToken* pToken) { SBoundColInfo* pCols = &pTableCxt->boundColsInfo; - bool isParseBindParam = false; SSchema* pSchemas = getTableColumnSchema(pTableCxt->pMeta); int32_t code = TSDB_CODE_SUCCESS; @@ -1996,7 +1996,7 @@ static int parseOneRow(SInsertParseContext* pCxt, const char** pSql, STableDataC SColVal* pVal = taosArrayGet(pTableCxt->pValues, pCols->pColIndex[i]); if (pToken->type == TK_NK_QUESTION) { - isParseBindParam = true; + pCxt->isStmtBind = true; if (NULL == pCxt->pComCxt->pStmtCb) { code = buildSyntaxErrMsg(&pCxt->msg, "? only used in stmt", pToken->z); break; @@ -2007,8 +2007,8 @@ static int parseOneRow(SInsertParseContext* pCxt, const char** pSql, STableDataC break; } - if (isParseBindParam) { - code = buildInvalidOperationMsg(&pCxt->msg, "no mix usage for ? and values"); + if (pCxt->isStmtBind) { + code = buildInvalidOperationMsg(&pCxt->msg, "stmt bind param does not support normal value in sql"); break; } @@ -2025,7 +2025,7 @@ static int parseOneRow(SInsertParseContext* pCxt, const char** pSql, STableDataC } } - if (TSDB_CODE_SUCCESS == code && !isParseBindParam) { + if (TSDB_CODE_SUCCESS == code && !pCxt->isStmtBind) { SRow** pRow = taosArrayReserve(pTableCxt->pData->aRowP, 1); code = tRowBuild(pTableCxt->pValues, pTableCxt->pSchema, pRow); if (TSDB_CODE_SUCCESS == code) { @@ -2035,7 +2035,7 @@ static int parseOneRow(SInsertParseContext* pCxt, const char** pSql, STableDataC } } - if (TSDB_CODE_SUCCESS == code && !isParseBindParam) { + if (TSDB_CODE_SUCCESS == code && !pCxt->isStmtBind) { *pGotRow = true; } @@ -2410,6 +2410,7 @@ static int32_t checkTableClauseFirstToken(SInsertParseContext* pCxt, SVnodeModif } if (TK_NK_QUESTION == pTbName->type) { + pCxt->isStmtBind = true; if (NULL == pCxt->pComCxt->pStmtCb) { return buildSyntaxErrMsg(&pCxt->msg, "? only used in stmt", pTbName->z); } @@ -2443,6 +2444,13 @@ static int32_t checkTableClauseFirstToken(SInsertParseContext* pCxt, SVnodeModif pTbName->n = strlen(tbName); } + if (pCxt->isStmtBind) { + if (TK_NK_ID == pTbName->type || (tbNameAfterDbName != NULL && *(tbNameAfterDbName + 1) != '?')) { + // In SQL statements, the table name has already been specified. + parserWarn("0x%" PRIx64 " table name is specified in sql, ignore the table name in bind param", pCxt->pComCxt->requestId); + } + } + *pHasData = true; return TSDB_CODE_SUCCESS; } @@ -2936,7 +2944,8 @@ int32_t parseInsertSql(SParseContext* pCxt, SQuery** pQuery, SCatalogReq* pCatal .missCache = false, .usingDuplicateTable = false, .needRequest = true, - .forceUpdate = (NULL != pCatalogReq ? pCatalogReq->forceUpdate : false)}; + .forceUpdate = (NULL != pCatalogReq ? pCatalogReq->forceUpdate : false), + .isStmtBind = pCxt->isStmtBind}; int32_t code = initInsertQuery(&context, pCatalogReq, pMetaData, pQuery); if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index 6d2215264e..78e876f82c 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -36,9 +36,9 @@ #include "tdatablock.h" #include "tdef.h" #include "tglobal.h" +#include "tmisce.h" #include "trpc.h" #include "tvariant.h" -#include "tmisce.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wwrite-strings" @@ -54,7 +54,8 @@ namespace { -extern "C" int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t execId, SDataBuf *pMsg, int32_t rspCode); +extern "C" int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t execId, SDataBuf *pMsg, + int32_t rspCode); extern "C" int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t rspCode); int64_t insertJobRefId = 0; @@ -74,8 +75,9 @@ int32_t schtStartFetch = 0; void schtInitLogFile() { const char *defaultLogFileNamePrefix = "taoslog"; const int32_t maxLogFileNum = 10; - + rpcInit(); tsAsyncLog = 0; + rpcInit(); qDebugFlag = 159; strcpy(tsLogDir, TD_LOG_DIR_PATH); @@ -84,12 +86,10 @@ void schtInitLogFile() { } } -void schtQueryCb(SExecResult *pResult, void *param, int32_t code) { - *(int32_t *)param = 1; -} +void schtQueryCb(SExecResult *pResult, void *param, int32_t code) { *(int32_t *)param = 1; } -int32_t schtBuildQueryRspMsg(uint32_t *msize, void** rspMsg) { - SQueryTableRsp rsp = {0}; +int32_t schtBuildQueryRspMsg(uint32_t *msize, void **rspMsg) { + SQueryTableRsp rsp = {0}; rsp.code = 0; rsp.affectedRows = 0; rsp.tbVerInfo = NULL; @@ -99,7 +99,7 @@ int32_t schtBuildQueryRspMsg(uint32_t *msize, void** rspMsg) { qError("tSerializeSQueryTableRsp failed"); return TSDB_CODE_OUT_OF_MEMORY; } - + void *pRsp = taosMemoryCalloc(msgSize, 1); if (NULL == pRsp) { qError("rpcMallocCont %d failed", msgSize); @@ -117,9 +117,8 @@ int32_t schtBuildQueryRspMsg(uint32_t *msize, void** rspMsg) { return TSDB_CODE_SUCCESS; } - -int32_t schtBuildFetchRspMsg(uint32_t *msize, void** rspMsg) { - SRetrieveTableRsp* rsp = (SRetrieveTableRsp*)taosMemoryCalloc(sizeof(SRetrieveTableRsp), 1); +int32_t schtBuildFetchRspMsg(uint32_t *msize, void **rspMsg) { + SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)taosMemoryCalloc(sizeof(SRetrieveTableRsp), 1); rsp->completed = 1; rsp->numOfRows = 10; rsp->compLen = 0; @@ -130,14 +129,14 @@ int32_t schtBuildFetchRspMsg(uint32_t *msize, void** rspMsg) { return TSDB_CODE_SUCCESS; } -int32_t schtBuildSubmitRspMsg(uint32_t *msize, void** rspMsg) { +int32_t schtBuildSubmitRspMsg(uint32_t *msize, void **rspMsg) { SSubmitRsp2 submitRsp = {0}; - int32_t msgSize = 0, ret = 0; - SEncoder ec = {0}; - + int32_t msgSize = 0, ret = 0; + SEncoder ec = {0}; + tEncodeSize(tEncodeSSubmitRsp2, &submitRsp, msgSize, ret); - void* msg = taosMemoryCalloc(1, msgSize); - tEncoderInit(&ec, (uint8_t*)msg, msgSize); + void *msg = taosMemoryCalloc(1, msgSize); + tEncoderInit(&ec, (uint8_t *)msg, msgSize); tEncodeSSubmitRsp2(&ec, &submitRsp); tEncoderClear(&ec); @@ -147,7 +146,6 @@ int32_t schtBuildSubmitRspMsg(uint32_t *msize, void** rspMsg) { return TSDB_CODE_SUCCESS; } - void schtBuildQueryDag(SQueryPlan *dag) { uint64_t qId = schtQueryId; @@ -157,8 +155,8 @@ void schtBuildQueryDag(SQueryPlan *dag) { SNodeListNode *scan = (SNodeListNode *)nodesMakeNode(QUERY_NODE_NODE_LIST); SNodeListNode *merge = (SNodeListNode *)nodesMakeNode(QUERY_NODE_NODE_LIST); - SSubplan *scanPlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); - SSubplan *mergePlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + SSubplan *scanPlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + SSubplan *mergePlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); scanPlan->id.queryId = qId; scanPlan->id.groupId = 0x0000000000000002; @@ -210,7 +208,7 @@ void schtBuildQueryFlowCtrlDag(SQueryPlan *dag) { SNodeListNode *scan = (SNodeListNode *)nodesMakeNode(QUERY_NODE_NODE_LIST); SNodeListNode *merge = (SNodeListNode *)nodesMakeNode(QUERY_NODE_NODE_LIST); - SSubplan *mergePlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + SSubplan *mergePlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); merge->pNodeList = nodesMakeList(); scan->pNodeList = nodesMakeList(); @@ -218,7 +216,7 @@ void schtBuildQueryFlowCtrlDag(SQueryPlan *dag) { mergePlan->pChildren = nodesMakeList(); for (int32_t i = 0; i < scanPlanNum; ++i) { - SSubplan *scanPlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + SSubplan *scanPlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); scanPlan->id.queryId = qId; scanPlan->id.groupId = 0x0000000000000002; scanPlan->id.subplanId = 0x0000000000000003 + i; @@ -272,7 +270,7 @@ void schtBuildInsertDag(SQueryPlan *dag) { SNodeListNode *inserta = (SNodeListNode *)nodesMakeNode(QUERY_NODE_NODE_LIST); inserta->pNodeList = nodesMakeList(); - SSubplan *insertPlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + SSubplan *insertPlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); insertPlan->id.queryId = qId; insertPlan->id.groupId = 0x0000000000000003; @@ -287,14 +285,14 @@ void schtBuildInsertDag(SQueryPlan *dag) { insertPlan->pChildren = NULL; insertPlan->pParents = NULL; insertPlan->pNode = NULL; - insertPlan->pDataSink = (SDataSinkNode*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN_INSERT); - ((SDataInserterNode*)insertPlan->pDataSink)->size = 1; - ((SDataInserterNode*)insertPlan->pDataSink)->pData = taosMemoryCalloc(1, 1); + insertPlan->pDataSink = (SDataSinkNode *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN_INSERT); + ((SDataInserterNode *)insertPlan->pDataSink)->size = 1; + ((SDataInserterNode *)insertPlan->pDataSink)->pData = taosMemoryCalloc(1, 1); insertPlan->msgType = TDMT_VND_SUBMIT; nodesListAppend(inserta->pNodeList, (SNode *)insertPlan); - insertPlan = (SSubplan*)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); + insertPlan = (SSubplan *)nodesMakeNode(QUERY_NODE_PHYSICAL_SUBPLAN); insertPlan->id.queryId = qId; insertPlan->id.groupId = 0x0000000000000003; @@ -309,9 +307,9 @@ void schtBuildInsertDag(SQueryPlan *dag) { insertPlan->pChildren = NULL; insertPlan->pParents = NULL; insertPlan->pNode = NULL; - insertPlan->pDataSink = (SDataSinkNode*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN_INSERT); - ((SDataInserterNode*)insertPlan->pDataSink)->size = 1; - ((SDataInserterNode*)insertPlan->pDataSink)->pData = taosMemoryCalloc(1, 1); + insertPlan->pDataSink = (SDataSinkNode *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN_INSERT); + ((SDataInserterNode *)insertPlan->pDataSink)->size = 1; + ((SDataInserterNode *)insertPlan->pDataSink)->pData = taosMemoryCalloc(1, 1); insertPlan->msgType = TDMT_VND_SUBMIT; nodesListAppend(inserta->pNodeList, (SNode *)insertPlan); @@ -389,7 +387,8 @@ void schtSetRpcSendRequest() { } } -int32_t schtAsyncSendMsgToServer(void *pTransporter, SEpSet *epSet, int64_t *pTransporterId, SMsgSendInfo *pInfo, bool persistHandle, void* rpcCtx) { +int32_t schtAsyncSendMsgToServer(void *pTransporter, SEpSet *epSet, int64_t *pTransporterId, SMsgSendInfo *pInfo, + bool persistHandle, void *rpcCtx) { if (pInfo) { taosMemoryFreeClear(pInfo->param); taosMemoryFreeClear(pInfo->msgInfo.pData); @@ -439,11 +438,11 @@ void *schtSendRsp(void *param) { SSchTask *task = *(SSchTask **)pIter; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildSubmitRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_VND_SUBMIT_RSP; msg.pData = rmsg; - + schHandleResponseMsg(pJob, task, task->execId, &msg, 0); pIter = taosHashIterate(pJob->execTasks, pIter); @@ -452,7 +451,7 @@ void *schtSendRsp(void *param) { schReleaseJob(job); schtJobDone = true; - + return NULL; } @@ -462,13 +461,13 @@ void *schtCreateFetchRspThread(void *param) { taosSsleep(1); - int32_t code = 0; + int32_t code = 0; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildFetchRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_MERGE_FETCH_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, pJob->fetchTask, pJob->fetchTask->execId, &msg, 0); schReleaseJob(job); @@ -529,7 +528,7 @@ void *schtRunJobThread(void *aa) { char *dbname = "1.db1"; char *tablename = "table1"; SVgroupInfo vgInfo = {0}; - SQueryPlan* dag = (SQueryPlan*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); + SQueryPlan *dag = (SQueryPlan *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); schtInitLogFile(); @@ -601,7 +600,7 @@ void *schtRunJobThread(void *aa) { param->taskId = task->taskId; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; @@ -622,7 +621,7 @@ void *schtRunJobThread(void *aa) { param->taskId = task->taskId - 1; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; @@ -686,7 +685,6 @@ void *schtFreeJobThread(void *aa) { return NULL; } - } // namespace TEST(queryTest, normalCase) { @@ -696,7 +694,7 @@ TEST(queryTest, normalCase) { char *tablename = "table1"; SVgroupInfo vgInfo = {0}; int64_t job = 0; - SQueryPlan* dag = (SQueryPlan*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); + SQueryPlan *dag = (SQueryPlan *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); SArray *qnodeList = taosArrayInit(1, sizeof(SQueryNodeLoad)); @@ -737,13 +735,13 @@ TEST(queryTest, normalCase) { SSchTask *task = *(SSchTask **)pIter; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, task, task->execId, &msg, 0); - + ASSERT_EQ(code, 0); pIter = taosHashIterate(pJob->execTasks, pIter); } @@ -753,13 +751,13 @@ TEST(queryTest, normalCase) { SSchTask *task = *(SSchTask **)pIter; if (JOB_TASK_STATUS_EXEC == task->status) { SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, task, task->execId, &msg, 0); - + ASSERT_EQ(code, 0); } @@ -793,7 +791,7 @@ TEST(queryTest, normalCase) { taosMemoryFreeClear(data); schReleaseJob(job); - + schedulerDestroy(); schedulerFreeJob(&job, 0); @@ -808,7 +806,7 @@ TEST(queryTest, readyFirstCase) { char *tablename = "table1"; SVgroupInfo vgInfo = {0}; int64_t job = 0; - SQueryPlan* dag = (SQueryPlan*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); + SQueryPlan *dag = (SQueryPlan *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); SArray *qnodeList = taosArrayInit(1, sizeof(SQueryNodeLoad)); @@ -816,7 +814,7 @@ TEST(queryTest, readyFirstCase) { load.addr.epSet.numOfEps = 1; strcpy(load.addr.epSet.eps[0].fqdn, "qnode0.ep"); load.addr.epSet.eps[0].port = 6031; - taosArrayPush(qnodeList, &load); + taosArrayPush(qnodeList, &load); int32_t code = schedulerInit(); ASSERT_EQ(code, 0); @@ -848,11 +846,11 @@ TEST(queryTest, readyFirstCase) { SSchTask *task = *(SSchTask **)pIter; SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, task, task->execId, &msg, 0); ASSERT_EQ(code, 0); @@ -865,13 +863,13 @@ TEST(queryTest, readyFirstCase) { if (JOB_TASK_STATUS_EXEC == task->status) { SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, task, task->execId, &msg, 0); - + ASSERT_EQ(code, 0); } @@ -919,7 +917,7 @@ TEST(queryTest, flowCtrlCase) { char *tablename = "table1"; SVgroupInfo vgInfo = {0}; int64_t job = 0; - SQueryPlan* dag = (SQueryPlan*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); + SQueryPlan *dag = (SQueryPlan *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); schtInitLogFile(); @@ -933,7 +931,6 @@ TEST(queryTest, flowCtrlCase) { load.addr.epSet.eps[0].port = 6031; taosArrayPush(qnodeList, &load); - int32_t code = schedulerInit(); ASSERT_EQ(code, 0); @@ -968,13 +965,13 @@ TEST(queryTest, flowCtrlCase) { if (JOB_TASK_STATUS_EXEC == task->status && 0 != task->lastMsgType) { SDataBuf msg = {0}; - void* rmsg = NULL; + void *rmsg = NULL; schtBuildQueryRspMsg(&msg.len, &rmsg); msg.msgType = TDMT_SCH_QUERY_RSP; msg.pData = rmsg; - + code = schHandleResponseMsg(pJob, task, task->execId, &msg, 0); - + ASSERT_EQ(code, 0); } @@ -1005,7 +1002,7 @@ TEST(queryTest, flowCtrlCase) { schedulerFreeJob(&job, 0); - taosThreadJoin(thread1, NULL); + taosThreadJoin(thread1, NULL); } TEST(insertTest, normalCase) { @@ -1014,7 +1011,7 @@ TEST(insertTest, normalCase) { char *dbname = "1.db1"; char *tablename = "table1"; SVgroupInfo vgInfo = {0}; - SQueryPlan* dag = (SQueryPlan*)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); + SQueryPlan *dag = (SQueryPlan *)nodesMakeNode(QUERY_NODE_PHYSICAL_PLAN); uint64_t numOfRows = 0; SArray *qnodeList = taosArrayInit(1, sizeof(SQueryNodeLoad)); @@ -1067,7 +1064,7 @@ TEST(insertTest, normalCase) { schedulerDestroy(); - taosThreadJoin(thread1, NULL); + taosThreadJoin(thread1, NULL); } TEST(multiThread, forceFree) { @@ -1076,7 +1073,7 @@ TEST(multiThread, forceFree) { TdThread thread1, thread2, thread3; taosThreadCreate(&(thread1), &thattr, schtRunJobThread, NULL); -// taosThreadCreate(&(thread2), &thattr, schtFreeJobThread, NULL); + // taosThreadCreate(&(thread2), &thattr, schtFreeJobThread, NULL); taosThreadCreate(&(thread3), &thattr, schtFetchRspThread, NULL); while (true) { @@ -1089,7 +1086,7 @@ TEST(multiThread, forceFree) { } schtTestStop = true; - //taosSsleep(3); + // taosSsleep(3); } TEST(otherTest, otherCase) { @@ -1097,12 +1094,13 @@ TEST(otherTest, otherCase) { schReleaseJob(0); schFreeRpcCtx(NULL); - ASSERT_EQ(schDumpEpSet(NULL), (char*)NULL); + ASSERT_EQ(schDumpEpSet(NULL), (char *)NULL); ASSERT_EQ(strcmp(schGetOpStr(SCH_OP_NULL), "NULL"), 0); ASSERT_EQ(strcmp(schGetOpStr((SCH_OP_TYPE)100), "UNKNOWN"), 0); } int main(int argc, char **argv) { + schtInitLogFile(); taosSeedRand(taosGetTimestampSec()); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 6b81ac87ee..f0647f44a3 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -131,20 +131,21 @@ typedef struct { TdThreadRwlock rwLock; } SBkdMgt; -bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId); +#define META_ON_S3_FORMATE "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "" + +bool streamBackendDataIsExist(const char* path, int64_t chkpId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); -int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId); +int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId, int64_t processver); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); -STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId); +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, int64_t* processVer); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId); @@ -249,7 +250,7 @@ int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); int32_t taskDbBuildSnap(void* arg, SArray* pSnap); int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo); -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId); SBkdMgt* bkdMgtCreate(char* path); int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path); @@ -259,6 +260,7 @@ void bkdMgtDestroy(SBkdMgt* bm); int32_t taskDbGenChkpUploadData(void* arg, void* bkdMgt, int64_t chkpId, int8_t type, char** path, SArray* list, const char* id); +int32_t remoteChkpGetDelFile(char* path, SArray* toDel); void* taskAcquireDb(int64_t refId); void taskReleaseDb(int64_t refId); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index f5c0ac57df..8b87019ee0 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -150,6 +150,9 @@ static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const cha void taskDbRefChkp(STaskDbWrapper* pTaskDb, int64_t chkp); void taskDbUnRefChkp(STaskDbWrapper* pTaskDb, int64_t chkp); +int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId); +int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId); + #define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); int32_t copyFiles(const char* src, const char* dst); uint32_t nextPow2(uint32_t x); @@ -194,28 +197,50 @@ int32_t getCfIdx(const char* cfName) { return idx; } -bool isValidCheckpoint(const char* dir) { return true; } +bool isValidCheckpoint(const char* dir) { + // not implement yet + return true; +} +/* + *copy pChkpIdDir's file to state dir + */ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { // impl later int32_t code = 0; + int32_t cap = strlen(path) + 64; + int32_t nBytes = 0; + + char* state = taosMemoryCalloc(1, cap); + if (state == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(state); + return TSDB_CODE_OUT_OF_RANGE; + } - /*param@1: checkpointId dir - param@2: state - copy pChkpIdDir's file to state dir - opt to set hard link to previous file - */ - char* state = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); if (chkpId != 0) { - char* chkp = taosMemoryCalloc(1, strlen(path) + 64); - sprintf(chkp, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + char* chkp = taosMemoryCalloc(1, cap); + if (chkp == NULL) { + taosMemoryFree(state); + return TSDB_CODE_OUT_OF_MEMORY; + } + + nBytes = snprintf(chkp, cap, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(state); + taosMemoryFree(chkp); + return TSDB_CODE_OUT_OF_RANGE; + } + if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { cleanDir(state, ""); code = backendCopyFiles(chkp, state); - stInfo("copy snap file from %s to %s", chkp, state); if (code != 0) { - stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); + stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(code))); } else { stInfo("start to restart stream backend at checkpoint path: %s", chkp); } @@ -223,101 +248,175 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } else { stError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno)), state); - taosMkDir(state); + code = taosMkDir(state); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + } } taosMemoryFree(chkp); } *dst = state; - return 0; + return code; } -int32_t remoteChkp_readMetaData(char* path, SArray* list) { - char* metaPath = taosMemoryCalloc(1, strlen(path)); - sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); +typedef struct { + char pCurrName[24]; + int64_t currChkptId; - TdFilePtr pFile = taosOpenFile(path, TD_FILE_READ); - if (pFile == NULL) { - return -1; + char pManifestName[24]; + int64_t manifestChkptId; + + char processName[24]; + int64_t processId; +} SSChkpMetaOnS3; + +int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { + int32_t code = 0; + int32_t cap = strlen(path) + 32; + TdFilePtr pFile = NULL; + + char* metaPath = taosMemoryCalloc(1, cap); + if (metaPath == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - char buf[128] = {0}; - if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + int32_t n = snprintf(metaPath, cap, "%s%s%s", path, TD_DIRSEP, "META"); + if (n <= 0 || n >= cap) { taosMemoryFree(metaPath); - taosCloseFile(&pFile); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } - int32_t len = strnlen(buf, tListLen(buf)); - for (int i = 0; i < len; i++) { - if (buf[i] == '\n') { - char* item = taosMemoryCalloc(1, i + 1); - memcpy(item, buf, i); - taosArrayPush(list, &item); - - item = taosMemoryCalloc(1, len - i); - memcpy(item, buf + i + 1, len - i - 1); - taosArrayPush(list, &item); - } + pFile = taosOpenFile(path, TD_FILE_READ); + if (pFile == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; } + char buf[256] = {0}; + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } + + SSChkpMetaOnS3* p = taosMemoryCalloc(1, sizeof(SSChkpMetaOnS3)); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + n = sscanf(buf, META_ON_S3_FORMATE, p->pCurrName, &p->currChkptId, p->pManifestName, &p->manifestChkptId, + p->processName, &p->processId); + if (n != 6) { + code = TSDB_CODE_INVALID_MSG; + taosMemoryFree(p); + goto _EXIT; + } + + if (p->currChkptId != p->manifestChkptId) { + code = TSDB_CODE_INVALID_MSG; + taosMemoryFree(p); + goto _EXIT; + } + *pMeta = p; + code = 0; +_EXIT: taosCloseFile(&pFile); taosMemoryFree(metaPath); - return 0; + return code; } -int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { - int8_t valid = 0; - for (int i = 0; i < strlen(name); i++) { - if (name[i] == '_') { - memcpy(prename, name, i); - if (taosStr2int64(name + i + 1) != chkpId) { - break; - } else { - valid = 1; - } - } + +int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t chkpId) { + int32_t code = 0; + int32_t nBytes = 0; + + int32_t cap = strlen(path) + 64; + char* src = taosMemoryCalloc(1, cap); + char* dst = taosMemoryCalloc(1, cap); + if (src == NULL || dst == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; } - return valid; -} -int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { - int32_t complete = 1; - int32_t len = strlen(path) + 32; - char* src = taosMemoryCalloc(1, len); - char* dst = taosMemoryCalloc(1, len); - int8_t count = 0; - for (int i = 0; i < taosArrayGetSize(list); i++) { - char* p = taosArrayGetP(list, i); - sprintf(src, "%s%s%s", path, TD_DIRSEP, p); + if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { + code = TSDB_CODE_INVALID_CFG; + goto _EXIT; + } + // rename current_chkp/mainfest to current + for (int i = 0; i < 2; i++) { + char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); + if (strlen(key) <= 0) { + code = TSDB_CODE_INVALID_PARA; + goto _EXIT; + } + + nBytes = snprintf(src, cap, "%s%s%s_%" PRId64 "", path, TD_DIRSEP, key, pMeta->currChkptId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - // check file exist if (taosStatFile(src, NULL, NULL, NULL) != 0) { - complete = 0; - break; + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; } - // check file name - char temp[64] = {0}; - if (remoteChkp_validMetaFile(p, temp, chkpId)) { - count++; + nBytes = snprintf(dst, cap, "%s%s%s", path, TD_DIRSEP, key); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; } - // rename file - sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); - taosRenameFile(src, dst); + if (taosRenameFile(src, dst) != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } - memset(src, 0, len); - memset(dst, 0, len); - } - if (count != taosArrayGetSize(list)) { - complete = 0; + memset(src, 0, cap); + memset(dst, 0, cap); } + code = 0; +// rename manifest_chkp to manifest +_EXIT: taosMemoryFree(src); taosMemoryFree(dst); + return code; +} +int32_t remoteChkpGetDelFile(char* path, SArray* toDel) { + int32_t code = 0; + int32_t nBytes = 0; - return complete == 1 ? 0 : -1; + SSChkpMetaOnS3* pMeta = NULL; + code = remoteChkp_readMetaData(path, &pMeta); + if (code != 0) { + return code; + } + + for (int i = 0; i < 2; i++) { + char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); + + int32_t cap = strlen(key) + 32; + char* p = taosMemoryCalloc(1, cap); + if (p == NULL) { + taosMemoryFree(pMeta); + return TSDB_CODE_OUT_OF_MEMORY; + } + + nBytes = snprintf(p, cap, "%s_%" PRId64 "", key, pMeta->currChkptId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(pMeta); + taosMemoryFree(p); + return TSDB_CODE_OUT_OF_RANGE; + } + if (taosArrayPush(toDel, &p) == NULL) { + taosMemoryFree(pMeta); + taosMemoryFree(p); + return TSDB_CODE_OUT_OF_MEMORY; + } + } + + return 0; } void cleanDir(const char* pPath, const char* id) { @@ -330,73 +429,126 @@ void cleanDir(const char* pPath, const char* id) { } } -void validateDir(const char* pPath) { +int32_t createDirIfNotExist(const char* pPath) { if (!taosIsDir(pPath)) { - taosMulMkDir(pPath); + return taosMulMkDir(pPath); + } else { + return 0; } } -int32_t rebuildFromRemoteChkp_rsync(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_rsync(const char* key, char* checkpointPath, int64_t checkpointId, char* defaultPath) { int32_t code = 0; - if (taosIsDir(chkptPath)) { - taosRemoveDir(chkptPath); - stDebug("remove local checkpoint data dir:%s succ", chkptPath); + if (taosIsDir(checkpointPath)) { + taosRemoveDir(checkpointPath); + stDebug("remove local checkpoint data dir:%s succ", checkpointPath); } cleanDir(defaultPath, key); stDebug("clear local default dir before downloading checkpoint data:%s succ", defaultPath); - code = streamTaskDownloadCheckpointData(key, chkptPath); + code = streamTaskDownloadCheckpointData(key, checkpointPath); if (code != 0) { stError("failed to download checkpoint data:%s", key); return code; } stDebug("download remote checkpoint data for checkpointId:%" PRId64 ", %s", checkpointId, key); - return backendCopyFiles(chkptPath, defaultPath); + return backendCopyFiles(checkpointPath, defaultPath); +} + +int32_t rebuildDataFromS3(char* chkpPath, int64_t chkpId) { + SSChkpMetaOnS3* pMeta = NULL; + + int32_t code = remoteChkp_readMetaData(chkpPath, &pMeta); + if (code != 0) { + return code; + } + + if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { + taosMemoryFree(pMeta); + return TSDB_CODE_INVALID_PARA; + } + + code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); + if (code != 0) { + taosMemoryFree(pMeta); + return code; + } + taosMemoryFree(pMeta); + + return chkpAddExtraInfo(chkpPath, chkpId, pMeta->processId); } int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int8_t rename = 0; int32_t code = streamTaskDownloadCheckpointData(key, chkpPath); if (code != 0) { return code; } - int32_t len = strlen(defaultPath) + 32; - char* tmp = taosMemoryCalloc(1, len); - sprintf(tmp, "%s%s", defaultPath, "_tmp"); - if (taosIsDir(tmp)) taosRemoveDir(tmp); - if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); + int32_t cap = strlen(defaultPath) + 32; - SArray* list = taosArrayInit(2, sizeof(void*)); - code = remoteChkp_readMetaData(chkpPath, list); - if (code == 0) { - code = remoteChkp_validAndCvtMeta(chkpPath, list, chkpId); - } - taosArrayDestroyP(list, taosMemoryFree); - - if (code == 0) { - taosMkDir(defaultPath); - code = backendCopyFiles(chkpPath, defaultPath); + char* defaultTmp = taosMemoryCalloc(1, cap); + if (defaultTmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - if (code != 0) { - if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); - if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + int32_t nBytes = snprintf(defaultPath, cap, "%s%s", defaultPath, "_tmp"); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(defaultPath); + return TSDB_CODE_OUT_OF_RANGE; + } + + if (taosIsDir(defaultTmp)) taosRemoveDir(defaultTmp); + if (taosIsDir(defaultPath)) { + code = taosRenameFile(defaultPath, defaultTmp); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } else { + rename = 1; + } } else { - taosRemoveDir(tmp); + code = taosMkDir(defaultPath); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } } - taosMemoryFree(tmp); + code = rebuildDataFromS3(chkpPath, chkpId); + if (code != 0) { + goto _EXIT; + } + + code = backendCopyFiles(chkpPath, defaultPath); + if (code != 0) { + goto _EXIT; + } + code = 0; + +_EXIT: + if (code != 0) { + if (rename) { + taosRenameFile(defaultTmp, defaultPath); + } + } + + if (taosIsDir(defaultPath)) { + taosRemoveDir(defaultPath); + } + + taosMemoryFree(defaultTmp); return code; } -int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(const char* key, char* checkpointPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { - return rebuildFromRemoteChkp_s3(key, chkptPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_s3(key, checkpointPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { - return rebuildFromRemoteChkp_rsync(key, chkptPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_rsync(key, checkpointPath, checkpointId, defaultPath); } else { stError("%s no remote backup checkpoint data for:%" PRId64, key, checkpointId); } @@ -423,46 +575,70 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const char* current = "CURRENT"; size_t currLen = strlen(current); + const char* info = "info"; + size_t infoLen = strlen(info); + int32_t code = 0; int32_t sLen = strlen(src); int32_t dLen = strlen(dst); - char* srcName = taosMemoryCalloc(1, sLen + 64); - char* dstName = taosMemoryCalloc(1, dLen + 64); + int32_t cap = TMAX(sLen, dLen) + 64; + int32_t nBytes = 0; + + char* srcName = taosMemoryCalloc(1, cap); + char* dstName = taosMemoryCalloc(1, cap); + if (srcName == NULL || dstName == NULL) { + taosMemoryFree(srcName); + taosMemoryFree(dstName); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; + } // copy file to dst TdDirPtr pDir = taosOpenDir(src); if (pDir == NULL) { - taosMemoryFree(srcName); - taosMemoryFree(dstName); code = TAOS_SYSTEM_ERROR(errno); - - errno = 0; - return code; + goto _ERROR; } errno = 0; TdDirEntryPtr de = NULL; - while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { continue; } - sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + nBytes = snprintf(srcName, cap, "%s%s%s", src, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstName, cap, "%s%s%s", dst, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (strncmp(name, current, strlen(name) <= currLen ? strlen(name) : currLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - code = TAOS_SYSTEM_ERROR(code); + code = TAOS_SYSTEM_ERROR(errno); stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } + } else if (strncmp(name, info, strlen(name) <= infoLen ? strlen(name) : infoLen) == 0) { + code = copyFiles_create(srcName, dstName, 0); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); + goto _ERROR; + } + } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { - code = TAOS_SYSTEM_ERROR(code); + code = TAOS_SYSTEM_ERROR(errno); stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } else { @@ -470,28 +646,26 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { } } - memset(srcName, 0, sLen + 64); - memset(dstName, 0, dLen + 64); + memset(srcName, 0, cap); + memset(dstName, 0, cap); } taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); - errno = 0; return code; _ERROR: taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); - errno = 0; return code; } int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); } static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* checkpointPath, int64_t checkpointId, - const char* defaultPath) { + const char* defaultPath, int64_t* processVer) { int32_t code = 0; cleanDir(defaultPath, pTaskIdStr); @@ -502,7 +676,7 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch if (code != TSDB_CODE_SUCCESS) { cleanDir(defaultPath, pTaskIdStr); stError("%s failed to start stream backend from local %s, reason:%s, try download checkpoint from remote", - pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(code))); code = TSDB_CODE_SUCCESS; } else { stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, @@ -516,41 +690,81 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch return code; } -int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { +int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath, + int64_t* processVer) { int32_t code = 0; - char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); - sprintf(prefixPath, "%s%s%s", path, TD_DIRSEP, key); + char* prefixPath = NULL; + char* defaultPath = NULL; + char* checkpointPath = NULL; + char* checkpointRoot = NULL; - validateDir(prefixPath); + int32_t cap = strlen(path) + 128; + int32_t nBytes; - char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); - sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + // alloc buf + prefixPath = taosMemoryCalloc(1, cap); + defaultPath = taosMemoryCalloc(1, cap); + checkpointPath = taosMemoryCalloc(1, cap); + checkpointRoot = taosMemoryCalloc(1, cap); + if (prefixPath == NULL || defaultPath == NULL || checkpointPath == NULL || checkpointRoot == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } - validateDir(defaultPath); - int32_t pathLen = strlen(path) + 256; + nBytes = snprintf(prefixPath, cap, "%s%s%s", path, TD_DIRSEP, key); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - char* checkpointRoot = taosMemoryCalloc(1, pathLen); - sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); + code = createDirIfNotExist(prefixPath); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } - validateDir(checkpointRoot); - taosMemoryFree(checkpointRoot); + nBytes = snprintf(defaultPath, cap, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + code = createDirIfNotExist(defaultPath); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } + + nBytes = snprintf(checkpointRoot, cap, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + code = createDirIfNotExist(checkpointRoot); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); - - char* chkptPath = taosMemoryCalloc(1, pathLen); if (chkptId > 0) { - snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - chkptId); + nBytes = snprintf(checkpointPath, cap, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", chkptId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); + code = rebuildFromLocalCheckpoint(key, checkpointPath, chkptId, defaultPath, processVer); if (code != 0) { - code = rebuildFromRemoteCheckpoint(key, chkptPath, chkptId, defaultPath); + code = rebuildFromRemoteCheckpoint(key, checkpointPath, chkptId, defaultPath); } if (code != 0) { - stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, - tstrerror(code), defaultPath); + stError("failed to start stream backend at %s, restart from default defaultPath:%s, reason:%s", checkpointPath, + defaultPath, tstrerror(code)); code = 0; // reset the error code } } else { // no valid checkpoint id @@ -559,21 +773,40 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId cleanDir(defaultPath, key); } - taosMemoryFree(chkptPath); - *dbPath = defaultPath; *dbPrefixPath = prefixPath; + defaultPath = NULL; + prefixPath = NULL; + code = 0; + +_EXIT: + taosMemoryFree(defaultPath); + taosMemoryFree(prefixPath); + taosMemoryFree(checkpointPath); + taosMemoryFree(checkpointRoot); return code; } +bool streamBackendDataIsExist(const char* path, int64_t chkpId) { + bool exist = true; + int32_t cap = strlen(path) + 32; -bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { - bool exist = true; - char* state = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); - if (!taosDirExist(state)) { - exist = false; + char* state = taosMemoryCalloc(1, cap); + if (state == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return false; } + + int16_t nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + exist = false; + } else { + if (!taosDirExist(state)) { + exist = false; + } + } + taosMemoryFree(state); return exist; } @@ -1074,12 +1307,14 @@ int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) { if (cp == NULL || err != NULL) { stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); + code = TSDB_CODE_THIRDPARTY_ERROR; goto _ERROR; } rocksdb_checkpoint_create(cp, path, UINT64_MAX, &err); if (err != NULL) { stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); + code = TSDB_CODE_THIRDPARTY_ERROR; } else { code = 0; } @@ -1093,13 +1328,17 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 char* err = NULL; rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + if (flushOpt == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + rocksdb_flushoptions_set_wait(flushOpt, 1); rocksdb_flush_cfs(db, flushOpt, cf, nCf, &err); if (err != NULL) { stError("failed to flush db before streamBackend clean up, reason:%s", err); taosMemoryFree(err); - code = -1; + code = TSDB_CODE_THIRDPARTY_ERROR; } rocksdb_flushoptions_destroy(flushOpt); return code; @@ -1107,31 +1346,51 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { int32_t code = 0; - char* pChkpDir = taosMemoryCalloc(1, 256); - char* pChkpIdDir = taosMemoryCalloc(1, 256); + int32_t cap = strlen(path) + 256; + int32_t nBytes = 0; - sprintf(pChkpDir, "%s%s%s", path, TD_DIRSEP, "checkpoints"); - code = taosMulModeMkDir(pChkpDir, 0755, true); - if (code != 0) { - stError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); - taosMemoryFree(pChkpDir); - taosMemoryFree(pChkpIdDir); - code = -1; - return code; + char* pChkpDir = taosMemoryCalloc(1, cap); + char* pChkpIdDir = taosMemoryCalloc(1, cap); + if (pChkpDir == NULL || pChkpIdDir == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + + nBytes = snprintf(pChkpDir, cap, "%s%s%s", path, TD_DIRSEP, "checkpoints"); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + nBytes = snprintf(pChkpIdDir, cap, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + code = taosMulModeMkDir(pChkpDir, 0755, true); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); + goto _EXIT; } - sprintf(pChkpIdDir, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(pChkpIdDir)) { stInfo("stream rm exist checkpoint%s", pChkpIdDir); taosRemoveDir(pChkpIdDir); } + *chkpDir = pChkpDir; *chkpIdDir = pChkpIdDir; - return 0; +_EXIT: + taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + return code; } int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { + // vnode task->db SStreamMeta* pMeta = arg; taosThreadMutexLock(&pMeta->backendMutex); @@ -1140,27 +1399,42 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { while (pIter) { STaskDbWrapper* pTaskDb = *(STaskDbWrapper**)pIter; - taskDbAddRef(pTaskDb); - int64_t chkpId = pTaskDb->chkpId; - taskDbRefChkp(pTaskDb, chkpId); - code = taskDbDoCheckpoint(pTaskDb, chkpId); - if (code != 0) { - taskDbUnRefChkp(pTaskDb, chkpId); + void* p = taskDbAddRef(pTaskDb); + if (p == NULL) { + terrno = 0; + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); + continue; } - taskDbRemoveRef(pTaskDb); + // add chkpId to in-use-ckpkIdSet + taskDbRefChkp(pTaskDb, pTaskDb->chkpId); + + code = taskDbDoCheckpoint(pTaskDb, pTaskDb->chkpId, ((SStreamTask*)pTaskDb->pTask)->chkInfo.processedVer); + if (code != 0) { + // remove chkpId from in-use-ckpkIdSet + taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); + break; + } SStreamTask* pTask = pTaskDb->pTask; SStreamTaskSnap snap = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .chkpId = pTaskDb->chkpId, .dbPrefixPath = taosStrdup(pTaskDb->path)}; + if (snap.dbPrefixPath == NULL) { + // remove chkpid from chkp-in-use set + taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } taosArrayPush(pSnap, &snap); + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); } taosThreadMutexUnlock(&pMeta->backendMutex); - return code; } int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo) { @@ -1232,20 +1506,131 @@ int64_t taskGetDBRef(void* arg) { return pDb->refId; } -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { +int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) { + TdFilePtr pFile = NULL; + int32_t code = 0; + + char buf[256] = {0}; + int32_t nBytes = 0; + + int32_t len = strlen(pChkpIdDir); + if (len == 0) { + code = TSDB_CODE_INVALID_PARA; + stError("failed to load extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); + return code; + } + + int32_t cap = len + 64; + char* pDst = taosMemoryCalloc(1, cap); + if (pDst == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + stError("failed to alloc memory to load extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + stError("failed to build dst to load extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + pFile = taosOpenFile(pDst, TD_FILE_READ); + if (pFile == NULL) { + // compatible with previous version + *processId = -1; + code = 0; + stError("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(TAOS_SYSTEM_ERROR(errno))); + goto _EXIT; + } + + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); + goto _EXIT; + } + + if (sscanf(buf, "%" PRId64 " %" PRId64 "", chkpId, processId) < 2) { + code = TSDB_CODE_INVALID_PARA; + stError("failed to read file content to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); + goto _EXIT; + } + code = 0; +_EXIT: + taosMemoryFree(pDst); + taosCloseFile(&pFile); + return code; +} +int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { + int32_t code = 0; + + TdFilePtr pFile = NULL; + + char buf[256] = {0}; + int32_t nBytes = 0; + + int32_t len = strlen(pChkpIdDir); + if (len == 0) { + code = TSDB_CODE_INVALID_PARA; + stError("failed to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); + return code; + } + + int32_t cap = len + 64; + char* pDst = taosMemoryCalloc(1, cap); + if (pDst == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + stError("failed to alloc memory to add extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + stError("failed to build dst to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); + goto _EXIT; + } + + pFile = taosOpenFile(pDst, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to add extra info, file:%s, reason:%s", pDst, tstrerror(code)); + goto _EXIT; + } + + nBytes = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); + if (nBytes <= 0 || nBytes >= sizeof(buf)) { + code = TSDB_CODE_OUT_OF_RANGE; + stError("failed to build content to add extra info, dir:%s,reason:%s", pChkpIdDir, tstrerror(code)); + goto _EXIT; + } + + if (nBytes != taosWriteFile(pFile, buf, nBytes)) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to write file to add extra info, file:%s, reason:%s", pDst, tstrerror(code)); + goto _EXIT; + } + code = 0; + +_EXIT: + taosCloseFile(&pFile); + taosMemoryFree(pDst); + return code; +} +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId) { STaskDbWrapper* pTaskDb = arg; int64_t st = taosGetTimestampMs(); - int32_t code = -1; + int32_t code = 0; int64_t refId = pTaskDb->refId; if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { - return -1; + code = terrno; + return code; } char* pChkpDir = NULL; char* pChkpIdDir = NULL; - if (chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir) != 0) { - code = -1; + if ((code = chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir)) < 0) { goto _EXIT; } // Get all cf and acquire cfWrappter @@ -1256,32 +1641,58 @@ int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { int64_t written = atomic_load_64(&pTaskDb->dataWritten); + // flush db if (written > 0) { stDebug("stream backend:%p start to flush db at:%s, data written:%" PRId64 "", pTaskDb, pChkpIdDir, written); code = chkpPreFlushDb(pTaskDb->db, ppCf, nCf); + if (code != 0) goto _EXIT; } else { stDebug("stream backend:%p not need flush db at:%s, data written:%" PRId64 "", pTaskDb, pChkpIdDir, written); } + + // do checkpoint if ((code = chkpDoDbCheckpoint(pTaskDb->db, pChkpIdDir)) != 0) { stError("stream backend:%p failed to do checkpoint at:%s", pTaskDb, pChkpIdDir); + goto _EXIT; } else { stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir, taosGetTimestampMs() - st); } + // add extra info to checkpoint + if ((code = chkpAddExtraInfo(pChkpIdDir, chkpId, processId)) != 0) { + stError("stream backend:%p failed to add extra info to checkpoint at:%s", pTaskDb, pChkpIdDir); + goto _EXIT; + } + + // delete ttl checkpoint code = chkpMayDelObsolete(pTaskDb, chkpId, pChkpDir); + if (code < 0) { + goto _EXIT; + } + atomic_store_64(&pTaskDb->dataWritten, 0); pTaskDb->chkpId = chkpId; _EXIT: - taosMemoryFree(pChkpDir); + + // clear checkpoint dir if failed + if (code != 0 && pChkpDir != NULL) { + if (taosDirExist(pChkpIdDir)) { + taosRemoveDir(pChkpIdDir); + } + } taosMemoryFree(pChkpIdDir); + taosMemoryFree(pChkpDir); + taosReleaseRef(taskDbWrapperId, refId); taosMemoryFree(ppCf); return code; } -int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } +int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId, int64_t processVer) { + return taskDbDoCheckpoint(arg, chkpId, processVer); +} SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; @@ -1659,7 +2070,9 @@ int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { len += taosEncodeFixedI32((void**)&buf, key.len); len += taosEncodeFixedI32((void**)&buf, key.rawLen); len += taosEncodeFixedI8((void**)&buf, key.compress); - len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + if (value != NULL && key.len != 0) { + len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + } *dest = p; } else { char* buf = *dest; @@ -1667,7 +2080,9 @@ int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { len += taosEncodeFixedI32((void**)&buf, key.len); len += taosEncodeFixedI32((void**)&buf, key.rawLen); len += taosEncodeFixedI8((void**)&buf, key.compress); - len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + if (value != NULL && key.len != 0) { + len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + } } taosMemoryFree(dst); @@ -2005,12 +2420,16 @@ void taskDbDestroyChkpOpt(STaskDbWrapper* pTaskDb) { int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** stateFullPath) { int32_t code = 0; + char* statePath = taosMemoryCalloc(1, strlen(path) + 128); + if (statePath == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } - char* statePath = taosMemoryCalloc(1, strlen(path) + 128); sprintf(statePath, "%s%s%s", path, TD_DIRSEP, key); if (!taosDirExist(statePath)) { code = taosMulMkDir(statePath); if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); stError("failed to create dir: %s, reason:%s", statePath, tstrerror(code)); taosMemoryFree(statePath); return code; @@ -2018,10 +2437,16 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta } char* dbPath = taosMemoryCalloc(1, strlen(statePath) + 128); + if (dbPath == NULL) { + taosMemoryFree(statePath); + return TSDB_CODE_OUT_OF_MEMORY; + } + sprintf(dbPath, "%s%s%s", statePath, TD_DIRSEP, "state"); if (!taosDirExist(dbPath)) { code = taosMulMkDir(dbPath); if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); stError("failed to create dir: %s, reason:%s", dbPath, tstrerror(code)); taosMemoryFree(statePath); taosMemoryFree(dbPath); @@ -2099,15 +2524,32 @@ _EXIT: return NULL; } -STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId) { +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, int64_t* processVer) { char* statePath = NULL; char* dbPath = NULL; - - if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath) != 0) { + int code = 0; + terrno = 0; + if ((code = restoreCheckpointData(path, key, chkptId, &statePath, &dbPath, processVer)) < 0) { + terrno = code; + stError("failed to restore checkpoint data, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, + chkptId, tstrerror(terrno)); return NULL; } STaskDbWrapper* pTaskDb = taskDbOpenImpl(key, statePath, dbPath); + if (pTaskDb != NULL) { + int64_t chkpId = -1, ver = -1; + if ((code = chkpLoadExtraInfo(dbPath, &chkpId, &ver) == 0)) { + *processVer = ver; + } else { + terrno = code; + stError("failed to load extra info, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, chkptId, + tstrerror(terrno)); + taskDbDestroy(pTaskDb, false); + return NULL; + } + } + taosMemoryFree(dbPath); taosMemoryFree(statePath); return pTaskDb; @@ -2194,15 +2636,31 @@ void taskDbDestroy(void* pDb, bool flush) { void taskDbDestroy2(void* pDb) { taskDbDestroy(pDb, true); } int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char** path) { - int32_t code = -1; + int32_t code = 0; int64_t refId = pDb->refId; + int32_t nBytes = 0; if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { - return -1; + code = terrno; + return code; + } + + int32_t cap = strlen(pDb->path) + 128; + + char* buf = taosMemoryCalloc(1, cap); + if (buf == NULL) { + taosReleaseRef(taskDbWrapperId, refId); + return TSDB_CODE_OUT_OF_MEMORY; + } + + nBytes = + snprintf(buf, cap, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(buf); + taosReleaseRef(taskDbWrapperId, refId); + return TSDB_CODE_OUT_OF_RANGE; } - char* buf = taosMemoryCalloc(1, strlen(pDb->path) + 128); - sprintf(buf, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(buf)) { code = 0; *path = buf; @@ -2217,15 +2675,28 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list, const char* idStr) { int32_t code = 0; + int32_t cap = strlen(pDb->path) + 32; SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; - char* temp = taosMemoryCalloc(1, strlen(pDb->path) + 32); - sprintf(temp, "%s%s%s%" PRId64, pDb->path, TD_DIRSEP, "tmp", chkpId); + char* temp = taosMemoryCalloc(1, cap); + if (temp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + int32_t nBytes = snprintf(temp, cap, "%s%s%s%" PRId64, pDb->path, TD_DIRSEP, "tmp", chkpId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(temp); + return TSDB_CODE_OUT_OF_RANGE; + } if (taosDirExist(temp)) { cleanDir(temp, idStr); } else { - taosMkDir(temp); + code = taosMkDir(temp); + if (code != 0) { + taosMemoryFree(temp); + return TAOS_SYSTEM_ERROR(errno); + } } code = bkdMgtGetDelta(p, pDb->idstr, chkpId, list, temp); @@ -2321,7 +2792,8 @@ int32_t streamStateCvtDataFormat(char* path, char* key, void* pCfInst) { int32_t code = 0; - STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0); + int64_t processVer = -1; + STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0, &processVer); RocksdbCfInst* pSrcBackend = pCfInst; for (int i = 0; i < nCf; i++) { @@ -3955,6 +4427,9 @@ int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { char* name = taosHashGetKey(pIter, &len); if (!isBkdDataMeta(name, len) && !taosHashGet(p1, name, len)) { char* fname = taosMemoryCalloc(1, len + 1); + if (fname == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } strncpy(fname, name, len); taosArrayPush(diff, &fname); } @@ -3966,7 +4441,9 @@ int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { int32_t code = 0; code = compareHashTableImpl(p1, p2, add); - code = compareHashTableImpl(p2, p1, del); + if (code != 0) { + code = compareHashTableImpl(p2, p1, del); + } return code; } @@ -4007,26 +4484,29 @@ void strArrayDebugInfo(SArray* pArr, char** buf) { *buf = p; } void dbChkpDebugInfo(SDbChkp* pDb) { - // stTrace("chkp get file list: curr"); - char* p[4] = {NULL}; + if (stDebugFlag & DEBUG_INFO) { + char* p[4] = {NULL}; - hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); - stTrace("chkp previous file: [%s]", p[0]); + hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); + stTrace("chkp previous file: [%s]", p[0]); - hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); - stTrace("chkp curr file: [%s]", p[1]); + hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); + stTrace("chkp curr file: [%s]", p[1]); - strArrayDebugInfo(pDb->pAdd, &p[2]); - stTrace("chkp newly addded file: [%s]", p[2]); + strArrayDebugInfo(pDb->pAdd, &p[2]); + stTrace("chkp newly addded file: [%s]", p[2]); - strArrayDebugInfo(pDb->pDel, &p[3]); - stTrace("chkp newly deleted file: [%s]", p[3]); + strArrayDebugInfo(pDb->pDel, &p[3]); + stTrace("chkp newly deleted file: [%s]", p[3]); - for (int i = 0; i < 4; i++) { - taosMemoryFree(p[i]); + for (int i = 0; i < 4; i++) { + taosMemoryFree(p[i]); + } } } int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { + int32_t code = 0; + int32_t nBytes; taosThreadRwlockWrlock(&p->rwLock); p->preCkptId = p->curChkpId; @@ -4041,13 +4521,24 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { int32_t sstLen = strlen(pSST); memset(p->buf, 0, p->len); - sprintf(p->buf, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + + nBytes = + snprintf(p->buf, p->len, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (nBytes <= 0 || nBytes >= p->len) { + taosThreadRwlockUnlock(&p->rwLock); + return TSDB_CODE_OUT_OF_RANGE; + } taosArrayClearP(p->pAdd, taosMemoryFree); taosArrayClearP(p->pDel, taosMemoryFree); taosHashClear(p->pSstTbl[1 - p->idx]); - TdDirPtr pDir = taosOpenDir(p->buf); + TdDirPtr pDir = taosOpenDir(p->buf); + if (pDir == NULL) { + taosThreadRwlockUnlock(&p->rwLock); + return TAOS_SYSTEM_ERROR(errno); + } + TdDirEntryPtr de = NULL; int8_t dummy = 0; while ((de = taosReadDir(pDir)) != NULL) { @@ -4055,23 +4546,36 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { taosMemoryFreeClear(p->pCurrent); + p->pCurrent = taosStrdup(name); - // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + if (p->pCurrent == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { taosMemoryFreeClear(p->pManifest); p->pManifest = taosStrdup(name); - // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + if (p->pManifest == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { - taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + if (taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)) != 0) { + break; + } continue; } } taosCloseDir(&pDir); + if (code != 0) { + taosThreadRwlockUnlock(&p->rwLock); + return code; + } if (p->init == 0) { void* pIter = taosHashIterate(p->pSstTbl[1 - p->idx], NULL); @@ -4080,6 +4584,11 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { char* name = taosHashGetKey(pIter, &len); if (name != NULL && !isBkdDataMeta(name, len)) { char* fname = taosMemoryCalloc(1, len + 1); + if (fname == NULL) { + taosThreadRwlockUnlock(&p->rwLock); + return TSDB_CODE_OUT_OF_MEMORY; + } + strncpy(fname, name, len); taosArrayPush(p->pAdd, &fname); } @@ -4115,34 +4624,78 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosThreadRwlockUnlock(&p->rwLock); - return 0; + return code; } +void dbChkpDestroy(SDbChkp* pChkp); + SDbChkp* dbChkpCreate(char* path, int64_t initChkpId) { SDbChkp* p = taosMemoryCalloc(1, sizeof(SDbChkp)); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->curChkpId = initChkpId; p->preCkptId = -1; p->pSST = taosArrayInit(64, sizeof(void*)); + if (p->pSST == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + dbChkpDestroy(p); + return NULL; + } + p->path = path; p->len = strlen(path) + 128; p->buf = taosMemoryCalloc(1, p->len); + if (p->buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } p->idx = 0; p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (p->pSstTbl[0] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (p->pSstTbl[1] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } p->pAdd = taosArrayInit(64, sizeof(void*)); + if (p->pAdd == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->pDel = taosArrayInit(64, sizeof(void*)); + if (p->pDel == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->update = 0; taosThreadRwlockInit(&p->rwLock, NULL); SArray* list = NULL; int32_t code = dbChkpGetDelta(p, initChkpId, list); + if (code != 0) { + goto _EXIT; + } return p; +_EXIT: + dbChkpDestroy(p); + return NULL; } void dbChkpDestroy(SDbChkp* pChkp) { + if (pChkp == NULL) return; + taosMemoryFree(pChkp->buf); taosMemoryFree(pChkp->path); @@ -4164,35 +4717,71 @@ int32_t dbChkpInit(SDbChkp* p) { } #endif int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { + static char* chkpMeta = "META"; + int32_t code = 0; + taosThreadRwlockRdlock(&p->rwLock); - int32_t code = -1; - int32_t len = p->len + 128; - char* srcBuf = taosMemoryCalloc(1, len); - char* dstBuf = taosMemoryCalloc(1, len); + int32_t cap = p->len + 128; - char* srcDir = taosMemoryCalloc(1, len); - char* dstDir = taosMemoryCalloc(1, len); + char* buffer = taosMemoryCalloc(4, cap); + if (buffer == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } - sprintf(srcDir, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); - sprintf(dstDir, "%s", dname); + char* srcBuf = buffer; + char* dstBuf = &srcBuf[cap]; + char* srcDir = &dstBuf[cap]; + char* dstDir = &srcDir[cap]; + + int nBytes = snprintf(srcDir, cap, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", p->curChkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstDir, cap, "%s", dname); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (!taosDirExist(srcDir)) { stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + code = TSDB_CODE_INVALID_PARA; + goto _ERROR; + } + int64_t chkpId = 0, processId = -1; + code = chkpLoadExtraInfo(srcDir, &chkpId, &processId); + if (code < 0) { + stError("failed to load extra info from %s, reason:%s", srcDir, code != 0 ? "unkown" : tstrerror(code)); + goto _ERROR; } // add file to $name dir for (int i = 0; i < taosArrayGetSize(p->pAdd); i++) { - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); char* filename = taosArrayGetP(p->pAdd, i); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, filename); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstBuf, cap, "%s%s%s", dstDir, TD_DIRSEP, filename); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); goto _ERROR; } } @@ -4200,44 +4789,84 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { for (int i = 0; i < taosArrayGetSize(p->pDel); i++) { char* filename = taosArrayGetP(p->pDel, i); char* p = taosStrdup(filename); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } taosArrayPush(list, &p); } // copy current file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); - sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); + + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); goto _ERROR; } // copy manifest file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); - sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); - if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); + + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } - static char* chkpMeta = "META"; - memset(dstBuf, 0, len); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); - memcpy(dstDir, dstBuf, strlen(dstBuf)); + nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + if (taosCopyFile(srcBuf, dstBuf) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); + goto _ERROR; + } + memset(dstBuf, 0, cap); + nBytes = snprintf(dstDir, cap, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { - stError("chkp failed to create meta file: %s", dstDir); + code = TAOS_SYSTEM_ERROR(errno); + stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(code)); goto _ERROR; } - char content[128] = {0}; - snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, p->pManifest, - p->curChkpId); - if (taosWriteFile(pFile, content, strlen(content)) <= 0) { - stError("chkp failed to write meta file: %s", dstDir); + + char content[256] = {0}; + nBytes = snprintf(content, sizeof(content), META_ON_S3_FORMATE, p->pCurrent, p->curChkpId, p->pManifest, p->curChkpId, + "processVer", processId); + if (nBytes <= 0 || nBytes >= sizeof(content)) { + code = TSDB_CODE_OUT_OF_RANGE; + stError("chkp failed to format meta file: %s, reason: invalid msg", dstDir); + taosCloseFile(&pFile); + goto _ERROR; + } + + nBytes = taosWriteFile(pFile, content, strlen(content)); + if (nBytes != strlen(content)) { + code = TAOS_SYSTEM_ERROR(errno); + stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(code)); taosCloseFile(&pFile); goto _ERROR; } @@ -4249,18 +4878,39 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { code = 0; _ERROR: + taosMemoryFree(buffer); taosThreadRwlockUnlock(&p->rwLock); - taosMemoryFree(srcBuf); - taosMemoryFree(dstBuf); - taosMemoryFree(srcDir); - taosMemoryFree(dstDir); return code; } + SBkdMgt* bkdMgtCreate(char* path) { + terrno = 0; SBkdMgt* p = taosMemoryCalloc(1, sizeof(SBkdMgt)); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + p->pDbChkpTbl = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (p->pDbChkpTbl == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + bkdMgtDestroy(p); + return NULL; + } + p->path = taosStrdup(path); - taosThreadRwlockInit(&p->rwLock, NULL); + if (p->path == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + bkdMgtDestroy(p); + return NULL; + } + + if (taosThreadRwlockInit(&p->rwLock, NULL) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + bkdMgtDestroy(p); + return NULL; + } + return p; } @@ -4282,28 +4932,52 @@ void bkdMgtDestroy(SBkdMgt* bm) { } int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* dname) { int32_t code = 0; - taosThreadRwlockWrlock(&bm->rwLock); SDbChkp** ppChkp = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); SDbChkp* pChkp = ppChkp != NULL ? *ppChkp : NULL; if (pChkp == NULL) { - char* path = taosMemoryCalloc(1, strlen(bm->path) + 64); - sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); + int32_t cap = strlen(bm->path) + 64; + char* path = taosMemoryCalloc(1, cap); + if (path == NULL) { + taosThreadRwlockUnlock(&bm->rwLock); + return TSDB_CODE_OUT_OF_MEMORY; + } + + int32_t nBytes = snprintf(path, cap, "%s%s%s", bm->path, TD_DIRSEP, taskId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(path); + taosThreadRwlockUnlock(&bm->rwLock); + code = TSDB_CODE_OUT_OF_RANGE; + return code; + } SDbChkp* p = dbChkpCreate(path, chkpId); - taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)); + if (p == NULL) { + taosMemoryFree(path); + taosThreadRwlockUnlock(&bm->rwLock); + code = terrno; + return code; + } + + if (taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)) != 0) { + dbChkpDestroy(p); + taosThreadRwlockUnlock(&bm->rwLock); + code = terrno; + return code; + } pChkp = p; - code = dbChkpDumpTo(pChkp, dname, list); taosThreadRwlockUnlock(&bm->rwLock); return code; + } else { + code = dbChkpGetDelta(pChkp, chkpId, NULL); + if (code == 0) { + code = dbChkpDumpTo(pChkp, dname, list); + } } - code = dbChkpGetDelta(pChkp, chkpId, NULL); - code = dbChkpDumpTo(pChkp, dname, list); - taosThreadRwlockUnlock(&bm->rwLock); return code; } diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index 8778e3314a..226a06be7e 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -666,13 +666,18 @@ void rspMonitorFn(void* param, void* tmrId) { stDebug("s-task:%s status:%s vgId:%d quit from monitor check-rsp tmr, ref:%d", id, pStat->name, vgId, ref); streamTaskCompleteCheckRsp(pInfo, true, id); - addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); + + // not record the failed of the current task if try to close current vnode + // otherwise, the put of message operation may incur invalid read of message queue. + if (!pMeta->closeFlag) { + addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); + } streamMetaReleaseTask(pMeta, pTask); return; } - if (state == TASK_STATUS__DROPPING || state == TASK_STATUS__READY || state == TASK_STATUS__PAUSE) { + if (state == TASK_STATUS__DROPPING || state == TASK_STATUS__READY) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s status:%s vgId:%d quit from monitor check-rsp tmr, ref:%d", id, pStat->name, vgId, ref); @@ -698,31 +703,31 @@ void rspMonitorFn(void* param, void* tmrId) { if (pStat->state == TASK_STATUS__UNINIT) { getCheckRspStatus(pInfo, timeoutDuration, &numOfReady, &numOfFault, &numOfNotRsp, pTimeoutList, pNotReadyList, id); + + numOfNotReady = (int32_t)taosArrayGetSize(pNotReadyList); + numOfTimeout = (int32_t)taosArrayGetSize(pTimeoutList); + + // fault tasks detected, not try anymore + ASSERT((numOfReady + numOfFault + numOfNotReady + numOfTimeout + numOfNotRsp) == total); + if (numOfFault > 0) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug( + "s-task:%s status:%s vgId:%d all rsp. quit from monitor rsp tmr, since vnode-transfer/leader-change/restart " + "detected, total:%d, notRsp:%d, notReady:%d, fault:%d, timeout:%d, ready:%d ref:%d", + id, pStat->name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref); + + streamTaskCompleteCheckRsp(pInfo, false, id); + taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMetaReleaseTask(pMeta, pTask); + + taosArrayDestroy(pNotReadyList); + taosArrayDestroy(pTimeoutList); + return; + } } else { // unexpected status stError("s-task:%s unexpected task status:%s during waiting for check rsp", id, pStat->name); } - numOfNotReady = (int32_t)taosArrayGetSize(pNotReadyList); - numOfTimeout = (int32_t)taosArrayGetSize(pTimeoutList); - - // fault tasks detected, not try anymore - ASSERT((numOfReady + numOfFault + numOfNotReady + numOfTimeout + numOfNotRsp) == total); - if (numOfFault > 0) { - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug( - "s-task:%s status:%s vgId:%d all rsp. quit from monitor rsp tmr, since vnode-transfer/leader-change/restart " - "detected, total:%d, notRsp:%d, notReady:%d, fault:%d, timeout:%d, ready:%d ref:%d", - id, pStat->name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref); - - streamTaskCompleteCheckRsp(pInfo, false, id); - taosThreadMutexUnlock(&pInfo->checkInfoLock); - streamMetaReleaseTask(pMeta, pTask); - - taosArrayDestroy(pNotReadyList); - taosArrayDestroy(pTimeoutList); - return; - } - // checking of downstream tasks has been stopped by other threads if (pInfo->stopCheckProcess == 1) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index bc973f17d7..a3a7035905 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -57,6 +57,13 @@ SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpoint pBlock->info.childId = pTask->info.selfChildId; pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock)); // pBlock; + if (pChkpoint->blocks == NULL) { + taosMemoryFree(pBlock); + taosFreeQitem(pChkpoint); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + taosArrayPush(pChkpoint->blocks, pBlock); taosMemoryFree(pBlock); @@ -112,7 +119,12 @@ int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTri int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, int32_t downstreamNodeId, SRpcHandleInfo* pRpcInfo, int32_t code) { int32_t size = sizeof(SMsgHead) + sizeof(SCheckpointTriggerRsp); - void* pBuf = rpcMallocCont(size); + + void* pBuf = rpcMallocCont(size); + if (pBuf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } SCheckpointTriggerRsp* pRsp = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); @@ -133,6 +145,7 @@ int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId SRpcMsg rspMsg = {.code = 0, .pCont = pBuf, .contLen = size, .info = *pRpcInfo}; tmsgSendRsp(&rspMsg); + return 0; } @@ -405,12 +418,14 @@ int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstream void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { pTask->chkInfo.startTs = 0; // clear the recorded start time - - streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks + + taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); + streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); if (clearChkpReadyMsg) { streamClearChkptReadyMsg(pTask->chkInfo.pActiveInfo); } + taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); } int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SVUpdateCheckpointInfoReq* pReq) { @@ -447,14 +462,6 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SV SStreamTaskState* pStatus = streamTaskGetStatus(pTask); - if (restored && (pStatus->state != TASK_STATUS__CK) && (pMeta->role == NODE_ROLE_LEADER)) { - stDebug("s-task:0x%x vgId:%d restored:%d status:%s not update checkpoint-info, checkpointId:%" PRId64 "->%" PRId64 - " failed", - pReq->taskId, vgId, restored, pStatus->name, pInfo->checkpointId, pReq->checkpointId); - taosThreadMutexUnlock(&pTask->lock); - return TSDB_CODE_STREAM_TASK_IVLD_STATUS; - } - if (!restored) { // during restore procedure, do update checkpoint-info stDebug("s-task:%s vgId:%d status:%s update the checkpoint-info during restore, checkpointId:%" PRId64 "->%" PRId64 " checkpointVer:%" PRId64 "->%" PRId64 " checkpointTs:%" PRId64 "->%" PRId64, @@ -528,65 +535,57 @@ void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { } static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* list) { - char buf[128] = {0}; + int32_t code = 0; + int32_t cap = strlen(path) + 64; - char* file = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(file, "%s%s%s", path, TD_DIRSEP, "META_TMP"); + char* filePath = taosMemoryCalloc(1, cap); + if (filePath == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } - int32_t code = downloadCheckpointDataByName(id, "META", file); + int32_t nBytes = snprintf(filePath, cap, "%s%s%s", path, TD_DIRSEP, "META_TMP"); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(filePath); + return TSDB_CODE_OUT_OF_RANGE; + } + + code = downloadCheckpointDataByName(id, "META", filePath); if (code != 0) { - stDebug("%s chkp failed to download meta file:%s", id, file); - taosMemoryFree(file); + stError("%s chkp failed to download meta file:%s", id, filePath); + taosMemoryFree(filePath); return code; } - TdFilePtr pFile = taosOpenFile(file, TD_FILE_READ); - if (pFile == NULL) { - stError("%s failed to open meta file:%s for checkpoint", id, file); - code = -1; - return code; + code = remoteChkpGetDelFile(filePath, list); + if (code != 0) { + stError("%s chkp failed to get to del:%s", id, filePath); + taosMemoryFree(filePath); } - - if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { - stError("%s failed to read meta file:%s for checkpoint", id, file); - code = -1; - } else { - int32_t len = strnlen(buf, tListLen(buf)); - for (int i = 0; i < len; i++) { - if (buf[i] == '\n') { - char* item = taosMemoryCalloc(1, i + 1); - memcpy(item, buf, i); - taosArrayPush(list, &item); - - item = taosMemoryCalloc(1, len - i); - memcpy(item, buf + i + 1, len - i - 1); - taosArrayPush(list, &item); - } - } - } - - taosCloseFile(&pFile); - taosRemoveFile(file); - taosMemoryFree(file); - return code; + return 0; } int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t dbRefId, ECHECKPOINT_BACKUP_TYPE type) { - char* path = NULL; - int32_t code = 0; - SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); - int64_t now = taosGetTimestampMs(); + int32_t code = 0; + char* path = NULL; + SStreamMeta* pMeta = pTask->pMeta; const char* idStr = pTask->id.idStr; + int64_t now = taosGetTimestampMs(); + + SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); + if (toDelFiles == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } if ((code = taskDbGenChkpUploadData(pTask->pBackend, pMeta->bkdChkptMgt, checkpointId, type, &path, toDelFiles, pTask->id.idStr)) != 0) { - stError("s-task:%s failed to gen upload checkpoint:%" PRId64, idStr, checkpointId); + stError("s-task:%s failed to gen upload checkpoint:%" PRId64 ", reason:%s", idStr, checkpointId, tstrerror(code)); } if (type == DATA_UPLOAD_S3) { if (code == TSDB_CODE_SUCCESS && (code = getCheckpointDataMeta(idStr, path, toDelFiles)) != 0) { - stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 " meta", idStr, checkpointId); + stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 ", reason:%s", idStr, checkpointId, + tstrerror(code)); } } @@ -595,7 +594,8 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d if (code == TSDB_CODE_SUCCESS) { stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", idStr, checkpointId); } else { - stError("s-task:%s failed to upload checkpointId:%" PRId64 " data:%s", idStr, checkpointId, path); + stError("s-task:%s failed to upload checkpointId:%" PRId64 " path:%s,reason:%s", idStr, checkpointId, path, + tstrerror(code)); } } @@ -668,7 +668,8 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { if (pTask->info.taskLevel != TASK_LEVEL__SINK) { stDebug("s-task:%s level:%d start gen checkpoint, checkpointId:%" PRId64, id, pTask->info.taskLevel, ckId); - code = streamBackendDoCheckpoint(pTask->pBackend, ckId); + int64_t ver = pTask->chkInfo.processedVer; + code = streamBackendDoCheckpoint(pTask->pBackend, ckId, ver); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", id, ckId, tstrerror(terrno)); } @@ -776,6 +777,11 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { SArray* pList = pTask->upstreamInfo.pList; ASSERT(pTask->info.taskLevel > TASK_LEVEL__SOURCE); SArray* pNotSendList = taosArrayInit(4, sizeof(SStreamUpstreamEpInfo)); + if (pNotSendList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stDebug("s-task:%s start to triggerMonitor, reason:%s", id, tstrerror(terrno)); + return; + } for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pList, i); @@ -982,52 +988,77 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { } static int32_t uploadCheckpointToS3(const char* id, const char* path) { + int32_t code = 0; + int32_t nBytes = 0; + + if (s3Init() != 0) { + return TSDB_CODE_THIRDPARTY_ERROR; + } + TdDirPtr pDir = taosOpenDir(path); - if (pDir == NULL) return -1; + if (pDir == NULL) { + return TAOS_SYSTEM_ERROR(errno); + } TdDirEntryPtr de = NULL; - s3Init(); while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || taosDirEntryIsDir(de)) continue; char filename[PATH_MAX] = {0}; if (path[strlen(path) - 1] == TD_DIRSEP_CHAR) { - snprintf(filename, sizeof(filename), "%s%s", path, name); + nBytes = snprintf(filename, sizeof(filename), "%s%s", path, name); + if (nBytes <= 0 || nBytes >= sizeof(filename)) { + code = TSDB_CODE_OUT_OF_RANGE; + break; + } } else { - snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name); + nBytes = snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= sizeof(filename)) { + code = TSDB_CODE_OUT_OF_RANGE; + break; + } } char object[PATH_MAX] = {0}; - snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name); - - if (s3PutObjectFromFile2(filename, object, 0) != 0) { - taosCloseDir(&pDir); - return -1; + nBytes = snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= sizeof(object)) { + code = TSDB_CODE_OUT_OF_RANGE; + break; } - stDebug("[s3] upload checkpoint:%s", filename); - // break; - } + code = s3PutObjectFromFile2(filename, object, 0); + if (code != 0) { + stError("[s3] failed to upload checkpoint:%s, reason:%s", filename, tstrerror(code)); + } else { + stDebug("[s3] upload checkpoint:%s", filename); + } + } taosCloseDir(&pDir); - return 0; + return code; } int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName) { - int32_t code = 0; - char* buf = taosMemoryCalloc(1, strlen(id) + strlen(dstName) + 4); + int32_t nBytes; + int32_t cap = strlen(id) + strlen(dstName) + 16; + + char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { - code = terrno = TSDB_CODE_OUT_OF_MEMORY; - return code; + return TSDB_CODE_OUT_OF_MEMORY; } - sprintf(buf, "%s/%s", id, fname); - if (s3GetObjectToFile(buf, dstName) != 0) { - code = errno; + nBytes = snprintf(buf, cap, "%s/%s", id, fname); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(buf); + return TSDB_CODE_OUT_OF_RANGE; + } + int32_t code = s3GetObjectToFile(buf, dstName); + if (code != 0) { + taosMemoryFree(buf); + return TAOS_SYSTEM_ERROR(errno); } - taosMemoryFree(buf); - return code; + return 0; } ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { @@ -1041,13 +1072,17 @@ ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { } int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { + int32_t code = 0; if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { stError("invalid parameters in upload checkpoint, %s", id); - return -1; + return TSDB_CODE_INVALID_CFG; } if (strlen(tsSnodeAddress) != 0) { - return uploadByRsync(id, path); + code = uploadByRsync(id, path); + if (code != 0) { + return TAOS_SYSTEM_ERROR(errno); + } } else if (tsS3StreamEnabled) { return uploadCheckpointToS3(id, path); } @@ -1059,7 +1094,7 @@ int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName) { if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { stError("down load checkpoint data parameters invalid"); - return -1; + return TSDB_CODE_INVALID_PARA; } if (strlen(tsSnodeAddress) != 0) { @@ -1089,7 +1124,7 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path) { int32_t deleteCheckpoint(const char* id) { if (id == NULL || strlen(id) == 0) { stError("deleteCheckpoint parameters invalid"); - return -1; + return TSDB_CODE_INVALID_PARA; } if (strlen(tsSnodeAddress) != 0) { return deleteRsync(id); @@ -1101,11 +1136,18 @@ int32_t deleteCheckpoint(const char* id) { int32_t deleteCheckpointFile(const char* id, const char* name) { char object[128] = {0}; - snprintf(object, sizeof(object), "%s/%s", id, name); - char* tmp = object; - s3DeleteObjects((const char**)&tmp, 1); - return 0; + int32_t nBytes = snprintf(object, sizeof(object), "%s/%s", id, name); + if (nBytes <= 0 || nBytes >= sizeof(object)) { + return TSDB_CODE_OUT_OF_RANGE; + } + + char* tmp = object; + int32_t code = s3DeleteObjects((const char**)&tmp, 1); + if (code != 0) { + return TSDB_CODE_THIRDPARTY_ERROR; + } + return code; } int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { @@ -1115,8 +1157,20 @@ int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { const char* id = pTask->id.idStr; SCheckpointInfo* pInfo = &pTask->chkInfo; - ASSERT(pTask->pBackend == NULL); + taosThreadMutexLock(&pTask->lock); + if (pTask->status.sendConsensusChkptId == true) { + stDebug("s-task:%s already start to consensus-checkpointId, not start again before it completed", id); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } else { + pTask->status.sendConsensusChkptId = true; + } + taosThreadMutexUnlock(&pTask->lock); + + ASSERT(pTask->pBackend == NULL); + pTask->status.requireConsensusChkptId = true; +#if 0 SRestoreCheckpointInfo req = { .streamId = pTask->id.streamId, .taskId = pTask->id.taskId, @@ -1128,14 +1182,14 @@ int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { tEncodeSize(tEncodeRestoreCheckpointInfo, &req, tlen, code); if (code < 0) { stError("s-task:%s vgId:%d encode stream task latest-checkpoint-id failed, code:%s", id, vgId, tstrerror(code)); - return -1; + return TSDB_CODE_INVALID_MSG; } void* buf = rpcMallocCont(tlen); if (buf == NULL) { stError("s-task:%s vgId:%d encode stream task latest-checkpoint-id msg failed, code:%s", id, vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } SEncoder encoder; @@ -1148,10 +1202,27 @@ int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { tEncoderClear(&encoder); SRpcMsg msg = {0}; - initRpcMsg(&msg, TDMT_MND_STREAM_CHKPT_CONSEN, buf, tlen); + initRpcMsg(&msg, TDMT_MND_STREAM_REQ_CONSEN_CHKPT, buf, tlen); stDebug("s-task:%s vgId:%d send latest checkpointId:%" PRId64 " to mnode to get the consensus checkpointId", id, vgId, pInfo->checkpointId); tmsgSendReq(&pTask->info.mnodeEpset, &msg); +#endif return 0; +} + +int32_t streamTaskSendPreparedCheckpointsourceRsp(SStreamTask* pTask) { + int32_t code = 0; + if (pTask->info.taskLevel != TASK_LEVEL__SOURCE) { + return code; + } + + taosThreadMutexLock(&pTask->lock); + SStreamTaskState* p = streamTaskGetStatus(pTask); + if (p->state == TASK_STATUS__CK) { + code = streamTaskSendCheckpointSourceRsp(pTask); + } + taosThreadMutexUnlock(&pTask->lock); + + return code; } \ No newline at end of file diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 83e73e8c88..617adaa016 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -813,9 +813,20 @@ static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { taosThreadMutexLock(&pActiveInfo->lock); SArray* pList = pActiveInfo->pReadyMsgList; + int32_t num = taosArrayGetSize(pList); + + // active checkpoint info is cleared for now + if ((pActiveInfo->activeId == 0) && (pActiveInfo->transId == 0) && (num == 0) && (pTask->chkInfo.startTs == 0)) { + taosThreadMutexUnlock(&pActiveInfo->lock); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stWarn("s-task:%s vgId:%d active checkpoint may be cleared, quit from readyMsg send tmr, ref:%d", id, vgId, ref); + + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + SArray* pNotRspList = taosArrayInit(4, sizeof(int32_t)); - int32_t num = taosArrayGetSize(pList); ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { diff --git a/source/libs/stream/src/streamHb.c b/source/libs/stream/src/streamHb.c index d6411e25f2..16cb23de10 100644 --- a/source/libs/stream/src/streamHb.c +++ b/source/libs/stream/src/streamHb.c @@ -168,7 +168,9 @@ int32_t streamMetaSendHbHelper(SStreamMeta* pMeta) { continue; } + taosThreadMutexLock(&(*pTask)->lock); STaskStatusEntry entry = streamTaskGetStatusEntry(*pTask); + taosThreadMutexUnlock(&(*pTask)->lock); entry.inputRate = entry.inputQUsed * 100.0 / (2 * STREAM_TASK_QUEUE_CAPACITY_IN_SIZE); if ((*pTask)->info.taskLevel == TASK_LEVEL__SINK) { @@ -192,6 +194,12 @@ int32_t streamMetaSendHbHelper(SStreamMeta* pMeta) { } } + if ((*pTask)->status.requireConsensusChkptId) { + entry.checkpointInfo.consensusChkptId = 1; + (*pTask)->status.requireConsensusChkptId = false; + stDebug("s-task:%s vgId:%d set the require consensus-checkpointId in hbMsg", (*pTask)->id.idStr, pMeta->vgId); + } + if ((*pTask)->exec.pWalReader != NULL) { entry.processedVer = walReaderGetCurrentVer((*pTask)->exec.pWalReader) - 1; if (entry.processedVer < 0) { @@ -324,7 +332,7 @@ int32_t streamProcessHeartbeatRsp(SStreamMeta* pMeta, SMStreamHbRspMsg* pRsp) { stDebug("vgId:%d process hbMsg rsp, msgId:%d rsp confirmed", pMeta->vgId, pRsp->msgId); SMetaHbInfo* pInfo = pMeta->pHbInfo; - streamMetaRLock(pMeta); + streamMetaWLock(pMeta); // current waiting rsp recved if (pRsp->msgId == pInfo->hbCount) { @@ -337,6 +345,6 @@ int32_t streamProcessHeartbeatRsp(SStreamMeta* pMeta, SMStreamHbRspMsg* pRsp) { stWarn("vgId:%d recv expired hb rsp, msgId:%d, discarded", pMeta->vgId, pRsp->msgId); } - streamMetaRUnLock(pMeta); + streamMetaWUnLock(pMeta); return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 7b94f642e2..fa3a3ea07d 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -182,9 +182,10 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { int32_t code = 0; int64_t chkpId = streamMetaGetLatestCheckpointId(pMeta); - - bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); + terrno = 0; + bool exist = streamBackendDataIsExist(pMeta->path, chkpId); if (exist == false) { + code = terrno; return code; } @@ -252,8 +253,9 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) } STaskDbWrapper* pBackend = NULL; + int64_t processVer = -1; while (1) { - pBackend = taskDbOpen(pMeta->path, key, chkpId); + pBackend = taskDbOpen(pMeta->path, key, chkpId, &processVer); if (pBackend != NULL) { break; } @@ -271,6 +273,8 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) pBackend->pTask = pTask; pBackend->pMeta = pMeta; + if (processVer != -1) pTask->chkInfo.processedVer = processVer; + taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); taosThreadMutexUnlock(&pMeta->backendMutex); @@ -308,7 +312,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTas } if (streamMetaMayCvtDbFormat(pMeta) < 0) { - stError("vgId:%d convert sub info format failed, open stream meta failed", pMeta->vgId); + stError("vgId:%d convert sub info format failed, open stream meta failed, reason: %s", pMeta->vgId, + tstrerror(terrno)); goto _err; } @@ -393,6 +398,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTas pMeta->qHandle = taosInitScheduler(32, 1, "stream-chkp", NULL); pMeta->bkdChkptMgt = bkdMgtCreate(tpath); + if (pMeta->bkdChkptMgt == NULL) { + goto _err; + } taosThreadMutexInit(&pMeta->backendMutex, NULL); return pMeta; @@ -408,9 +416,10 @@ _err: if (pMeta->updateInfo.pTasks) taosHashCleanup(pMeta->updateInfo.pTasks); if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet); if (pMeta->startInfo.pFailedTaskSet) taosHashCleanup(pMeta->startInfo.pFailedTaskSet); + if (pMeta->bkdChkptMgt) bkdMgtDestroy(pMeta->bkdChkptMgt); taosMemoryFree(pMeta); - stError("failed to open stream meta"); + stError("failed to open stream meta, reason:%s", tstrerror(terrno)); return NULL; } @@ -900,7 +909,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (p == NULL) { code = pMeta->buildTaskFn(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer + 1); if (code < 0) { - stError("failed to load s-task:0x%"PRIx64", code:%s, continue", id.taskId, tstrerror(terrno)); + stError("failed to load s-task:0x%" PRIx64 ", code:%s, continue", id.taskId, tstrerror(terrno)); tFreeStreamTask(pTask); continue; } @@ -985,7 +994,7 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { streamMetaGetHbSendInfo(pMeta->pHbInfo, &startTs, &sendCount); stInfo("vgId:%d notify all stream tasks that current vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", - vgId, (pMeta->role == NODE_ROLE_LEADER), startTs, sendCount); + vgId, (pMeta->role == NODE_ROLE_LEADER), startTs, sendCount); // wait for the stream meta hb function stopping streamMetaWaitForHbTmrQuit(pMeta); @@ -1031,10 +1040,11 @@ void streamMetaResetStartInfo(STaskStartInfo* pStartInfo, int32_t vgId) { taosHashClear(pStartInfo->pFailedTaskSet); pStartInfo->tasksWillRestart = 0; pStartInfo->readyTs = 0; + pStartInfo->elapsedTime = 0; // reset the sentinel flag value to be 0 pStartInfo->startAllTasks = 0; - stDebug("vgId:%d clear all start-all-task info", vgId); + stDebug("vgId:%d clear start-all-task info", vgId); } void streamMetaRLock(SStreamMeta* pMeta) { @@ -1170,7 +1180,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { int64_t now = taosGetTimestampMs(); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - stInfo("vgId:%d start to consensus checkpointId for all %d task(s), start ts:%"PRId64, vgId, numOfTasks, now); + stInfo("vgId:%d start to consensus checkpointId for all %d task(s), start ts:%" PRId64, vgId, numOfTasks, now); if (numOfTasks == 0) { stInfo("vgId:%d no tasks exist, quit from consensus checkpointId", pMeta->vgId); @@ -1198,7 +1208,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { continue; } - if ((pTask->pBackend == NULL) && (pTask->info.fillHistory == 1 || HAS_RELATED_FILLHISTORY_TASK(pTask))) { + if ((pTask->pBackend == NULL) && ((pTask->info.fillHistory == 1) || HAS_RELATED_FILLHISTORY_TASK(pTask))) { code = pMeta->expandTaskFn(pTask); if (code != TSDB_CODE_SUCCESS) { stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); @@ -1392,17 +1402,24 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 streamMetaWLock(pMeta); - if (pStartInfo->startAllTasks != 1) { - int64_t el = endTs - startTs; - stDebug("vgId:%d not start all task(s), not record status, s-task:0x%x launch succ:%d elapsed time:%" PRId64 "ms", - pMeta->vgId, taskId, ready, el); + SStreamTask** p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (p == NULL) { // task does not exists in current vnode, not record the complete info + stError("vgId:%d s-task:0x%x not exists discard the check downstream info", pMeta->vgId, taskId); streamMetaWUnLock(pMeta); return 0; } - void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); - if (p == NULL) { // task does not exists in current vnode, not record the complete info - stError("vgId:%d s-task:0x%x not exists discard the check downstream info", pMeta->vgId, taskId); + // clear the send consensus-checkpointId flag + taosThreadMutexLock(&(*p)->lock); + (*p)->status.sendConsensusChkptId = false; + taosThreadMutexUnlock(&(*p)->lock); + + if (pStartInfo->startAllTasks != 1) { + int64_t el = endTs - startTs; + stDebug( + "vgId:%d not in start all task(s) process, not record launch result status, s-task:0x%x launch succ:%d elapsed " + "time:%" PRId64 "ms", + pMeta->vgId, taskId, ready, el); streamMetaWUnLock(pMeta); return 0; } diff --git a/source/libs/stream/src/streamMsg.c b/source/libs/stream/src/streamMsg.c index e0435156e2..40582b5144 100644 --- a/source/libs/stream/src/streamMsg.c +++ b/source/libs/stream/src/streamMsg.c @@ -349,6 +349,8 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->checkpointInfo.latestTime) < 0) return -1; if (tEncodeI64(pEncoder, ps->checkpointInfo.latestSize) < 0) return -1; if (tEncodeI8(pEncoder, ps->checkpointInfo.remoteBackup) < 0) return -1; + if (tEncodeI8(pEncoder, ps->checkpointInfo.consensusChkptId) < 0) return -1; + if (tEncodeI64(pEncoder, ps->checkpointInfo.consensusTs) < 0) return -1; if (tEncodeI64(pEncoder, ps->startTime) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointId) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointVer) < 0) return -1; @@ -403,6 +405,8 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestTime) < 0) return -1; if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestSize) < 0) return -1; if (tDecodeI8(pDecoder, &entry.checkpointInfo.remoteBackup) < 0) return -1; + if (tDecodeI8(pDecoder, &entry.checkpointInfo.consensusChkptId) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.checkpointInfo.consensusTs) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startTime) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startCheckpointId) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startCheckpointVer) < 0) return -1; @@ -634,6 +638,7 @@ int32_t tEncodeRestoreCheckpointInfo (SEncoder* pEncoder, const SRestoreCheckpoi if (tEncodeI64(pEncoder, pReq->startTs) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->transId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1; tEndEncode(pEncoder); @@ -645,28 +650,9 @@ int32_t tDecodeRestoreCheckpointInfo(SDecoder* pDecoder, SRestoreCheckpointInfo* if (tDecodeI64(pDecoder, &pReq->startTs) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->transId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1; tEndDecode(pDecoder); return 0; } - -int32_t tEncodeRestoreCheckpointInfoRsp(SEncoder* pCoder, const SRestoreCheckpointInfoRsp* pInfo) { - if (tStartEncode(pCoder) < 0) return -1; - if (tEncodeI64(pCoder, pInfo->startTs) < 0) return -1; - if (tEncodeI64(pCoder, pInfo->streamId) < 0) return -1; - if (tEncodeI32(pCoder, pInfo->taskId) < 0) return -1; - if (tEncodeI64(pCoder, pInfo->checkpointId) < 0) return -1; - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeRestoreCheckpointInfoRsp(SDecoder* pCoder, SRestoreCheckpointInfoRsp* pInfo) { - if (tStartDecode(pCoder) < 0) return -1; - if (tDecodeI64(pCoder, &pInfo->startTs) < 0) return -1; - if (tDecodeI64(pCoder, &pInfo->streamId) < 0) return -1; - if (tDecodeI32(pCoder, &pInfo->taskId) < 0) return -1; - if (tDecodeI64(pCoder, &pInfo->checkpointId) < 0) return -1; - tEndDecode(pCoder); - return 0; -} \ No newline at end of file diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 0871ff5eb7..02e4ed8d8b 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -24,6 +24,7 @@ enum SBackendFileType { ROCKSDB_SST_TYPE = 3, ROCKSDB_CURRENT_TYPE = 4, ROCKSDB_CHECKPOINT_META_TYPE = 5, + ROCKSDB_CHECKPOINT_SELFCHECK_TYPE = 6, }; typedef struct SBackendFileItem { @@ -49,6 +50,7 @@ typedef struct SBackendSnapFiles2 { char* pOptions; SArray* pSst; char* pCheckpointMeta; + char* pCheckpointSelfcheck; char* path; int64_t checkpointId; @@ -111,6 +113,7 @@ const char* ROCKSDB_MAINFEST = "MANIFEST"; const char* ROCKSDB_SST = "sst"; const char* ROCKSDB_CURRENT = "CURRENT"; const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; +const char* ROCKSDB_CHECKPOINT_SELF_CHECK = "info"; static int64_t kBlockSize = 64 * 1024; int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, void* pMeta); @@ -127,6 +130,7 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { int32_t ret = 0; char* fullname = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); ret = taosStatFile(fullname, sz, NULL, NULL); @@ -148,8 +152,20 @@ int32_t streamDestroyTaskDbSnapInfo(void* arg, SArray* snap) { return taskDbDest void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { if (qDebugFlag & DEBUG_DEBUG) { - char* buf = taosMemoryCalloc(1, 512); - sprintf(buf + strlen(buf), "["); + int16_t cap = 512; + + char* buf = taosMemoryCalloc(1, cap); + if (buf == NULL) { + stError("%s failed to alloc memory, reason:%s", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return; + } + + int32_t nBytes = snprintf(buf + strlen(buf), cap, "["); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(buf); + stError("%s failed to write buf, reason:%s", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_RANGE)); + return; + } if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent); if (pSnapFile->pMainfest) sprintf(buf + strlen(buf), "MANIFEST: %s,", pSnapFile->pMainfest); @@ -157,10 +173,10 @@ void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { if (pSnapFile->pSst) { for (int32_t i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { char* name = taosArrayGetP(pSnapFile->pSst, i); - sprintf(buf + strlen(buf), "%s,", name); + if (strlen(buf) + strlen(name) < cap) sprintf(buf + strlen(buf), "%s,", name); } } - sprintf(buf + strlen(buf) - 1, "]"); + if ((strlen(buf)) < cap) sprintf(buf + strlen(buf) - 1, "]"); stInfo("%s %" PRId64 "-%" PRId64 " get file list: %s", STREAM_STATE_TRANSFER, pSnapFile->snapInfo.streamId, pSnapFile->snapInfo.taskId, buf); @@ -199,16 +215,25 @@ int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { // meta item.name = pSnapFile->pCheckpointMeta; item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { + taosArrayPush(pSnapFile->pFileList, &item); + } + + item.name = pSnapFile->pCheckpointSelfcheck; + item.type = ROCKSDB_CHECKPOINT_SELFCHECK_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { taosArrayPush(pSnapFile->pFileList, &item); } return 0; } int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { + int32_t code = 0; TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { - stError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path); - return -1; + code = TAOS_SYSTEM_ERROR(errno); + stError("%s failed to open %s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, tstrerror(code)); + return code; } TdDirEntryPtr pDirEntry; @@ -216,43 +241,88 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { char* name = taosGetDirEntryName(pDirEntry); if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { pSnapFile->pCurrent = taosStrdup(name); + if (pSnapFile->pCurrent == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { pSnapFile->pMainfest = taosStrdup(name); + if (pSnapFile->pMainfest == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { pSnapFile->pOptions = taosStrdup(name); + if (pSnapFile->pOptions == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { pSnapFile->pCheckpointMeta = taosStrdup(name); + if (pSnapFile->pCheckpointMeta == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } + continue; + } + if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_SELF_CHECK) && + 0 == strncmp(name, ROCKSDB_CHECKPOINT_SELF_CHECK, strlen(ROCKSDB_CHECKPOINT_SELF_CHECK))) { + pSnapFile->pCheckpointSelfcheck = taosStrdup(name); + if (pSnapFile->pCheckpointSelfcheck == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_SST) && 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); + if (sst == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } taosArrayPush(pSnapFile->pSst, &sst); } } taosCloseDir(&pDir); - return 0; + return code; } int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { - int32_t code = -1; + int32_t code = 0; + int32_t nBytes = 0; + int32_t cap = strlen(pSnap->dbPrefixPath) + 256; + + char* path = taosMemoryCalloc(1, cap); + if (path == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + nBytes = snprintf(path, cap, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", pSnap->chkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } - char* path = taosMemoryCalloc(1, strlen(pSnap->dbPrefixPath) + 256); - // char idstr[64] = {0}; - sprintf(path, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - pSnap->chkpId); if (!taosIsDir(path)) { + code = TSDB_CODE_INVALID_MSG; goto _ERROR; } pSnapFile->pSst = taosArrayInit(16, sizeof(void*)); pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + if (pSnapFile->pSst == NULL || pSnapFile->pFileList == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } + pSnapFile->path = path; pSnapFile->snapInfo = *pSnap; if ((code = snapFileReadMeta(pSnapFile)) != 0) { @@ -264,7 +334,6 @@ int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBacke snapFileDebugInfo(pSnapFile); path = NULL; - code = 0; _ERROR: taosMemoryFree(path); @@ -276,6 +345,7 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { taosMemoryFree(pSnap->pMainfest); taosMemoryFree(pSnap->pOptions); taosMemoryFree(pSnap->path); + taosMemoryFree(pSnap->pCheckpointSelfcheck); for (int32_t i = 0; i < taosArrayGetSize(pSnap->pSst); i++) { char* sst = taosArrayGetP(pSnap->pSst, i); taosMemoryFree(sst); @@ -295,14 +365,25 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { } int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { // impl later + int32_t code = 0; SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); - int32_t code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); + if (pSnapInfoSet == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); if (code != 0) { + stError("failed to do task db snap info, reason:%s", tstrerror(code)); taosArrayDestroy(pSnapInfoSet); - return -1; + return code; } SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + if (pDbSnapSet == NULL) { + taosArrayDestroy(pSnapInfoSet); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; + } for (int32_t i = 0; i < taosArrayGetSize(pSnapInfoSet); i++) { SStreamTaskSnap* pSnap = taosArrayGet(pSnapInfoSet, i); @@ -318,6 +399,10 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta pHandle->currIdx = 0; pHandle->pMeta = pMeta; return 0; + +_err: + streamSnapHandleDestroy(pHandle); + return code; } void streamSnapHandleDestroy(SStreamSnapHandle* handle) { @@ -348,9 +433,10 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa return TSDB_CODE_OUT_OF_MEMORY; } - if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) { + int32_t code = streamSnapHandleInit(&pReader->handle, (char*)path, pMeta); + if (code != 0) { taosMemoryFree(pReader); - return -1; + return code; } *ppReader = pReader; @@ -410,10 +496,10 @@ _NEXT: int64_t nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); if (nread == -1) { taosMemoryFree(buf); - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, item->type, tstrerror(code)); - return -1; + return code; } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, @@ -473,6 +559,7 @@ _NEXT: // SMetaSnapWriter ======================================== int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter) { // impl later + int32_t code = 0; SStreamSnapWriter* pWriter = taosMemoryCalloc(1, sizeof(SStreamSnapWriter)); if (pWriter == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -480,11 +567,27 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path SStreamSnapHandle* pHandle = &pWriter->handle; pHandle->currIdx = 0; + pHandle->metaPath = taosStrdup(path); + if (pHandle->metaPath == NULL) { + taosMemoryFree(pWriter); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; + } + pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + if (pHandle->pDbSnapSet == NULL) { + streamSnapWriterClose(pWriter, 0); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; + } SBackendSnapFile2 snapFile = {0}; - taosArrayPush(pHandle->pDbSnapSet, &snapFile); + if (taosArrayPush(pHandle->pDbSnapSet, &snapFile) == NULL) { + streamSnapWriterClose(pWriter, 0); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; + } *ppWriter = pWriter; return 0; @@ -506,7 +609,7 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t if (pSnapFile->fd == 0) { pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pSnapFile->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pHandle->metaPath, TD_DIRSEP, pHdr->name, tstrerror(code)); } @@ -514,7 +617,7 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); if (bytes != pHdr->size) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); return code; } else { @@ -535,12 +638,16 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pSnapFile->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP, pHdr->name, tstrerror(code)); } - taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + if (taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset) != pHdr->size) { + code = TAOS_SYSTEM_ERROR(errno); + stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } stInfo("succ to write data %s", pItem->name); pSnapFile->offset += pHdr->size; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 1decfe198a..9bcad87264 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -268,13 +268,7 @@ void tFreeStreamTask(SStreamTask* pTask) { } streamTaskCleanupCheckInfo(&pTask->taskCheckInfo); - - if (pTask->pState) { - stDebug("s-task:0x%x start to free task state", taskId); - streamStateClose(pTask->pState, status1 == TASK_STATUS__DROPPING); - taskDbRemoveRef(pTask->pBackend); - pTask->pBackend = NULL; - } + streamFreeTaskState(pTask, status1); if (pTask->pNameMap) { tSimpleHashCleanup(pTask->pNameMap); @@ -311,6 +305,16 @@ void tFreeStreamTask(SStreamTask* pTask) { stDebug("s-task:0x%x free task completed", taskId); } +void streamFreeTaskState(SStreamTask* pTask, ETaskStatus status) { + if (pTask->pState != NULL) { + stDebug("s-task:0x%x start to free task state", pTask->id.taskId); + streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); + taskDbRemoveRef(pTask->pBackend); + pTask->pBackend = NULL; + pTask->pState = NULL; + } +} + static void setInitialVersionInfo(SStreamTask* pTask, int64_t ver) { SCheckpointInfo* pChkInfo = &pTask->chkInfo; SDataRange* pRange = &pTask->dataRange; @@ -848,6 +852,8 @@ STaskStatusEntry streamTaskGetStatusEntry(SStreamTask* pTask) { .checkpointInfo.latestTime = pTask->chkInfo.checkpointTime, .checkpointInfo.latestSize = 0, .checkpointInfo.remoteBackup = 0, + .checkpointInfo.consensusChkptId = 0, + .checkpointInfo.consensusTs = taosGetTimestampMs(), .hTaskId = pTask->hTaskInfo.id.taskId, .procsTotal = SIZE_IN_MiB(pExecInfo->inputDataSize), .outputTotal = SIZE_IN_MiB(pExecInfo->outputDataSize), diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 85d3e0068a..f2bd99cdaf 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -79,6 +79,12 @@ static int32_t attachWaitedEvent(SStreamTask* pTask, SFutureHandleEventInfo* pEv return 0; } +static int32_t stopTaskSuccFn(SStreamTask* pTask) { + SStreamTaskSM* pSM = pTask->status.pSM; + streamFreeTaskState(pTask, pSM->current.state); + return TSDB_CODE_SUCCESS; +} + int32_t streamTaskInitStatus(SStreamTask* pTask) { pTask->execInfo.checkTs = taosGetTimestampMs(); stDebug("s-task:%s start init, and check downstream tasks, set the init ts:%" PRId64, pTask->id.idStr, @@ -634,21 +640,21 @@ void doInitStateTransferTable(void) { // resume is completed by restore status of state-machine // stop related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, stopTaskSuccFn, NULL); taosArrayPush(streamTaskSMTrans, &trans); // dropping related event diff --git a/source/libs/stream/test/backendTest.cpp b/source/libs/stream/test/backendTest.cpp index 2fb257fe4e..38d48a2a32 100644 --- a/source/libs/stream/test/backendTest.cpp +++ b/source/libs/stream/test/backendTest.cpp @@ -29,7 +29,7 @@ class BackendEnv : public ::testing::Test { void *backendCreate() { const char *streamPath = "/tmp"; - void * p = NULL; + void *p = NULL; // char *absPath = NULL; // // SBackendWrapper *p = (SBackendWrapper *)streamBackendInit(streamPath, -1, 2); @@ -52,7 +52,7 @@ SStreamState *stateCreate(const char *path) { } void *backendOpen() { streamMetaInit(); - const char * path = "/tmp/backend"; + const char *path = "/tmp/backend"; SStreamState *p = stateCreate(path); ASSERT(p != NULL); @@ -79,7 +79,7 @@ void *backendOpen() { const char *val = "value data"; int32_t len = 0; - char * newVal = NULL; + char *newVal = NULL; streamStateGet_rocksdb(p, &key, (void **)&newVal, &len); ASSERT(len == strlen(val)); } @@ -100,7 +100,7 @@ void *backendOpen() { const char *val = "value data"; int32_t len = 0; - char * newVal = NULL; + char *newVal = NULL; int32_t code = streamStateGet_rocksdb(p, &key, (void **)&newVal, &len); ASSERT(code != 0); } @@ -130,7 +130,7 @@ void *backendOpen() { winkey.groupId = 0; winkey.ts = tsArray[0]; - char * val = NULL; + char *val = NULL; int32_t len = 0; pCurr = streamStateSeekKeyNext_rocksdb(p, &winkey); @@ -157,7 +157,7 @@ void *backendOpen() { key.ts = tsArray[i]; key.exprIdx = i; - char * val = NULL; + char *val = NULL; int32_t len = 0; streamStateFuncGet_rocksdb(p, &key, (void **)&val, &len); ASSERT(len == strlen("Value")); @@ -168,7 +168,7 @@ void *backendOpen() { key.ts = tsArray[i]; key.exprIdx = i; - char * val = NULL; + char *val = NULL; int32_t len = 0; streamStateFuncDel_rocksdb(p, &key); } @@ -213,7 +213,7 @@ void *backendOpen() { { SSessionKey key; memset(&key, 0, sizeof(key)); - char * val = NULL; + char *val = NULL; int32_t vlen = 0; code = streamStateSessionGetKVByCur_rocksdb(pCurr, &key, (void **)&val, &vlen); ASSERT(code == 0); @@ -260,7 +260,7 @@ void *backendOpen() { SWinKey key = {0}; // {.groupId = (uint64_t)(i), .ts = tsArray[i]}; key.groupId = (uint64_t)(i); key.ts = tsArray[i]; - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(streamStateFillGet_rocksdb(p, &key, (void **)&val, &vlen) == 0); taosMemoryFreeClear(val); @@ -272,7 +272,7 @@ void *backendOpen() { SStreamStateCur *pCurr = streamStateFillGetCur_rocksdb(p, &key); ASSERT(pCurr != NULL); - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(0 == streamStateFillGetKVByCur_rocksdb(pCurr, &key, (const void **)&val, &vlen)); ASSERT(vlen == strlen("Value")); @@ -296,7 +296,7 @@ void *backendOpen() { SWinKey key = {0}; // {.groupId = (uint64_t)(i), .ts = tsArray[i]}; key.groupId = (uint64_t)(i); key.ts = tsArray[i]; - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(streamStateFillDel_rocksdb(p, &key) == 0); taosMemoryFreeClear(val); @@ -338,7 +338,7 @@ void *backendOpen() { char key[128] = {0}; sprintf(key, "tbname_%d", i); - char * val = NULL; + char *val = NULL; int32_t len = 0; code = streamDefaultGet_rocksdb(p, key, (void **)&val, &len); ASSERT(code == 0); @@ -354,7 +354,7 @@ TEST_F(BackendEnv, checkOpen) { SStreamState *p = (SStreamState *)backendOpen(); int64_t tsStart = taosGetTimestampMs(); { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; for (int i = 0; i < size; i++) { char key[128] = {0}; @@ -368,7 +368,7 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; char valBuf[256] = {0}; for (int i = 0; i < size; i++) { @@ -383,9 +383,9 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } // do checkpoint 2 - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 2); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 2, 0); { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; char valBuf[256] = {0}; for (int i = 0; i < size; i++) { @@ -400,17 +400,17 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 3); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 3, 0); const char *path = "/tmp/backend/stream"; const char *dump = "/tmp/backend/stream/dump"; // taosMkDir(dump); taosMulMkDir(dump); SBkdMgt *mgt = bkdMgtCreate((char *)path); - SArray * result = taosArrayInit(4, sizeof(void *)); + SArray *result = taosArrayInit(4, sizeof(void *)); bkdMgtGetDelta(mgt, p->pTdbState->idstr, 3, result, (char *)dump); - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 4); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 4, 0); taosArrayClear(result); bkdMgtGetDelta(mgt, p->pTdbState->idstr, 4, result, (char *)dump); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index ffcb1fbdb5..9818394a2a 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -832,6 +832,9 @@ static int32_t allocConnRef(SCliConn* conn, bool update) { taosInitRWLatch(&exh->latch); exh->refId = transAddExHandle(transGetRefMgt(), exh); + SExHandle* self = transAcquireExHandle(transGetRefMgt(), exh->refId); + ASSERT(exh == self); + QUEUE_INIT(&exh->q); taosInitRWLatch(&exh->latch); @@ -2829,10 +2832,11 @@ int transSetDefaultAddr(void* shandle, const char* ip, const char* fqdn) { int64_t transAllocHandle() { SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); - QUEUE_INIT(&exh->q); - taosInitRWLatch(&exh->latch); exh->refId = transAddExHandle(transGetRefMgt(), exh); + SExHandle* self = transAcquireExHandle(transGetRefMgt(), exh->refId); + ASSERT(exh == self); + QUEUE_INIT(&exh->q); taosInitRWLatch(&exh->latch); tDebug("pre alloc refId %" PRId64 "", exh->refId); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index e9fa58e6e3..525e87ff2f 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -22,10 +22,12 @@ #define TAOS_ERROR_C static threadlocal int32_t tsErrno; +static threadlocal int32_t tsErrln; static threadlocal char tsErrMsgDetail[ERR_MSG_LEN] = {0}; static threadlocal char tsErrMsgReturn[ERR_MSG_LEN] = {0}; int32_t* taosGetErrno() { return &tsErrno; } +int32_t* taosGetErrln() { return &tsErrln; } char* taosGetErrMsg() { return tsErrMsgDetail; } char* taosGetErrMsgReturn() { return tsErrMsgReturn; } @@ -96,6 +98,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_NOT_FOUND, "Not found") TAOS_DEFINE_ERROR(TSDB_CODE_NO_DISKSPACE, "Out of disk space") TAOS_DEFINE_ERROR(TSDB_CODE_TIMEOUT_ERROR, "Operation timeout") TAOS_DEFINE_ERROR(TSDB_CODE_NO_ENOUGH_DISKSPACE, "No enough disk space") +TAOS_DEFINE_ERROR(TSDB_CODE_THIRDPARTY_ERROR, "third party error, please check the log") TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STARTING, "Database is starting up") TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STOPPING, "Database is closing down") @@ -104,6 +107,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_CFG_VALUE, "Invalid configuration TAOS_DEFINE_ERROR(TSDB_CODE_IP_NOT_IN_WHITE_LIST, "Not allowed to connect") TAOS_DEFINE_ERROR(TSDB_CODE_FAILED_TO_CONNECT_S3, "Failed to connect to s3 server") TAOS_DEFINE_ERROR(TSDB_CODE_MSG_PREPROCESSED, "Message has been processed in preprocess") +TAOS_DEFINE_ERROR(TSDB_CODE_OUT_OF_BUFFER, "Out of buffer") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") diff --git a/source/util/src/tqueue.c b/source/util/src/tqueue.c index 7a4eb09b99..45a8a462fb 100644 --- a/source/util/src/tqueue.c +++ b/source/util/src/tqueue.c @@ -162,7 +162,7 @@ void *taosAllocateQitem(int32_t size, EQItype itype, int64_t dataSize) { int64_t alloced = atomic_add_fetch_64(&tsRpcQueueMemoryUsed, size + dataSize); if (alloced > tsRpcQueueMemoryAllowed) { uError("failed to alloc qitem, size:%" PRId64 " alloc:%" PRId64 " allowed:%" PRId64, size + dataSize, alloced, - tsRpcQueueMemoryUsed); + tsRpcQueueMemoryAllowed); atomic_sub_fetch_64(&tsRpcQueueMemoryUsed, size + dataSize); taosMemoryFree(pNode); terrno = TSDB_CODE_OUT_OF_RPC_MEMORY_QUEUE; @@ -494,6 +494,8 @@ int32_t taosReadAllQitemsFromQset(STaosQset *qset, STaosQall *qall, SQueueInfo * qall->start = queue->head; qall->numOfItems = queue->numOfItems; qall->memOfItems = queue->memOfItems; + qall->unAccessedNumOfItems = queue->numOfItems; + qall->unAccessMemOfItems = queue->memOfItems; code = qall->numOfItems; qinfo->ahandle = queue->ahandle; diff --git a/tests/army/query/queryBugs.py b/tests/army/query/queryBugs.py index ca28ff549c..7583382290 100644 --- a/tests/army/query/queryBugs.py +++ b/tests/army/query/queryBugs.py @@ -28,8 +28,7 @@ from frame import * class TDTestCase(TBase): - - # fix + # fix def FIX_TD_30686(self): tdLog.info("check bug TD_30686 ...\n") sqls = [ @@ -49,6 +48,32 @@ class TDTestCase(TBase): ] tdSql.checkDataMem(sql, results) + def FIX_TS_5105(self): + tdLog.info("check bug TS_5105 ...\n") + ts1 = "2024-07-03 10:00:00.000" + ts2 = "2024-07-03 13:00:00.000" + sqls = [ + "drop database if exists ts_5105", + "create database ts_5105 cachemodel 'both';", + "use ts_5105;", + "CREATE STABLE meters (ts timestamp, current float) TAGS (location binary(64), groupId int);", + "CREATE TABLE d1001 USING meters TAGS ('California.B', 2);", + "CREATE TABLE d1002 USING meters TAGS ('California.S', 3);", + f"INSERT INTO d1001 VALUES ('{ts1}', 10);", + f"INSERT INTO d1002 VALUES ('{ts2}', 13);", + ] + tdSql.executes(sqls) + + sql = "select last(ts), last_row(ts) from meters;" + + # 执行多次,有些时候last_row(ts)会返回错误的值,详见TS-5105 + for i in range(1, 10): + tdLog.debug(f"{i}th execute sql: {sql}") + tdSql.query(sql) + tdSql.checkRows(1) + tdSql.checkData(0, 0, ts2) + tdSql.checkData(0, 1, ts2) + # run def run(self): tdLog.debug(f"start to excute {__file__}") @@ -57,11 +82,10 @@ class TDTestCase(TBase): self.FIX_TD_30686() # TS BUGS - + self.FIX_TS_5105() tdLog.success(f"{__file__} successfully executed") - tdCases.addLinux(__file__, TDTestCase()) tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/army/query/query_last_row_repeatly.py b/tests/army/query/query_last_row_repeatly.py deleted file mode 100644 index 3cca032176..0000000000 --- a/tests/army/query/query_last_row_repeatly.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- - -from frame.log import * -from frame.cases import * -from frame.sql import * -from frame.caseBase import * -from frame import * - - -class TDTestCase(TBase): - def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) - tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor(), logSql) - - def run(self): - sqls = [ - "drop database if exists ts_5101", - "create database ts_5101 cachemodel 'both';", - "use ts_5101;", - "CREATE STABLE meters (ts timestamp, current float) TAGS (location binary(64), groupId int);", - "CREATE TABLE d1001 USING meters TAGS ('California.B', 2);", - "CREATE TABLE d1002 USING meters TAGS ('California.S', 3);", - "INSERT INTO d1001 VALUES ('2024-07-03 10:00:00.000', 10);", - "INSERT INTO d1002 VALUES ('2024-07-03 13:00:00.000', 13);", - ] - tdSql.executes(sqls) - - # 执行多次,有些时候last_row(ts)会返回错误的值,详见TS-5105 - for i in range(1, 10): - sql = "select last(ts), last_row(ts) from meters;" - tdLog.debug(f"{i}th execute sql: {sql}") - tdSql.query(sql) - tdSql.checkRows(1) - tdSql.checkData(0, 0, "2024-07-03 13:00:00.000") - tdSql.checkData(0, 1, "2024-07-03 13:00:00.000") - - def stop(self): - tdSql.close() - tdLog.success("%s successfully executed" % __file__) - - -tdCases.addWindows(__file__, TDTestCase()) -tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index aff5bedaf8..5667255f9f 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -20,7 +20,6 @@ ,,y,army,./pytest.sh python3 ./test.py -f insert/test_column_tag_boundary.py ,,y,army,./pytest.sh python3 ./test.py -f query/fill/fill_desc.py -N 3 -L 3 -D 2 ,,y,army,./pytest.sh python3 ./test.py -f query/fill/fill_null.py -,,y,army,./pytest.sh python3 ./test.py -f query/query_last_row_repeatly.py ,,y,army,./pytest.sh python3 ./test.py -f cluster/incSnapshot.py -N 3 ,,y,army,./pytest.sh python3 ./test.py -f query/query_basic.py -N 3 ,,y,army,./pytest.sh python3 ./test.py -f query/accuracy/test_query_accuracy.py diff --git a/tests/system-test/1-insert/stmt_error.py b/tests/system-test/1-insert/stmt_error.py index c6d747c317..0bfbedb9a1 100644 --- a/tests/system-test/1-insert/stmt_error.py +++ b/tests/system-test/1-insert/stmt_error.py @@ -24,7 +24,7 @@ class TDTestCase: case1 : [TD-11899] : this is an test case for check stmt error use . ''' return - + def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) @@ -49,7 +49,7 @@ class TDTestCase: ff float, dd double, bb binary(65059), nn nchar(100), tt timestamp)", ) conn.load_table_info("log") - + stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)") params = new_bind_params(16) @@ -123,7 +123,7 @@ class TDTestCase: ff float, dd double, bb binary(100), nn nchar(100), tt timestamp , error_data int )", ) conn.load_table_info("log") - + stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1000)") params = new_bind_params(16) @@ -195,20 +195,74 @@ class TDTestCase: except Exception as err: conn.close() raise err - + + def test_stmt_nornmal_value_error(self, conn): + # type: (TaosConnection) -> None + dbname = "pytest_taos_stmt_error" + try: + conn.execute("drop database if exists %s" % dbname) + conn.execute("create database if not exists %s" % dbname) + conn.select_db(dbname) + + conn.execute( + "create table if not exists log(ts timestamp, bo bool, nil tinyint, ti tinyint, si smallint, ii int,\ + bi bigint, tu tinyint unsigned, su smallint unsigned, iu int unsigned, bu bigint unsigned, \ + ff float, dd double, bb binary(100), nn nchar(100), tt timestamp , error_data int )", + ) + conn.load_table_info("log") + + + stmt = conn.statement("insert into log values(NOW(),?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)") + params = new_bind_params(16) + params[0].timestamp(1626861392589, PrecisionEnum.Milliseconds) + params[1].bool(True) + params[2].tinyint(None) + params[3].tinyint(2) + params[4].smallint(3) + params[5].int(4) + params[6].bigint(5) + params[7].tinyint_unsigned(6) + params[8].smallint_unsigned(7) + params[9].int_unsigned(8) + params[10].bigint_unsigned(9) + params[11].float(10.1) + params[12].double(10.11) + params[13].binary("hello") + params[14].nchar("stmt") + params[15].timestamp(1626861392589, PrecisionEnum.Milliseconds) + + stmt.bind_param(params) + stmt.execute() + + conn.close() + + except Exception as err: + conn.execute("drop database if exists %s" % dbname) + conn.close() + raise err + def run(self): - + self.test_stmt_insert(self.conn()) try: self.test_stmt_insert_error(self.conn()) except Exception as error : - - if str(error)=='[0x0200]: no mix usage for ? and values': + + if str(error)=='[0x0200]: stmt bind param does not support normal value in sql': tdLog.info('=========stmt error occured for bind part column ==============') else: tdLog.exit("expect error(%s) not occured" % str(error)) - try: + try: + self.test_stmt_nornmal_value_error(self.conn()) + except Exception as error : + + if str(error)=='[0x0200]: stmt bind param does not support normal value in sql': + tdLog.info('=========stmt error occured for bind part column ==============') + else: + tdLog.exit("expect error(%s) not occured" % str(error)) + + try: self.test_stmt_insert_error_null_timestamp(self.conn()) tdLog.exit("expect error not occured - 1") except Exception as error : diff --git a/tests/system-test/7-tmq/tmqParamsTest.py b/tests/system-test/7-tmq/tmqParamsTest.py index b25f23ef11..a323dff19e 100644 --- a/tests/system-test/7-tmq/tmqParamsTest.py +++ b/tests/system-test/7-tmq/tmqParamsTest.py @@ -134,6 +134,8 @@ class TDTestCase: if offset_value != "earliest" and offset_value != "": if offset_value == "latest": offset_value_list = list(map(lambda x: (x[-2].replace("wal:", "").replace("earliest", "0").replace("latest", "0").replace(offset_value, "0")), subscription_info)) + if None in offset_value_list: + continue offset_value_list1 = list(map(lambda x: int(x.split("/")[0]), offset_value_list)) offset_value_list2 = list(map(lambda x: int(x.split("/")[1]), offset_value_list)) tdSql.checkEqual(offset_value_list1 == offset_value_list2, True) @@ -142,6 +144,8 @@ class TDTestCase: tdSql.checkEqual(sum(rows_value_list), expected_res) elif offset_value == "none": offset_value_list = list(map(lambda x: x[-2], subscription_info)) + if None in offset_value_list: + continue offset_value_list1 = list(map(lambda x: (x.split("/")[0]), offset_value_list)) tdSql.checkEqual(offset_value_list1, ['none']*len(subscription_info)) rows_value_list = list(map(lambda x: x[-1], subscription_info)) @@ -155,6 +159,8 @@ class TDTestCase: # tdSql.checkEqual(sum(rows_value_list), expected_res) else: offset_value_list = list(map(lambda x: x[-2], subscription_info)) + if None in offset_value_list: + continue offset_value_list1 = list(map(lambda x: (x.split("/")[0]), offset_value_list)) tdSql.checkEqual(offset_value_list1, [None]*len(subscription_info)) rows_value_list = list(map(lambda x: x[-1], subscription_info)) @@ -162,6 +168,8 @@ class TDTestCase: else: if offset_value != "none": offset_value_list = list(map(lambda x: (x[-2].replace("wal:", "").replace("earliest", "0").replace("latest", "0").replace(offset_value, "0")), subscription_info)) + if None in offset_value_list: + continue offset_value_list1 = list(map(lambda x: int(x.split("/")[0]), offset_value_list)) offset_value_list2 = list(map(lambda x: int(x.split("/")[1]), offset_value_list)) tdSql.checkEqual(offset_value_list1 <= offset_value_list2, True) @@ -170,6 +178,8 @@ class TDTestCase: tdSql.checkEqual(sum(rows_value_list), expected_res) else: offset_value_list = list(map(lambda x: x[-2], subscription_info)) + if None in offset_value_list: + continue offset_value_list1 = list(map(lambda x: (x.split("/")[0]), offset_value_list)) tdSql.checkEqual(offset_value_list1, ['none']*len(subscription_info)) rows_value_list = list(map(lambda x: x[-1], subscription_info)) diff --git a/tests/taosc_test/taoscTest.cpp b/tests/taosc_test/taoscTest.cpp index 3f49b11b70..1b051f555e 100644 --- a/tests/taosc_test/taoscTest.cpp +++ b/tests/taosc_test/taoscTest.cpp @@ -30,7 +30,7 @@ #include "taos.h" class taoscTest : public ::testing::Test { - protected: +protected: static void SetUpTestCase() { printf("start test setup.\n"); TAOS* taos = taos_connect("localhost", "root", "taosdata", NULL, 0); @@ -188,7 +188,7 @@ TEST_F(taoscTest, taos_query_test) { void queryCallback2(void* param, void* res, int32_t code) { ASSERT_TRUE(code == 0); ASSERT_TRUE(param == pUserParam); - // After using taos_query_a to query, using taos_fetch_row in the callback will cause blocking. + // After using taos_query_a to query, using taos_fetch_row in the callback will cause blocking. // Reason: schProcessOnCbBegin SCH_LOCK_TASK(pTask) TAOS_ROW row; row = taos_fetch_row(res); @@ -254,7 +254,7 @@ TEST_F(taoscTest, taos_query_a_fetch_row) { printf("taos_query_a_fetch_row taos_fetch_row start...\n"); while ((row = taos_fetch_row(*pres))) { - getRecordCounts++; + getRecordCounts++; } printf("taos_query_a_fetch_row taos_fetch_row end. %p record count:%d.\n", *pres, getRecordCounts); taos_free_result(*pres); @@ -264,4 +264,3 @@ TEST_F(taoscTest, taos_query_a_fetch_row) { printf("taos_query_a_fetch_row test finished.\n"); } -