chore: merge 3.0

This commit is contained in:
kailixu 2024-01-11 20:05:23 +08:00
commit ddefc5f46a
289 changed files with 6467 additions and 7115 deletions

4
.gitignore vendored
View File

@ -1,4 +1,4 @@
build/
*build/
compile_commands.json
CMakeSettings.json
.cache
@ -132,3 +132,5 @@ tools/taos-tools
tools/taosws-rs
tags
.clangd
*CMakeCache*
*CMakeFiles*

View File

@ -1,6 +1,7 @@
# curl
ExternalProject_Add(curl2
URL https://curl.se/download/curl-8.2.1.tar.gz
URL https://github.com/curl/curl/releases/download/curl-8_2_1/curl-8.2.1.tar.gz
#URL https://curl.se/download/curl-8.2.1.tar.gz
URL_HASH MD5=b25588a43556068be05e1624e0e74d41
DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"

View File

@ -157,7 +157,7 @@ INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) FILE '/tmp/c
Automatically creating table and the table name is specified through the `tbname` column
```sql
INSERT INTO meters(tbname, location, groupId, ts, current, phase)
INSERT INTO meters(tbname, location, groupId, ts, current, voltage, phase)
values('d31001', 'California.SanFrancisco', 2, '2021-07-13 14:06:34.630', 10.2, 219, 0.32)
('d31001', 'California.SanFrancisco', 2, '2021-07-13 14:06:35.779', 10.15, 217, 0.33)
('d31002', NULL, 2, '2021-07-13 14:06:34.255', 10.15, 217, 0.33)

View File

@ -157,7 +157,7 @@ INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) FILE '/tmp/c
自动建表, 表名通过tbname列指定
```sql
INSERT INTO meters(tbname, location, groupId, ts, current, phase)
INSERT INTO meters(tbname, location, groupId, ts, current, voltage, phase)
values('d31001', 'California.SanFrancisco', 2, '2021-07-13 14:06:34.630', 10.2, 219, 0.32)
('d31001', 'California.SanFrancisco', 2, '2021-07-13 14:06:35.779', 10.15, 217, 0.33)
('d31002', NULL, 2, '2021-07-13 14:06:34.255', 10.15, 217, 0.33)

View File

@ -228,8 +228,8 @@ int32_t taosCfgDynamicOptions(SConfig *pCfg, char *name, bool forServer);
struct SConfig *taosGetCfg();
void taosSetAllDebugFlag(int32_t flag, bool rewrite);
void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal, bool rewrite);
void taosSetAllDebugFlag(int32_t flag);
void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal);
void taosLocalCfgForbiddenToChange(char *name, bool *forbidden);
int8_t taosGranted();

View File

@ -2557,12 +2557,6 @@ _err:
return NULL;
}
// this message is sent from mnode to mnode(read thread to write thread),
// so there is no need for serialization or deserialization
typedef struct {
SHashObj* rebSubHash; // SHashObj<key, SMqRebSubscribe>
} SMqDoRebalanceMsg;
typedef struct {
int64_t streamId;
int64_t checkpointId;

View File

@ -105,7 +105,7 @@ int32_t taosTm2Ts(struct STm* tm, int64_t* ts, int32_t precision);
/// formats array; If not NULL, [formats] will be used instead of [format] to skip parse formats again.
/// @param out output buffer, should be initialized by memset
/// @notes remember to free the generated formats
void taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen);
int32_t taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen);
/// @brief convert a formatted timestamp string to a timestamp
/// @param format must null terminated
/// @param [in, out] formats, see taosTs2Char

View File

@ -30,7 +30,10 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg)
int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored);
int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen);
int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader);
int32_t startStreamTasks(SStreamMeta* pMeta);
int32_t resetStreamTaskStatus(SStreamMeta* pMeta);
int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta);
int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta);
int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg);
int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg);
int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode);
#endif // TDENGINE_TQ_COMMON_H

View File

@ -119,6 +119,7 @@ typedef struct SScanLogicNode {
bool groupOrderScan;
bool onlyMetaCtbIdx; // for tag scan with no tbname
bool filesetDelimited; // returned blocks delimited by fileset
bool isCountByTag; // true if selectstmt hasCountFunc & part by tag/tbname
} SScanLogicNode;
typedef struct SJoinLogicNode {
@ -434,6 +435,7 @@ typedef struct STableScanPhysiNode {
bool assignBlockUid;
int8_t igCheckUpdate;
bool filesetDelimited;
bool needCountEmptyTable;
} STableScanPhysiNode;
typedef STableScanPhysiNode STableSeqScanPhysiNode;

View File

@ -363,6 +363,7 @@ typedef struct SSelectStmt {
bool hasLastRowFunc;
bool hasLastFunc;
bool hasTimeLineFunc;
bool hasCountFunc;
bool hasUdaf;
bool hasStateKey;
bool onlyHasKeepOrderFunc;

View File

@ -53,6 +53,7 @@ extern "C" {
#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1)
#define STREAM_EXEC_START_ALL_TASKS_ID (-2)
#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3)
#define STREAM_EXEC_STOP_ALL_TASKS_ID (-4)
typedef struct SStreamTask SStreamTask;
typedef struct SStreamQueue SStreamQueue;
@ -80,7 +81,7 @@ typedef enum ETaskStatus {
TASK_STATUS__HALT, // pause, but not be manipulated by user command
TASK_STATUS__PAUSE, // pause
TASK_STATUS__CK, // stream task is in checkpoint status, no data are allowed to put into inputQ anymore
TASK_STATUS__STREAM_SCAN_HISTORY,
// TASK_STATUS__STREAM_SCAN_HISTORY,
} ETaskStatus;
enum {
@ -137,15 +138,14 @@ enum {
typedef enum EStreamTaskEvent {
TASK_EVENT_INIT = 0x1,
TASK_EVENT_INIT_SCANHIST = 0x2,
TASK_EVENT_INIT_STREAM_SCANHIST = 0x3,
TASK_EVENT_SCANHIST_DONE = 0x4,
TASK_EVENT_STOP = 0x5,
TASK_EVENT_GEN_CHECKPOINT = 0x6,
TASK_EVENT_CHECKPOINT_DONE = 0x7,
TASK_EVENT_PAUSE = 0x8,
TASK_EVENT_RESUME = 0x9,
TASK_EVENT_HALT = 0xA,
TASK_EVENT_DROPPING = 0xB,
TASK_EVENT_SCANHIST_DONE = 0x3,
TASK_EVENT_STOP = 0x4,
TASK_EVENT_GEN_CHECKPOINT = 0x5,
TASK_EVENT_CHECKPOINT_DONE = 0x6,
TASK_EVENT_PAUSE = 0x7,
TASK_EVENT_RESUME = 0x8,
TASK_EVENT_HALT = 0x9,
TASK_EVENT_DROPPING = 0xA,
} EStreamTaskEvent;
typedef struct {
@ -314,6 +314,7 @@ typedef struct SCheckpointInfo {
int32_t checkpointNotReadyTasks;
bool dispatchCheckpointTrigger;
int64_t msgVer;
int32_t transId;
} SCheckpointInfo;
typedef struct SStreamStatus {
@ -324,6 +325,7 @@ typedef struct SStreamStatus {
int8_t keepTaskStatus;
bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it
int32_t timerActive; // timer is active
int8_t allowedAddInTimer; // allowed to add into timer
int32_t inScanHistorySentinel;
} SStreamStatus;
@ -460,6 +462,8 @@ struct SStreamTask {
char reserve[256];
};
typedef int32_t (*startComplete_fn_t)(struct SStreamMeta*);
typedef struct STaskStartInfo {
int64_t startTs;
int64_t readyTs;
@ -468,6 +472,8 @@ typedef struct STaskStartInfo {
SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing
SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed
int64_t elapsedTime;
int32_t restartCount; // restart task counter
startComplete_fn_t completeFn; // complete callback function
} STaskStartInfo;
typedef struct STaskUpdateInfo {
@ -489,8 +495,9 @@ typedef struct SStreamMeta {
int32_t vgId;
int64_t stage;
int32_t role;
bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower.
STaskStartInfo startInfo;
SRWLatch lock;
TdThreadRwlock lock;
int32_t walScanCounter;
void* streamBackend;
int64_t streamBackendRid;
@ -629,6 +636,7 @@ typedef struct {
int32_t nodeId;
SEpSet mgmtEps;
int32_t mnodeId;
int32_t transId;
int64_t expireTime;
} SStreamCheckpointSourceReq;
@ -671,8 +679,8 @@ typedef struct STaskStatusEntry {
int64_t verStart; // start version in WAL, only valid for source task
int64_t verEnd; // end version in WAL, only valid for source task
int64_t processedVer; // only valid for source task
int32_t relatedHTask; // has related fill-history task
int64_t activeCheckpointId; // current active checkpoint id
int32_t chkpointTransId; // checkpoint trans id
bool checkpointFailed; // denote if the checkpoint is failed or not
bool inputQChanging; // inputQ is changing or not
int64_t inputQUnchangeCounter;
@ -784,6 +792,7 @@ int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask);
int32_t streamTaskClearHTaskAttr(SStreamTask* pTask);
int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event);
int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, void* pFn);
int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event);
void streamTaskRestoreStatus(SStreamTask* pTask);
@ -811,6 +820,7 @@ int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask);
void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId);
void streamTaskOpenAllUpstreamInput(SStreamTask* pTask);
int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key);
void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask);
void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc);
@ -828,22 +838,21 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask);
// stream task meta
void streamMetaInit();
void streamMetaCleanup();
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage);
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage, startComplete_fn_t fn);
void streamMetaClose(SStreamMeta* streamMeta);
int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store
int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pKey);
int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded);
int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId);
int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta);
SStreamTask* streamMetaAcquireTaskNoLock(SStreamMeta* pMeta, int64_t streamId, int32_t taskId);
SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId);
void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask);
void streamMetaClear(SStreamMeta* pMeta);
void streamMetaInitBackend(SStreamMeta* pMeta);
int32_t streamMetaCommit(SStreamMeta* pMeta);
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta);
int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key);
void streamMetaStartHb(SStreamMeta* pMeta);
bool streamMetaTaskInTimer(SStreamMeta* pMeta);
int32_t streamMetaUpdateTaskDownstreamStatus(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, int64_t startTs,
@ -853,6 +862,11 @@ void streamMetaRUnLock(SStreamMeta* pMeta);
void streamMetaWLock(SStreamMeta* pMeta);
void streamMetaWUnLock(SStreamMeta* pMeta);
void streamMetaResetStartInfo(STaskStartInfo* pMeta);
SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta);
void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader);
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int32_t streamMetaStartAllTasks(SStreamMeta* pMeta);
int32_t streamMetaStopAllTasks(SStreamMeta* pMeta);
// checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);

View File

@ -46,8 +46,8 @@ extern "C" {
#define SYNC_HEARTBEAT_SLOW_MS 1500
#define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500
#define SYNC_SNAP_RESEND_MS 1000 * 300
#define SYNC_SNAP_TIMEOUT_MS 1000 * 1800
#define SYNC_SNAP_RESEND_MS 1000 * 60
#define SYNC_SNAP_TIMEOUT_MS 1000 * 300
#define SYNC_VND_COMMIT_MIN_MS 3000

View File

@ -163,7 +163,8 @@ int rpcReleaseHandle(void *handle, int8_t type); // just release conn to rpc in
// These functions will not be called in the child process
int rpcSendRequestWithCtx(void *thandle, const SEpSet *pEpSet, SRpcMsg *pMsg, int64_t *rid, SRpcCtx *ctx);
int rpcSendRecv(void *shandle, SEpSet *pEpSet, SRpcMsg *pReq, SRpcMsg *pRsp);
int rpcSendRecvWithTimeout(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp, int32_t timeoutMs);
int rpcSendRecvWithTimeout(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp, int8_t *epUpdated,
int32_t timeoutMs);
int rpcSetDefaultAddr(void *thandle, const char *ip, const char *fqdn);
void *rpcAllocHandle();
void rpcSetIpWhite(void *thandl, void *arg);
@ -171,6 +172,7 @@ void rpcSetIpWhite(void *thandl, void *arg);
int32_t rpcUtilSIpRangeToStr(SIpV4Range *pRange, char *buf);
int32_t rpcUtilSWhiteListToStr(SIpWhiteList *pWhiteList, char **ppBuf);
int32_t rpcCvtErrCode(int32_t code);
#ifdef __cplusplus
}

View File

@ -68,6 +68,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected"
#define TSDB_CODE_RPC_SOMENODE_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) //
#define TSDB_CODE_RPC_MAX_SESSIONS TAOS_DEF_ERROR_CODE(0, 0x0022) //
#define TSDB_CODE_RPC_NETWORK_ERROR TAOS_DEF_ERROR_CODE(0, 0x0023)
@ -355,6 +356,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL TAOS_DEF_ERROR_CODE(0, 0x03D5)
#define TSDB_CODE_MND_LAST_TRANS_NOT_FINISHED TAOS_DEF_ERROR_CODE(0, 0x03D6) //internal
#define TSDB_CODE_MND_TRANS_SYNC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x03D7)
#define TSDB_CODE_MND_TRANS_CTX_SWITCH TAOS_DEF_ERROR_CODE(0, 0x03D8)
#define TSDB_CODE_MND_TRANS_UNKNOW_ERROR TAOS_DEF_ERROR_CODE(0, 0x03DF)
// mnode-mq
@ -763,6 +765,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2808)
#define TSDB_CODE_FUNC_TO_CHAR_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2809)
//udf
#define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901)

View File

@ -94,7 +94,7 @@ typedef struct SConfigItem {
int64_t imax;
double fmax;
};
SArray *array; // SDiskCfg
SArray *array; // SDiskCfg/SLogVar
} SConfigItem;
typedef struct {

View File

@ -273,6 +273,7 @@ typedef enum ELogicConditionType {
#define TSDB_SHOW_SCHEMA_JSON_LEN TSDB_MAX_COLUMNS * 256
#define TSDB_SLOW_QUERY_SQL_LEN 512
#define TSDB_SHOW_SUBQUERY_LEN 1000
#define TSDB_LOG_VAR_LEN 32
#define TSDB_TRANS_STAGE_LEN 12
#define TSDB_TRANS_TYPE_LEN 16
@ -505,6 +506,10 @@ typedef struct {
int32_t primary;
} SDiskCfg;
typedef struct {
char name[TSDB_LOG_VAR_LEN];
} SLogVar;
#define TMQ_SEPARATOR ':'
enum {

View File

@ -67,6 +67,7 @@ extern int32_t smaDebugFlag;
extern int32_t idxDebugFlag;
extern int32_t tdbDebugFlag;
extern int32_t sndDebugFlag;
extern int32_t simDebugFlag;
int32_t taosInitLog(const char *logName, int32_t maxFiles);
void taosCloseLog();

View File

@ -902,6 +902,7 @@ int32_t hbQueryHbReqHandle(SClientHbKey *connKey, void *param, SClientHbReq *req
if (TSDB_CODE_SUCCESS != code) {
return code;
}
taosHashPut(clientHbMgr.appHbHash, &hbParam->clusterId, sizeof(uint64_t), NULL, 0);
}
// invoke after hbGetExpiredUserInfo
@ -1121,7 +1122,6 @@ static void *hbThreadFunc(void *param) {
asyncSendMsgToServer(pAppInstInfo->pTransporter, &epSet, &transporterId, pInfo);
tFreeClientHbBatchReq(pReq);
// hbClearReqInfo(pAppHbMgr);
taosHashPut(clientHbMgr.appHbHash, &pAppHbMgr->pAppInstInfo->clusterId, sizeof(uint64_t), NULL, 0);
atomic_add_fetch_32(&pAppHbMgr->reportCnt, 1);
}

View File

@ -1028,11 +1028,16 @@ static void tmqMgmtInit(void) {
}
}
#define SET_ERROR_MSG(MSG) if(errstr!=NULL)snprintf(errstr,errstrLen,MSG);
tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) {
if(conf == NULL) return NULL;
if(conf == NULL) {
SET_ERROR_MSG("configure is null")
return NULL;
}
taosThreadOnce(&tmqInit, tmqMgmtInit);
if (tmqInitRes != 0) {
terrno = tmqInitRes;
SET_ERROR_MSG("tmq timer init error")
return NULL;
}
@ -1040,6 +1045,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) {
if (pTmq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tscError("failed to create consumer, groupId:%s, code:%s", conf->groupId, terrstr());
SET_ERROR_MSG("malloc tmq failed")
return NULL;
}
@ -1055,6 +1061,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) {
conf->groupId[0] == 0) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId);
SET_ERROR_MSG("malloc tmq element failed or group is empty")
goto _failed;
}
@ -1086,6 +1093,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) {
if (tsem_init(&pTmq->rspSem, 0, 0) != 0) {
tscError("consumer:0x %" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(),
pTmq->groupId);
SET_ERROR_MSG("init t_sem failed")
goto _failed;
}
@ -1094,11 +1102,13 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) {
if (pTmq->pTscObj == NULL) {
tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId);
tsem_destroy(&pTmq->rspSem);
SET_ERROR_MSG("init tscObj failed")
goto _failed;
}
pTmq->refId = taosAddRef(tmqMgmt.rsetId, pTmq);
if (pTmq->refId < 0) {
SET_ERROR_MSG("add tscObj ref failed")
goto _failed;
}

View File

@ -423,7 +423,7 @@ int32_t taosAddClientLogCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "asyncLog", tsAsyncLog, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "logKeepDays", 0, -365000, 365000, CFG_SCOPE_BOTH, CFG_DYN_ENT_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "debugFlag", 0, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "simDebugFlag", 143, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "simDebugFlag", simDebugFlag, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "tmrDebugFlag", tmrDebugFlag, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "uDebugFlag", uDebugFlag, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
if (cfgAddInt32(pCfg, "rpcDebugFlag", rpcDebugFlag, 0, 255, CFG_SCOPE_BOTH, CFG_DYN_BOTH) != 0) return -1;
@ -497,7 +497,7 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0)
return -1;
if (cfgAddBool(pCfg, "useAdapter", tsUseAdapter, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, CFG_SCOPE_SERVER, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddInt64(pCfg, "queryMaxConcurrentTables", tsQueryMaxConcurrentTables, INT64_MIN, INT64_MAX, CFG_SCOPE_CLIENT,
CFG_DYN_NONE) != 0)
return -1;
@ -789,7 +789,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER,
CFG_DYN_ENT_SERVER) != 0)
return -1;
if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 60 * 10, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER,
if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 60 * 1, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER,
CFG_DYN_ENT_SERVER) != 0)
return -1;
@ -962,6 +962,7 @@ static void taosSetClientLogCfg(SConfig *pCfg) {
rpcDebugFlag = cfgGetItem(pCfg, "rpcDebugFlag")->i32;
qDebugFlag = cfgGetItem(pCfg, "qDebugFlag")->i32;
cDebugFlag = cfgGetItem(pCfg, "cDebugFlag")->i32;
simDebugFlag = cfgGetItem(pCfg, "simDebugFlag")->i32;
}
static void taosSetServerLogCfg(SConfig *pCfg) {
@ -1278,7 +1279,7 @@ int32_t taosCreateLog(const char *logname, int32_t logFileNum, const char *cfgDi
taosSetServerLogCfg(pCfg);
}
taosSetAllDebugFlag(cfgGetItem(pCfg, "debugFlag")->i32, false);
taosSetAllDebugFlag(cfgGetItem(pCfg, "debugFlag")->i32);
if (taosMulModeMkDir(tsLogDir, 0777, true) != 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
@ -1356,6 +1357,8 @@ int32_t taosInitCfg(const char *cfgDir, const char **envCmd, const char *envFile
taosSetSystemCfg(tsCfg);
if (taosSetFileHandlesLimit() != 0) return -1;
taosSetAllDebugFlag(cfgGetItem(tsCfg, "debugFlag")->i32);
cfgDumpCfg(tsCfg, tsc, false);
if (taosCheckGlobalCfg() != 0) {
@ -1399,7 +1402,7 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize,
*pVar = flag;
if (isDebugflag) {
taosSetDebugFlag(pOptions[d].optionVar, optName, flag, true);
taosSetDebugFlag(pOptions[d].optionVar, optName, flag);
}
terrno = TSDB_CODE_SUCCESS;
} break;
@ -1432,6 +1435,13 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize,
static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) {
terrno = TSDB_CODE_SUCCESS;
if (strcasecmp(name, "resetlog") == 0) {
// trigger, no item in cfg
taosResetLog();
cfgDumpCfg(tsCfg, 0, false);
return 0;
}
SConfigItem *pItem = cfgGetItem(pCfg, name);
if (!pItem || (pItem->dynScope & CFG_DYN_SERVER) == 0) {
uError("failed to config:%s, not support", name);
@ -1440,14 +1450,7 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) {
}
if (strncasecmp(name, "debugFlag", 9) == 0) {
int32_t flag = pItem->i32;
taosSetAllDebugFlag(flag, true);
return 0;
}
if (strcasecmp(name, "resetlog") == 0) {
taosResetLog();
cfgDumpCfg(tsCfg, 0, false);
taosSetAllDebugFlag(pItem->i32);
return 0;
}
@ -1459,7 +1462,7 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) {
{"smaDebugFlag", &smaDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"tdbDebugFlag", &tdbDebugFlag},
{"tmrDebugFlag", &tmrDebugFlag}, {"uDebugFlag", &uDebugFlag}, {"smaDebugFlag", &smaDebugFlag},
{"rpcDebugFlag", &rpcDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"metaDebugFlag", &metaDebugFlag},
{"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag},
{"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag},
};
static OptionNameAndVar options[] = {
@ -1523,8 +1526,7 @@ static int32_t taosCfgDynamicOptionsForClient(SConfig *pCfg, char *name) {
switch (lowcaseName[0]) {
case 'd': {
if (strcasecmp("debugFlag", name) == 0) {
int32_t flag = pItem->i32;
taosSetAllDebugFlag(flag, true);
taosSetAllDebugFlag(pItem->i32);
matched = true;
}
break;
@ -1690,7 +1692,7 @@ static int32_t taosCfgDynamicOptionsForClient(SConfig *pCfg, char *name) {
{"cDebugFlag", &cDebugFlag}, {"dDebugFlag", &dDebugFlag}, {"fsDebugFlag", &fsDebugFlag},
{"idxDebugFlag", &idxDebugFlag}, {"jniDebugFlag", &jniDebugFlag}, {"qDebugFlag", &qDebugFlag},
{"rpcDebugFlag", &rpcDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"tmrDebugFlag", &tmrDebugFlag},
{"uDebugFlag", &uDebugFlag},
{"uDebugFlag", &uDebugFlag}, {"simDebugFlag", &simDebugFlag},
};
static OptionNameAndVar options[] = {
@ -1735,9 +1737,9 @@ int32_t taosCfgDynamicOptions(SConfig *pCfg, char *name, bool forServer) {
return taosCfgDynamicOptionsForClient(pCfg, name);
}
void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal, bool rewrite) {
void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal) {
SConfigItem *pItem = cfgGetItem(tsCfg, flagName);
if (pItem != NULL && (rewrite || pItem->i32 == 0)) {
if (pItem != NULL) {
pItem->i32 = flagVal;
}
if (pFlagPtr != NULL) {
@ -1745,33 +1747,58 @@ void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal,
}
}
void taosSetAllDebugFlag(int32_t flag, bool rewrite) {
static int taosLogVarComp(void const *lp, void const *rp) {
SLogVar *lpVar = (SLogVar *)lp;
SLogVar *rpVar = (SLogVar *)rp;
return strcasecmp(lpVar->name, rpVar->name);
}
static void taosCheckAndSetDebugFlag(int32_t *pFlagPtr, char *name, int32_t flag, SArray *noNeedToSetVars) {
if (noNeedToSetVars != NULL && taosArraySearch(noNeedToSetVars, name, taosLogVarComp, TD_EQ) != NULL) {
return;
}
taosSetDebugFlag(pFlagPtr, name, flag);
}
void taosSetAllDebugFlag(int32_t flag) {
if (flag <= 0) return;
taosSetDebugFlag(NULL, "debugFlag", flag, rewrite);
taosSetDebugFlag(NULL, "simDebugFlag", flag, rewrite);
taosSetDebugFlag(NULL, "tmrDebugFlag", flag, rewrite);
taosSetDebugFlag(&uDebugFlag, "uDebugFlag", flag, rewrite);
taosSetDebugFlag(&rpcDebugFlag, "rpcDebugFlag", flag, rewrite);
taosSetDebugFlag(&jniDebugFlag, "jniDebugFlag", flag, rewrite);
taosSetDebugFlag(&qDebugFlag, "qDebugFlag", flag, rewrite);
taosSetDebugFlag(&cDebugFlag, "cDebugFlag", flag, rewrite);
taosSetDebugFlag(&dDebugFlag, "dDebugFlag", flag, rewrite);
taosSetDebugFlag(&vDebugFlag, "vDebugFlag", flag, rewrite);
taosSetDebugFlag(&mDebugFlag, "mDebugFlag", flag, rewrite);
taosSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, rewrite);
taosSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, rewrite);
taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite);
taosSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, rewrite);
taosSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, rewrite);
taosSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, rewrite);
taosSetDebugFlag(&smaDebugFlag, "smaDebugFlag", flag, rewrite);
taosSetDebugFlag(&idxDebugFlag, "idxDebugFlag", flag, rewrite);
taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite);
taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite);
taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite);
taosSetDebugFlag(&sndDebugFlag, "sndDebugFlag", flag, rewrite);
SArray *noNeedToSetVars = NULL;
SConfigItem *pItem = cfgGetItem(tsCfg, "debugFlag");
if (pItem != NULL) {
pItem->i32 = flag;
noNeedToSetVars = pItem->array;
}
taosCheckAndSetDebugFlag(&simDebugFlag, "simDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&tmrDebugFlag, "tmrDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&uDebugFlag, "uDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&rpcDebugFlag, "rpcDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&qDebugFlag, "qDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&jniDebugFlag, "jniDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&cDebugFlag, "cDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&dDebugFlag, "dDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&vDebugFlag, "vDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&mDebugFlag, "mDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&smaDebugFlag, "smaDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&idxDebugFlag, "idxDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, noNeedToSetVars);
taosCheckAndSetDebugFlag(&sndDebugFlag, "sndDebugFlag", flag, noNeedToSetVars);
taosArrayClear(noNeedToSetVars); // reset array
uInfo("all debug flag are set to %d", flag);
if (terrno == TSDB_CODE_CFG_NOT_FOUND) terrno = TSDB_CODE_SUCCESS; // ignore not exist
}
int8_t taosGranted() { return atomic_load_8(&tsGrant); }

View File

@ -8365,7 +8365,7 @@ int32_t tDecodeMqMetaRsp(SDecoder *pDecoder, SMqMetaRsp *pRsp) {
return 0;
}
int32_t tEncodeMqDataRsp(SEncoder *pEncoder, const SMqDataRsp *pRsp) {
int32_t tEncodeMqDataRspCommon(SEncoder *pEncoder, const SMqDataRsp *pRsp) {
if (tEncodeSTqOffsetVal(pEncoder, &pRsp->reqOffset) < 0) return -1;
if (tEncodeSTqOffsetVal(pEncoder, &pRsp->rspOffset) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->blockNum) < 0) return -1;
@ -8387,11 +8387,16 @@ int32_t tEncodeMqDataRsp(SEncoder *pEncoder, const SMqDataRsp *pRsp) {
}
}
}
return 0;
}
int32_t tEncodeMqDataRsp(SEncoder *pEncoder, const SMqDataRsp *pRsp) {
if (tEncodeMqDataRspCommon(pEncoder, pRsp) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->sleepTime) < 0) return -1;
return 0;
}
int32_t tDecodeMqDataRsp(SDecoder *pDecoder, SMqDataRsp *pRsp) {
int32_t tDecodeMqDataRspCommon(SDecoder *pDecoder, SMqDataRsp *pRsp) {
if (tDecodeSTqOffsetVal(pDecoder, &pRsp->reqOffset) < 0) return -1;
if (tDecodeSTqOffsetVal(pDecoder, &pRsp->rspOffset) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->blockNum) < 0) return -1;
@ -8433,7 +8438,15 @@ int32_t tDecodeMqDataRsp(SDecoder *pDecoder, SMqDataRsp *pRsp) {
}
}
}
return 0;
}
int32_t tDecodeMqDataRsp(SDecoder *pDecoder, SMqDataRsp *pRsp) {
if (tDecodeMqDataRspCommon(pDecoder, pRsp) < 0) return -1;
if (!tDecodeIsEnd(pDecoder)) {
if (tDecodeI64(pDecoder, &pRsp->sleepTime) < 0) return -1;
}
return 0;
}
@ -8449,7 +8462,7 @@ void tDeleteMqDataRsp(SMqDataRsp *pRsp) {
}
int32_t tEncodeSTaosxRsp(SEncoder *pEncoder, const STaosxRsp *pRsp) {
if (tEncodeMqDataRsp(pEncoder, (const SMqDataRsp *)pRsp) < 0) return -1;
if (tEncodeMqDataRspCommon(pEncoder, (const SMqDataRsp *)pRsp) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->createTableNum) < 0) return -1;
if (pRsp->createTableNum) {
@ -8463,7 +8476,7 @@ int32_t tEncodeSTaosxRsp(SEncoder *pEncoder, const STaosxRsp *pRsp) {
}
int32_t tDecodeSTaosxRsp(SDecoder *pDecoder, STaosxRsp *pRsp) {
if (tDecodeMqDataRsp(pDecoder, (SMqDataRsp *)pRsp) < 0) return -1;
if (tDecodeMqDataRspCommon(pDecoder, (SMqDataRsp*)pRsp) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->createTableNum) < 0) return -1;
if (pRsp->createTableNum) {
@ -8478,6 +8491,7 @@ int32_t tDecodeSTaosxRsp(SDecoder *pDecoder, STaosxRsp *pRsp) {
taosArrayPush(pRsp->createTableReq, &pCreate);
}
}
return 0;
}

View File

@ -1297,7 +1297,7 @@ static void parseTsFormat(const char* formatStr, SArray* formats) {
}
}
static void tm2char(const SArray* formats, const struct STm* tm, char* s, int32_t outLen) {
static int32_t tm2char(const SArray* formats, const struct STm* tm, char* s, int32_t outLen) {
int32_t size = taosArrayGetSize(formats);
const char* start = s;
for (int32_t i = 0; i < size; ++i) {
@ -1332,6 +1332,9 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s, int32_
s += 4;
break;
case TSFKW_DDD:
#ifdef WINDOWS
return TSDB_CODE_FUNC_TO_CHAR_NOT_SUPPORTED;
#endif
sprintf(s, "%03d", tm->tm.tm_yday + 1);
s += strlen(s);
break;
@ -1486,6 +1489,7 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s, int32_
break;
}
}
return TSDB_CODE_SUCCESS;
}
/// @brief find s in arr case insensitively
@ -1889,14 +1893,14 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
return ret;
}
void taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen) {
int32_t taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen) {
if (!*formats) {
*formats = taosArrayInit(8, sizeof(TSFormatNode));
parseTsFormat(format, *formats);
}
struct STm tm;
taosTs2Tm(ts, precision, &tm);
tm2char(*formats, &tm, out, outLen);
return tm2char(*formats, &tm, out, outLen);
}
int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int64_t* ts, int32_t precision, char* errMsg,

View File

@ -68,7 +68,7 @@ static struct {
int64_t startTime;
} global = {0};
static void dmSetDebugFlag(int32_t signum, void *sigInfo, void *context) { taosSetAllDebugFlag(143, true); }
static void dmSetDebugFlag(int32_t signum, void *sigInfo, void *context) { taosSetAllDebugFlag(143); }
static void dmSetAssert(int32_t signum, void *sigInfo, void *context) { tsAssert = 1; }
static void dmStopDnode(int signum, void *sigInfo, void *context) {

View File

@ -159,13 +159,18 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) {
dTrace("send status req to mnode, dnodeVer:%" PRId64 " statusSeq:%d", req.dnodeVer, req.statusSeq);
SEpSet epSet = {0};
int8_t epUpdated = 0;
dmGetMnodeEpSet(pMgmt->pData, &epSet);
rpcSendRecvWithTimeout(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp, 5000);
rpcSendRecvWithTimeout(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp, &epUpdated, 5000);
if (rpcRsp.code != 0) {
dmRotateMnodeEpSet(pMgmt->pData);
char tbuf[256];
char tbuf[512];
dmEpSetToStr(tbuf, sizeof(tbuf), &epSet);
dError("failed to send status req since %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code), tbuf, epSet.inUse);
} else {
if (epUpdated == 1) {
dmSetMnodeEpSet(pMgmt->pData, &epSet);
}
}
dmProcessStatusRsp(pMgmt, &rpcRsp);
}

View File

@ -22,8 +22,6 @@ static void *dmStatusThreadFp(void *param) {
int64_t lastTime = taosGetTimestampMs();
setThreadName("dnode-status");
const static int16_t TRIM_FREQ = 30;
int32_t trimCount = 0;
int32_t upTimeCount = 0;
int64_t upTime = 0;
@ -38,11 +36,6 @@ static void *dmStatusThreadFp(void *param) {
dmSendStatusReq(pMgmt);
lastTime = curTime;
trimCount = (trimCount + 1) % TRIM_FREQ;
if (trimCount == 0) {
taosMemoryTrim(0);
}
if ((upTimeCount = ((upTimeCount + 1) & 63)) == 0) {
upTime = taosGetOsUptime() - tsDndStartOsUptime;
tsDndUpTime = TMAX(tsDndUpTime, upTime);
@ -83,6 +76,9 @@ static void *dmMonitorThreadFp(void *param) {
int64_t lastTime = taosGetTimestampMs();
setThreadName("dnode-monitor");
static int32_t TRIM_FREQ = 20;
int32_t trimCount = 0;
while (1) {
taosMsleep(200);
if (pMgmt->pData->dropped || pMgmt->pData->stopped) break;
@ -93,6 +89,11 @@ static void *dmMonitorThreadFp(void *param) {
if (interval >= tsMonitorInterval) {
(*pMgmt->sendMonitorReportFp)();
lastTime = curTime;
trimCount = (trimCount + 1) % TRIM_FREQ;
if (trimCount == 0) {
taosMemoryTrim(0);
}
}
}
@ -131,7 +132,8 @@ static void *dmCrashReportThreadFp(void *param) {
TdFilePtr pFile = NULL;
bool truncateFile = false;
int32_t sleepTime = 200;
int32_t reportPeriodNum = 3600 * 1000 / sleepTime;;
int32_t reportPeriodNum = 3600 * 1000 / sleepTime;
;
int32_t loopTimes = reportPeriodNum;
while (1) {

View File

@ -108,12 +108,11 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_SNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_GET_MACHINE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_CONFIG_CHANGE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_CONFIG_CHANGE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CONNECT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_ACCT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
@ -165,6 +164,7 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_MND_PAUSE_STREAM, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_RESUME_STREAM, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_GRANT_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_GET_MACHINE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_RETRIEVE_IP_WHITE, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_GET_USER_WHITELIST, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;

View File

@ -90,7 +90,7 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
code = 0;
_OVER:

View File

@ -46,10 +46,7 @@ SSdbRow *mndConsumerActionDecode(SSdbRaw *pRaw);
int32_t mndSetConsumerCommitLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer);
int32_t mndSetConsumerDropLogs(SMnode *pMnode, STrans *pTrans, SMqConsumerObj *pConsumer);
bool mndRebTryStart();
void mndRebEnd();
void mndRebCntInc();
void mndRebCntDec();
const char *mndConsumerStatusName(int status);
#ifdef __cplusplus
}

View File

@ -47,7 +47,6 @@ typedef struct SStreamExecInfo {
SArray *pNodeList;
int64_t ts; // snapshot ts
SStreamTransMgmt transMgmt;
int64_t activeCheckpoint; // active check point id
SHashObj *pTaskMap;
SArray *pTaskList;
TdThreadMutex lock;

View File

@ -32,14 +32,13 @@ void mndReleaseSubscribe(SMnode *pMnode, SMqSubscribeObj *pSub);
int32_t mndMakeSubscribeKey(char *key, const char *cgroup, const char *topicName);
//static FORCE_INLINE int32_t mndMakePartitionKey(char *key, const char *cgroup, const char *topicName, int32_t vgId) {
// return snprintf(key, TSDB_PARTITION_KEY_LEN, "%d:%s:%s", vgId, cgroup, topicName);
//}
//int32_t mndDropSubByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb);
int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topic);
int32_t mndSetDropSubCommitLogs(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj *pSub);
bool mndRebTryStart();
void mndRebCntInc();
void mndRebCntDec();
#ifdef __cplusplus
}
#endif

View File

@ -97,7 +97,7 @@ SSdbRaw *mndTransEncode(STrans *pTrans);
SSdbRow *mndTransDecode(SSdbRaw *pRaw);
void mndTransDropData(STrans *pTrans);
bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans);
bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
#ifdef __cplusplus
}
#endif

View File

@ -26,6 +26,7 @@
#include "mndTrans.h"
#define MND_COMPACT_VER_NUMBER 1
#define MND_COMPACT_ID_LEN 11
static int32_t mndProcessCompactTimer(SRpcMsg *pReq);
@ -451,7 +452,7 @@ int32_t mndProcessKillCompactReq(SRpcMsg *pReq){
code = TSDB_CODE_ACTION_IN_PROGRESS;
char obj[10] = {0};
char obj[MND_COMPACT_ID_LEN] = {0};
sprintf(obj, "%d", pCompact->compactId);
auditRecord(pReq, pMnode->clusterId, "killCompact", pCompact->dbname, obj, killCompactReq.sql, killCompactReq.sqlLen);

View File

@ -290,7 +290,7 @@ int32_t mndAddCompactDetailToTran(SMnode *pMnode, STrans *pTrans, SCompactObj* p
SSdbRaw *pVgRaw = mndCompactDetailActionEncode(&compactDetail);
if (pVgRaw == NULL) return -1;
if (mndTransAppendRedolog(pTrans, pVgRaw) != 0) {
if (mndTransAppendCommitlog(pTrans, pVgRaw) != 0) {
sdbFreeRaw(pVgRaw);
return -1;
}

View File

@ -29,12 +29,6 @@
#define MND_CONSUMER_RESERVE_SIZE 64
#define MND_MAX_GROUP_PER_TOPIC 100
#define MND_CONSUMER_LOST_HB_CNT 6
#define MND_CONSUMER_LOST_CLEAR_THRESHOLD 43200
static int32_t mqRebInExecCnt = 0;
static const char *mndConsumerStatusName(int status);
static int32_t mndConsumerActionInsert(SSdb *pSdb, SMqConsumerObj *pConsumer);
static int32_t mndConsumerActionDelete(SSdb *pSdb, SMqConsumerObj *pConsumer);
@ -45,7 +39,6 @@ static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter);
static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg);
static int32_t mndProcessAskEpReq(SRpcMsg *pMsg);
static int32_t mndProcessMqHbReq(SRpcMsg *pMsg);
static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg);
static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg);
static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg);
@ -63,7 +56,7 @@ int32_t mndInitConsumer(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_SUBSCRIBE, mndProcessSubscribeReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_HB, mndProcessMqHbReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_ASK_EP, mndProcessAskEpReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessMqTimerMsg);
// mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessMqTimerMsg);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, mndProcessConsumerClearMsg);
@ -95,36 +88,6 @@ void mndDropConsumerFromSdb(SMnode *pMnode, int64_t consumerId, SRpcHandleInfo*
return;
}
bool mndRebTryStart() {
int32_t old = atomic_val_compare_exchange_32(&mqRebInExecCnt, 0, 1);
mDebug("tq timer, rebalance counter old val:%d", old);
return old == 0;
}
void mndRebEnd() { mndRebCntDec(); }
void mndRebCntInc() {
int32_t val = atomic_add_fetch_32(&mqRebInExecCnt, 1);
mInfo("rebalance trans start, rebalance counter:%d", val);
}
void mndRebCntDec() {
while (1) {
int32_t val = atomic_load_32(&mqRebInExecCnt);
if (val <= 0) {
mError("rebalance trans end, rebalance counter:%d should not be less equalled than 0, ignore counter desc", val);
break;
}
int32_t newVal = val - 1;
int32_t oldVal = atomic_val_compare_exchange_32(&mqRebInExecCnt, val, newVal);
if (oldVal == val) {
mDebug("rebalance trans end, rebalance counter:%d", newVal);
break;
}
}
}
static int32_t validateTopics(STrans *pTrans, const SArray *pTopicList, SMnode *pMnode, const char *pUser, bool enableReplay) {
SMqTopicObj *pTopic = NULL;
int32_t code = 0;
@ -257,162 +220,6 @@ FAIL:
return -1;
}
static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
SMqRebInfo *pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
if (pRebInfo == NULL) {
pRebInfo = tNewSMqRebSubscribe(key);
if (pRebInfo == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
taosHashPut(pHash, key, strlen(key) + 1, pRebInfo, sizeof(SMqRebInfo));
taosMemoryFree(pRebInfo);
pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
}
return pRebInfo;
}
static void freeRebalanceItem(void *param) {
SMqRebInfo *pInfo = param;
taosArrayDestroy(pInfo->newConsumers);
taosArrayDestroy(pInfo->removedConsumers);
}
static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) {
SMnode *pMnode = pMsg->info.node;
SSdb *pSdb = pMnode->pSdb;
SMqConsumerObj *pConsumer;
void *pIter = NULL;
mDebug("start to process mq timer");
// rebalance cannot be parallel
if (!mndRebTryStart()) {
mInfo("mq rebalance already in progress, do nothing");
return 0;
}
SMqDoRebalanceMsg *pRebMsg = rpcMallocCont(sizeof(SMqDoRebalanceMsg));
if (pRebMsg == NULL) {
mError("failed to create the rebalance msg, size:%d, quit mq timer", (int32_t)sizeof(SMqDoRebalanceMsg));
mndRebEnd();
return TSDB_CODE_OUT_OF_MEMORY;
}
pRebMsg->rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK);
if (pRebMsg->rebSubHash == NULL) {
mError("failed to create rebalance hashmap");
rpcFreeCont(pRebMsg);
mndRebEnd();
return TSDB_CODE_OUT_OF_MEMORY;
}
taosHashSetFreeFp(pRebMsg->rebSubHash, freeRebalanceItem);
// iterate all consumers, find all modification
while (1) {
pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
if (pIter == NULL) {
break;
}
int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1);
int32_t status = atomic_load_32(&pConsumer->status);
mInfo("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d",
pConsumer->consumerId, status, mndConsumerStatusName(status), pConsumer->subscribeTime, pConsumer->createTime,
hbStatus);
if (status == MQ_CONSUMER_STATUS_READY) {
if (taosArrayGetSize(pConsumer->assignedTopics) == 0) { // unsubscribe or close
mndDropConsumerFromSdb(pMnode, pConsumer->consumerId, &pMsg->info);
} else if (hbStatus > MND_CONSUMER_LOST_HB_CNT) {
taosRLockLatch(&pConsumer->lock);
int32_t topicNum = taosArrayGetSize(pConsumer->currentTopics);
for (int32_t i = 0; i < topicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *removedTopic = taosArrayGetP(pConsumer->currentTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
}
taosRUnLockLatch(&pConsumer->lock);
}else{
int32_t newTopicNum = taosArrayGetSize(pConsumer->currentTopics);
for (int32_t i = 0; i < newTopicNum; i++) {
char * topic = taosArrayGetP(pConsumer->currentTopics, i);
SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topic);
if (pSub == NULL) {
continue;
}
taosRLockLatch(&pSub->lock);
// 2.2 iterate all vg assigned to the consumer of that topic
SMqConsumerEp *pConsumerEp = taosHashGet(pSub->consumerHash, &pConsumer->consumerId, sizeof(int64_t));
int32_t vgNum = taosArrayGetSize(pConsumerEp->vgs);
for (int32_t j = 0; j < vgNum; j++) {
SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j);
SVgObj * pVgroup = mndAcquireVgroup(pMnode, pVgEp->vgId);
if (!pVgroup) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
mndMakeSubscribeKey(key, pConsumer->cgroup, topic);
mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
mInfo("vnode splitted, vgId:%d rebalance will be triggered", pVgEp->vgId);
}
mndReleaseVgroup(pMnode, pVgroup);
}
taosRUnLockLatch(&pSub->lock);
mndReleaseSubscribe(pMnode, pSub);
}
}
} else if (status == MQ_CONSUMER_STATUS_LOST) {
if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { // clear consumer if lost a day
mndDropConsumerFromSdb(pMnode, pConsumer->consumerId, &pMsg->info);
}
} else { // MQ_CONSUMER_STATUS_REBALANCE
taosRLockLatch(&pConsumer->lock);
int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics);
for (int32_t i = 0; i < newTopicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *newTopic = taosArrayGetP(pConsumer->rebNewTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, newTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
taosArrayPush(pRebSub->newConsumers, &pConsumer->consumerId);
}
int32_t removedTopicNum = taosArrayGetSize(pConsumer->rebRemovedTopics);
for (int32_t i = 0; i < removedTopicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *removedTopic = taosArrayGetP(pConsumer->rebRemovedTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(pRebMsg->rebSubHash, key);
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
}
taosRUnLockLatch(&pConsumer->lock);
}
mndReleaseConsumer(pMnode, pConsumer);
}
if (taosHashGetSize(pRebMsg->rebSubHash) != 0) {
mInfo("mq rebalance will be triggered");
SRpcMsg rpcMsg = {
.msgType = TDMT_MND_TMQ_DO_REBALANCE,
.pCont = pRebMsg,
.contLen = sizeof(SMqDoRebalanceMsg),
};
tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
} else {
taosHashCleanup(pRebMsg->rebSubHash);
rpcFreeCont(pRebMsg);
mDebug("mq timer finished, no need to re-balance");
mndRebEnd();
}
return 0;
}
static int32_t mndProcessMqHbReq(SRpcMsg *pMsg) {
int32_t code = 0;
SMnode *pMnode = pMsg->info.node;
@ -948,7 +755,7 @@ static void updateConsumerStatus(SMqConsumerObj *pConsumer) {
if (taosArrayGetSize(pConsumer->rebNewTopics) == 0 && taosArrayGetSize(pConsumer->rebRemovedTopics) == 0) {
if (status == MQ_CONSUMER_STATUS_REBALANCE) {
pConsumer->status = MQ_CONSUMER_STATUS_READY;
} else if (status == MQ_CONSUMER_STATUS_READY) {
} else if (status == MQ_CONSUMER_STATUS_READY && taosArrayGetSize(pConsumer->currentTopics) == 0) {
pConsumer->status = MQ_CONSUMER_STATUS_LOST;
}
}
@ -1251,7 +1058,7 @@ static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter) {
sdbCancelFetch(pSdb, pIter);
}
static const char *mndConsumerStatusName(int status) {
const char *mndConsumerStatusName(int status) {
switch (status) {
case MQ_CONSUMER_STATUS_READY:
return "ready";

View File

@ -1050,6 +1050,32 @@ _OVER:
return code;
}
static bool mndIsEmptyDnode(SMnode *pMnode, int32_t dnodeId) {
bool isEmpty = false;
SMnodeObj *pMObj = NULL;
SQnodeObj *pQObj = NULL;
SSnodeObj *pSObj = NULL;
pQObj = mndAcquireQnode(pMnode, dnodeId);
if (pQObj) goto _OVER;
pSObj = mndAcquireSnode(pMnode, dnodeId);
if (pSObj) goto _OVER;
pMObj = mndAcquireMnode(pMnode, dnodeId);
if (pMObj) goto _OVER;
int32_t numOfVnodes = mndGetVnodesNum(pMnode, dnodeId);
if (numOfVnodes > 0) goto _OVER;
isEmpty = true;
_OVER:
mndReleaseMnode(pMnode, pMObj);
mndReleaseQnode(pMnode, pQObj);
mndReleaseSnode(pMnode, pSObj);
return isEmpty;
}
static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node;
int32_t code = -1;
@ -1111,7 +1137,8 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) {
goto _OVER;
}
if (!isonline && !force) {
bool isEmpty = mndIsEmptyDnode(pMnode, pDnode->id);
if (!isonline && !force && !isEmpty) {
terrno = TSDB_CODE_DNODE_OFFLINE;
mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(),
numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL);

View File

@ -16,6 +16,8 @@
#define _DEFAULT_SOURCE
#include "mndAcct.h"
#include "mndCluster.h"
#include "mndCompact.h"
#include "mndCompactDetail.h"
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
@ -42,8 +44,6 @@
#include "mndUser.h"
#include "mndVgroup.h"
#include "mndView.h"
#include "mndCompact.h"
#include "mndCompactDetail.h"
static inline int32_t mndAcquireRpc(SMnode *pMnode) {
int32_t code = 0;
@ -275,18 +275,48 @@ static void mndCheckDnodeOffline(SMnode *pMnode) {
mndReleaseRpc(pMnode);
}
static void *mndThreadFp(void *param) {
SMnode *pMnode = param;
int64_t lastTime = 0;
setThreadName("mnode-timer");
static bool mnodeIsNotLeader(SMnode *pMnode) {
terrno = 0;
taosThreadRwlockRdlock(&pMnode->lock);
SSyncState state = syncGetState(pMnode->syncMgmt.sync);
if (terrno != 0) {
taosThreadRwlockUnlock(&pMnode->lock);
return true;
}
while (1) {
lastTime++;
taosMsleep(100);
if (mndGetStop(pMnode)) break;
if (lastTime % 10 != 0) continue;
if (state.state != TAOS_SYNC_STATE_LEADER) {
taosThreadRwlockUnlock(&pMnode->lock);
terrno = TSDB_CODE_SYN_NOT_LEADER;
return true;
}
if (!state.restored || !pMnode->restored) {
taosThreadRwlockUnlock(&pMnode->lock);
terrno = TSDB_CODE_SYN_RESTORING;
return true;
}
taosThreadRwlockUnlock(&pMnode->lock);
return false;
}
int64_t sec = lastTime / 10;
static int32_t minCronTime() {
int64_t min = INT64_MAX;
min = TMIN(min, tsTtlPushIntervalSec);
min = TMIN(min, tsTrimVDbIntervalSec);
min = TMIN(min, tsTransPullupInterval);
min = TMIN(min, tsCompactPullupInterval);
min = TMIN(min, tsMqRebalanceInterval);
min = TMIN(min, tsStreamCheckpointInterval);
min = TMIN(min, 5); // checkpointRemain
min = TMIN(min, tsStreamNodeCheckInterval);
int64_t telemInt = TMIN(60, (tsTelemInterval - 1));
min = TMIN(min, telemInt);
min = TMIN(min, tsGrantHBInterval);
min = TMIN(min, tsUptimeInterval);
return min <= 1 ? 2 : min;
}
void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) {
if (sec % tsTtlPushIntervalSec == 0) {
mndPullupTtl(pMnode);
}
@ -330,15 +360,37 @@ static void *mndThreadFp(void *param) {
if (sec % tsUptimeInterval == 0) {
mndIncreaseUpTime(pMnode);
}
}
void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) {
if (sec % (tsStatusInterval * 5) == 0) {
mndCheckDnodeOffline(pMnode);
}
if (sec % (MNODE_TIMEOUT_SEC / 2) == 0) {
mndSyncCheckTimeout(pMnode);
}
}
static void *mndThreadFp(void *param) {
SMnode *pMnode = param;
int64_t lastTime = 0;
setThreadName("mnode-timer");
while (1) {
lastTime++;
taosMsleep(100);
if (mndGetStop(pMnode)) break;
if (lastTime % 10 != 0) continue;
int64_t sec = lastTime / 10;
mndDoTimerCheckTask(pMnode, sec);
int64_t minCron = minCronTime();
if (sec % minCron == 0 && mnodeIsNotLeader(pMnode)) {
// not leader, do nothing
mTrace("timer not process since mnode is not leader, reason: %s", tstrerror(terrno)) terrno = 0;
continue;
}
mndDoTimerPullupTask(pMnode, sec);
}
return NULL;
}
@ -730,7 +782,9 @@ _OVER:
if (pMsg->msgType == TDMT_MND_TMQ_TIMER || pMsg->msgType == TDMT_MND_TELEM_TIMER ||
pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER ||
pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER ||
pMsg->msgType == TDMT_MND_COMPACT_TIMER) {
pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER ||
pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE ||
pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_TIMER) {
mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored,
pMnode->stopped, state.restored, syncStr(state.state));
return -1;

View File

@ -15,6 +15,7 @@
#define _DEFAULT_SOURCE
#include "mndProfile.h"
#include "audit.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
@ -26,7 +27,6 @@
#include "mndView.h"
#include "tglobal.h"
#include "tversion.h"
#include "audit.h"
typedef struct {
uint32_t id;
@ -546,7 +546,8 @@ static int32_t mndProcessQueryHeartBeat(SMnode *pMnode, SRpcMsg *pMsg, SClientHb
if (needCheck) {
SKv kv1 = {.key = HEARTBEAT_KEY_DYN_VIEW, .valueLen = sizeof(*pDynViewVer), .value = pRspVer};
taosArrayPush(hbRsp.info, &kv1);
mTrace("need to check view ver, lastest bootTs:%" PRId64 ", ver:%" PRIu64, pRspVer->svrBootTs, pRspVer->dynViewVer);
mTrace("need to check view ver, lastest bootTs:%" PRId64 ", ver:%" PRIu64, pRspVer->svrBootTs,
pRspVer->dynViewVer);
}
}
#endif
@ -815,7 +816,8 @@ static int32_t mndRetrieveConns(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBl
* @param rowsToPack at most rows to pack
* @return rows packed
*/
static int32_t packQueriesIntoBlock(SShowObj* pShow, SConnObj* pConn, SSDataBlock* pBlock, uint32_t offset, uint32_t rowsToPack) {
static int32_t packQueriesIntoBlock(SShowObj *pShow, SConnObj *pConn, SSDataBlock *pBlock, uint32_t offset,
uint32_t rowsToPack) {
int32_t cols = 0;
taosRLockLatch(&pConn->queryLock);
int32_t numOfQueries = taosArrayGetSize(pConn->pQueries);
@ -877,14 +879,19 @@ static int32_t packQueriesIntoBlock(SShowObj* pShow, SConnObj* pConn, SSDataBloc
colDataSetVal(pColInfo, curRowIndex, (const char *)&pQuery->subPlanNum, false);
char subStatus[TSDB_SHOW_SUBQUERY_LEN + VARSTR_HEADER_SIZE] = {0};
int64_t reserve = 64;
int32_t strSize = sizeof(subStatus);
int32_t offset = VARSTR_HEADER_SIZE;
for (int32_t i = 0; i < pQuery->subPlanNum && offset < strSize; ++i) {
for (int32_t i = 0; i < pQuery->subPlanNum && offset + reserve < strSize; ++i) {
if (i) {
offset += snprintf(subStatus + offset, strSize - offset - 1, ",");
offset += sprintf(subStatus + offset, ",");
}
if (offset + reserve < strSize) {
SQuerySubDesc *pDesc = taosArrayGet(pQuery->subDesc, i);
offset += snprintf(subStatus + offset, strSize - offset - 1, "%" PRIu64 ":%s", pDesc->tid, pDesc->status);
offset += sprintf(subStatus + offset, "%" PRIu64 ":%s", pDesc->tid, pDesc->status);
} else {
break;
}
}
varDataLen(subStatus) = strlen(&subStatus[VARSTR_HEADER_SIZE]);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);

View File

@ -251,7 +251,7 @@ int32_t doAddSinkTask(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, in
static int32_t addSourceTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList,
SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset, bool fillHistory,
bool hasExtraSink, int64_t firstWindowSkey, bool hasFillHistory) {
bool hasExtraSink, int64_t nextWindowSkey, bool hasFillHistory) {
SStreamTask* pTask =
tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList, hasFillHistory);
if (pTask == NULL) {
@ -262,7 +262,7 @@ static int32_t addSourceTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList,
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = firstWindowSkey - 1;
pWindow->ekey = nextWindowSkey - 1;
mDebug("add source task 0x%x window:%" PRId64 " - %" PRId64, pTask->id.taskId, pWindow->skey, pWindow->ekey);
// sink or dispatch
@ -382,7 +382,7 @@ static int32_t doAddSourceTask(SArray* pTaskList, bool isFillhistory, int64_t ui
epsetAssign(&(pTask)->info.mnodeEpset, pEpset);
// todo set the correct ts, which should be last key of queried table.
// set the correct ts, which is the last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = nextWindowSkey - 1;

View File

@ -62,7 +62,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter);
static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq);
static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq);
static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId,
int64_t streamId, int32_t taskId);
int64_t streamId, int32_t taskId, int32_t transId);
static int32_t mndProcessNodeCheck(SRpcMsg *pReq);
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg);
static SArray *extractNodeListFromStream(SMnode *pMnode);
@ -685,6 +685,40 @@ _OVER:
return -1;
}
static int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool* hasEpset, int32_t taskId, int32_t nodeId) {
*hasEpset = false;
if (nodeId == SNODE_HANDLE) {
SSnodeObj *pObj = NULL;
void *pIter = NULL;
pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj);
if (pIter != NULL) {
addEpIntoEpSet(pEpSet, pObj->pDnode->fqdn, pObj->pDnode->port);
sdbRelease(pMnode->pSdb, pObj);
sdbCancelFetch(pMnode->pSdb, pIter);
*hasEpset = true;
return TSDB_CODE_SUCCESS;
} else {
mError("failed to acquire snode epset");
return TSDB_CODE_INVALID_PARA;
}
} else {
SVgObj *pVgObj = mndAcquireVgroup(pMnode, nodeId);
if (pVgObj != NULL) {
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
epsetAssign(pEpSet, &epset);
*hasEpset = true;
return TSDB_CODE_SUCCESS;
} else {
mDebug("orphaned task:0x%x need to be dropped, nodeId:%d, no redo action", taskId, nodeId);
return TSDB_CODE_SUCCESS;
}
}
}
static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) {
SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq));
if (pReq == NULL) {
@ -698,28 +732,17 @@ static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask
STransAction action = {0};
SEpSet epset = {0};
if (pTask->info.nodeId == SNODE_HANDLE) {
SSnodeObj *pObj = NULL;
void *pIter = NULL;
while (1) {
pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj);
if (pIter == NULL) {
break;
bool hasEpset = false;
int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
return -1;
}
addEpIntoEpSet(&epset, pObj->pDnode->fqdn, pObj->pDnode->port);
sdbRelease(pMnode->pSdb, pObj);
}
} else {
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
if (pVgObj != NULL) {
epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
} else {
mDebug("orphaned task:0x%x need to be dropped, nodeId:%d, no redo action", pTask->id.taskId, pTask->info.nodeId);
taosMemoryFree(pReq);
return 0;
}
// no valid epset, return directly without redoAction
if (!hasEpset) {
return TSDB_CODE_SUCCESS;
}
// The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode.
@ -769,7 +792,7 @@ static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { /
if (pStream->targetStbUid == pStreamObj->targetStbUid) {
mError("Cannot write the same stable as other stream:%s", pStream->name);
sdbCancelFetch(pMnode->pSdb, pIter);
terrno = TSDB_CODE_MND_TOO_MANY_STREAMS;
terrno = TSDB_CODE_MND_INVALID_TARGET_TABLE;
return terrno;
}
}
@ -978,13 +1001,14 @@ static int32_t mndProcessStreamRemainChkptTmr(SRpcMsg *pReq) {
}
static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId,
int64_t streamId, int32_t taskId) {
int64_t streamId, int32_t taskId, int32_t transId) {
SStreamCheckpointSourceReq req = {0};
req.checkpointId = checkpointId;
req.nodeId = nodeId;
req.expireTime = -1;
req.streamId = streamId; // pTask->id.streamId;
req.taskId = taskId; // pTask->id.taskId;
req.transId = transId;
int32_t code;
int32_t blen;
@ -1074,7 +1098,7 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre
void * buf;
int32_t tlen;
if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId,
pTask->id.taskId) < 0) {
pTask->id.taskId, pTrans->id) < 0) {
mndReleaseVgroup(pMnode, pVgObj);
taosWUnLockLatch(&pStream->lock);
goto _ERR;
@ -1143,7 +1167,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
void * buf;
int32_t tlen;
if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId,
pTask->id.taskId) < 0) {
pTask->id.taskId, pTrans->id) < 0) {
mndReleaseVgroup(pMnode, pVgObj);
taosWUnLockLatch(&pStream->lock);
return -1;
@ -1780,9 +1804,20 @@ static int32_t mndPauseStreamTask(SMnode *pMnode, STrans *pTrans, SStreamTask *p
pReq->taskId = pTask->id.taskId;
pReq->streamId = pTask->id.streamId;
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
SEpSet epset;
bool hasEpset = false;
int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
taosMemoryFree(pReq);
return -1;
}
// no valid epset, return directly without redoAction
if (!hasEpset) {
taosMemoryFree(pReq);
return TSDB_CODE_SUCCESS;
}
STransAction action = {0};
initTransAction(&action, pReq, sizeof(SVPauseStreamTaskReq), TDMT_STREAM_TASK_PAUSE, &epset, 0);
@ -1924,17 +1959,25 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
static int32_t mndResumeStreamTask(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) {
SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq));
if (pReq == NULL) {
mError("failed to malloc in resume stream, size:%" PRIzu ", code:%s", sizeof(SVResumeStreamTaskReq),
tstrerror(TSDB_CODE_OUT_OF_MEMORY));
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId;
pReq->streamId = pTask->id.streamId;
pReq->igUntreated = igUntreated;
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
SEpSet epset;
bool hasEpset = false;
int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
taosMemoryFree(pReq);
return -1;
}
STransAction action = {0};
initTransAction(&action, pReq, sizeof(SVResumeStreamTaskReq), TDMT_STREAM_TASK_RESUME, &epset, 0);
@ -2212,6 +2255,8 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP
epsetAssign(&updateInfo.newEp, &pCurrent->epset);
taosArrayPush(info.pUpdateNodeList, &updateInfo);
}
// todo handle the snode info
if (pCurrent->nodeId != SNODE_HANDLE) {
SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId);
taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0);
@ -2291,12 +2336,28 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) {
static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) {
SSdb *pSdb = pMnode->pSdb;
// check all streams that involved this vnode should update the epset info
SStreamObj *pStream = NULL;
void *pIter = NULL;
STrans *pTrans = NULL;
// conflict check for nodeUpdate trans, here we randomly chose one stream to add into the trans pool
while(1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) {
break;
}
bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_UPDATE_NAME, false);
sdbRelease(pSdb, pStream);
if (conflict) {
mWarn("nodeUpdate trans in progress, current nodeUpdate ignored");
sdbCancelFetch(pSdb, pIter);
return -1;
}
}
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) {
@ -2311,6 +2372,8 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange
sdbCancelFetch(pSdb, pIter);
return terrno;
}
mndStreamRegisterTrans(pTrans, MND_STREAM_TASK_RESET_NAME, pStream->uid);
}
void *p = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb));
@ -2557,7 +2620,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
code = mndProcessVgroupChange(pMnode, &changeInfo);
// keep the new vnode snapshot
// keep the new vnode snapshot if success
if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) {
mDebug("create trans successfully, update cached node list");
taosArrayDestroy(execInfo.pNodeList);
@ -2708,9 +2771,18 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
pReq->taskId = pTask->id.taskId;
pReq->streamId = pTask->id.streamId;
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
SEpSet epset;
bool hasEpset = false;
int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId);
if (code != TSDB_CODE_SUCCESS) {
taosMemoryFree(pReq);
continue;
}
if (!hasEpset) {
taosMemoryFree(pReq);
continue;
}
STransAction action = {0};
initTransAction(&action, pReq, sizeof(SVResetStreamTaskReq), TDMT_VND_STREAM_TASK_RESET, &epset, 0);
@ -2770,40 +2842,37 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t le
return TSDB_CODE_SUCCESS;
}
static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) {
static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t transId) {
int32_t code = TSDB_CODE_SUCCESS;
STrans *pTrans = mndAcquireTrans(pMnode, transId);
if (pTrans != NULL) {
mInfo("kill checkpoint transId:%d to reset task status", transId);
mndKillTrans(pMnode, pTrans);
mndReleaseTrans(pMnode, pTrans);
} else {
mError("failed to acquire checkpoint trans:%d", transId);
}
// set all tasks status to be normal, refactor later to be stream level, instead of vnode level.
SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL;
void * pIter = NULL;
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) {
break;
}
SStreamObj *pStream = mndGetStreamObj(pMnode, streamId);
if (pStream == NULL) {
code = TSDB_CODE_STREAM_TASK_NOT_EXIST;
mError("failed to acquire the streamObj:0x%" PRIx64 " to reset checkpoint, may have been dropped", pStream->uid);
} else {
bool conflict = streamTransConflictOtherTrans(pMnode, pStream->uid, MND_STREAM_TASK_RESET_NAME, false);
if (conflict) {
mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", pStream->name,
pStream->sourceDb, pStream->targetDb);
continue;
mError("stream:%s other trans exists in DB:%s, dstTable:%s failed to start reset-status trans", pStream->name,
pStream->sourceDb, pStream->targetSTbName);
} else {
mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, transId:%d, create reset trans", pStream->name,
pStream->uid, transId);
code = createStreamResetStatusTrans(pMnode, pStream);
mndReleaseStream(pMnode, pStream);
}
}
mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid);
int32_t code = createStreamResetStatusTrans(pMnode, pStream);
if (code != TSDB_CODE_SUCCESS) {
sdbCancelFetch(pSdb, pIter);
return code;
}
}
return 0;
}
static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) {
for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); i++) {
@ -2821,38 +2890,38 @@ static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) {
return NULL;
}
static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStreamExecInfo *pExecNode) {
if (pTaskEntry->status == TASK_STATUS__STREAM_SCAN_HISTORY && pTaskEntry->statusLastDuration >= 10) {
if (!pTaskEntry->inputQChanging && pTaskEntry->inputQUnchangeCounter > 10) {
int32_t numOfReady = 0;
int32_t numOfTotal = 0;
for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) {
STaskId *pId = taosArrayGet(pExecNode->pTaskList, k);
if (pTaskEntry->id.streamId == pId->streamId) {
numOfTotal++;
if (pTaskEntry->id.taskId != pId->taskId) {
STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
if (pEntry->status == TASK_STATUS__READY) {
numOfReady++;
}
}
}
}
if (numOfReady > 0) {
mDebug("stream:0x%" PRIx64
" %d tasks are ready, %d tasks in stream-scan-history for more than 50s, drop related fill-history task",
pTaskEntry->id.streamId, numOfReady, numOfTotal - numOfReady);
return true;
} else {
return false;
}
}
}
return false;
}
//static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStreamExecInfo *pExecNode) {
// if (pTaskEntry->status == TASK_STATUS__STREAM_SCAN_HISTORY && pTaskEntry->statusLastDuration >= 10) {
// if (!pTaskEntry->inputQChanging && pTaskEntry->inputQUnchangeCounter > 10) {
// int32_t numOfReady = 0;
// int32_t numOfTotal = 0;
// for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) {
// STaskId *pId = taosArrayGet(pExecNode->pTaskList, k);
// if (pTaskEntry->id.streamId == pId->streamId) {
// numOfTotal++;
//
// if (pTaskEntry->id.taskId != pId->taskId) {
// STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
// if (pEntry->status == TASK_STATUS__READY) {
// numOfReady++;
// }
// }
// }
// }
//
// if (numOfReady > 0) {
// mDebug("stream:0x%" PRIx64
// " %d tasks are ready, %d tasks in stream-scan-history for more than 50s, drop related fill-history task",
// pTaskEntry->id.streamId, numOfReady, numOfTotal - numOfReady);
// return true;
// } else {
// return false;
// }
// }
// }
//
// return false;
//}
// currently only handle the sink task
// 1. sink task, drop related fill-history task msg is missing
@ -2882,11 +2951,17 @@ static int32_t mndDropRelatedFillhistoryTask(SMnode *pMnode, STaskStatusEntry *p
mDebug("build and send drop related fill-history task for task:0x%x", pTask->id.taskId);
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
SEpSet epset;
bool hasEpset = false;
int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
if (hasEpset) {
tmsgSendReq(&epset, &msg);
}
return TSDB_CODE_SUCCESS;
}
@ -2934,6 +3009,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
bool checkpointFailed = false;
int64_t activeCheckpointId = 0;
int64_t streamId = 0;
int32_t transId = 0;
SDecoder decoder = {0};
tDecoderInit(&decoder, pReq->pCont, pReq->contLen);
@ -2976,7 +3053,9 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) {
updateStageInfo(pTaskEntry, p->stage);
if (pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true;
if (pTaskEntry->nodeId == SNODE_HANDLE) {
snodeChanged = true;
}
} else {
// task is idle for more than 50 sec.
if (fabs(pTaskEntry->inputQUsed - p->inputQUsed) <= DBL_EPSILON) {
@ -3000,6 +3079,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (p->checkpointFailed) {
checkpointFailed = p->checkpointFailed;
streamId = p->id.streamId;
transId = p->chkpointTransId;
}
}
}
@ -3014,18 +3095,18 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (p->status != TASK_STATUS__READY) {
mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status));
if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) {
bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo);
if (drop) {
SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId);
if (pStreamObj == NULL) {
mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pStreamObj->uid);
} else {
mndDropRelatedFillhistoryTask(pMnode, pTaskEntry, pStreamObj);
mndReleaseStream(pMnode, pStreamObj);
}
}
}
// if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) {
// bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo);
// if (drop) {
// SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId);
// if (pStreamObj == NULL) {
// mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pStreamObj->uid);
// } else {
// mndDropRelatedFillhistoryTask(pMnode, pTaskEntry, pStreamObj);
// mndReleaseStream(pMnode, pStreamObj);
// }
// }
// }
}
}
@ -3038,9 +3119,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (allReady || snodeChanged) {
// if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal
mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status",
execInfo.activeCheckpoint);
mndResetStatusFromCheckpoint(pMnode, activeCheckpointId);
mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", activeCheckpointId);
mndResetStatusFromCheckpoint(pMnode, streamId, transId);
} else {
mInfo("not all vgroups are ready, wait for next HB from stream tasks");
}

View File

@ -90,16 +90,20 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, int64_t streamUid, const char
if (strcmp(tInfo.name, MND_STREAM_CHECKPOINT_NAME) == 0) {
if (strcmp(pTransName, MND_STREAM_DROP_NAME) != 0) {
mWarn("conflict with other transId:%d streamUid:%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid,
tInfo.name);
return true;
}
} else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) ||
(strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0)) {
mWarn("conflict with other transId:%d streamUid:%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid,
mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid,
tInfo.name);
return true;
} else {
mDebug("not conflict with checkpoint trans, name:%s, continue create trans", pTransName);
}
} else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0) ||
(strcmp(tInfo.name, MND_STREAM_TASK_RESET_NAME) == 0)) {
mWarn("conflict with other transId:%d streamUid:0x%" PRIx64 ", trans:%s", tInfo.transId, tInfo.streamUid,
tInfo.name);
return true;
}
} else {
mDebug("stream:0x%"PRIx64" no conflict trans existed, continue create trans", streamUid);
}
if (lock) {

View File

@ -27,7 +27,10 @@
#define MND_SUBSCRIBE_VER_NUMBER 2
#define MND_SUBSCRIBE_RESERVE_SIZE 64
#define MND_SUBSCRIBE_REBALANCE_CNT 3
#define MND_CONSUMER_LOST_HB_CNT 6
#define MND_CONSUMER_LOST_CLEAR_THRESHOLD 43200
static int32_t mqRebInExecCnt = 0;
static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *);
static SSdbRow *mndSubActionDecode(SSdbRaw *pRaw);
@ -38,14 +41,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg);
static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg);
static int32_t mndRetrieveSubscribe(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
static void mndCancelGetNextSubscribe(SMnode *pMnode, void *pIter);
static int32_t mndSetSubRedoLogs(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj *pSub) {
SSdbRaw *pRedoRaw = mndSubActionEncode(pSub);
if (pRedoRaw == NULL) return -1;
if (mndTransAppendRedolog(pTrans, pRedoRaw) != 0) return -1;
if (sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY) != 0) return -1;
return 0;
}
static int32_t mndCheckConsumer(SRpcMsg *pMsg, SHashObj* hash);
static int32_t mndSetSubCommitLogs(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj *pSub) {
SSdbRaw *pCommitRaw = mndSubActionEncode(pSub);
@ -68,7 +64,7 @@ int32_t mndInitSubscribe(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_VND_TMQ_SUBSCRIBE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_VND_TMQ_DELETE_SUB_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_DO_REBALANCE, mndProcessRebalanceReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_TIMER, mndProcessRebalanceReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_DROP_CGROUP, mndProcessDropCgroupReq);
mndSetMsgHandle(pMnode, TDMT_MND_TMQ_DROP_CGROUP_RSP, mndTransProcessRsp);
@ -213,16 +209,18 @@ static int32_t mndSplitSubscribeKey(const char *key, char *topic, char *cgroup,
}
static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
SMqRebInfo *pRebSub = taosHashGet(pHash, key, strlen(key) + 1);
if (pRebSub == NULL) {
pRebSub = tNewSMqRebSubscribe(key);
if (pRebSub == NULL) {
SMqRebInfo *pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
if (pRebInfo == NULL) {
pRebInfo = tNewSMqRebSubscribe(key);
if (pRebInfo == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
taosHashPut(pHash, key, strlen(key) + 1, pRebSub, sizeof(SMqRebInfo));
taosHashPut(pHash, key, strlen(key) + 1, pRebInfo, sizeof(SMqRebInfo));
taosMemoryFree(pRebInfo);
pRebInfo = taosHashGet(pHash, key, strlen(key) + 1);
}
return pRebSub;
return pRebInfo;
}
static void doRemoveLostConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, const SMqRebInputObj *pInput) {
@ -727,17 +725,156 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu
return 0;
}
static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
SMnode *pMnode = pMsg->info.node;
SMqDoRebalanceMsg *pReq = pMsg->pCont;
void *pIter = NULL;
// bool rebalanceOnce = false; // to ensure only once.
static void freeRebalanceItem(void *param) {
SMqRebInfo *pInfo = param;
taosArrayDestroy(pInfo->newConsumers);
taosArrayDestroy(pInfo->removedConsumers);
}
mInfo("mq re-balance start, total required re-balanced trans:%d", taosHashGetSize(pReq->rebSubHash));
static int32_t mndCheckConsumer(SRpcMsg *pMsg, SHashObj* rebSubHash) {
SMnode *pMnode = pMsg->info.node;
SSdb *pSdb = pMnode->pSdb;
SMqConsumerObj *pConsumer;
void *pIter = NULL;
// iterate all consumers, find all modification
while (1) {
pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
if (pIter == NULL) {
break;
}
int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1);
int32_t status = atomic_load_32(&pConsumer->status);
mInfo("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d",
pConsumer->consumerId, status, mndConsumerStatusName(status), pConsumer->subscribeTime, pConsumer->createTime,
hbStatus);
if (status == MQ_CONSUMER_STATUS_READY) {
if (taosArrayGetSize(pConsumer->assignedTopics) == 0) { // unsubscribe or close
mndDropConsumerFromSdb(pMnode, pConsumer->consumerId, &pMsg->info);
} else if (hbStatus > MND_CONSUMER_LOST_HB_CNT) {
taosRLockLatch(&pConsumer->lock);
int32_t topicNum = taosArrayGetSize(pConsumer->currentTopics);
for (int32_t i = 0; i < topicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *removedTopic = taosArrayGetP(pConsumer->currentTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(rebSubHash, key);
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
}
taosRUnLockLatch(&pConsumer->lock);
}else{
int32_t newTopicNum = taosArrayGetSize(pConsumer->currentTopics);
for (int32_t i = 0; i < newTopicNum; i++) {
char * topic = taosArrayGetP(pConsumer->currentTopics, i);
SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topic);
if (pSub == NULL) {
continue;
}
taosRLockLatch(&pSub->lock);
// 2.2 iterate all vg assigned to the consumer of that topic
SMqConsumerEp *pConsumerEp = taosHashGet(pSub->consumerHash, &pConsumer->consumerId, sizeof(int64_t));
int32_t vgNum = taosArrayGetSize(pConsumerEp->vgs);
for (int32_t j = 0; j < vgNum; j++) {
SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j);
SVgObj * pVgroup = mndAcquireVgroup(pMnode, pVgEp->vgId);
if (!pVgroup) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
mndMakeSubscribeKey(key, pConsumer->cgroup, topic);
mndGetOrCreateRebSub(rebSubHash, key);
mInfo("vnode splitted, vgId:%d rebalance will be triggered", pVgEp->vgId);
}
mndReleaseVgroup(pMnode, pVgroup);
}
taosRUnLockLatch(&pSub->lock);
mndReleaseSubscribe(pMnode, pSub);
}
}
} else if (status == MQ_CONSUMER_STATUS_LOST) {
if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { // clear consumer if lost a day
mndDropConsumerFromSdb(pMnode, pConsumer->consumerId, &pMsg->info);
}
} else {
taosRLockLatch(&pConsumer->lock);
int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics);
for (int32_t i = 0; i < newTopicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *newTopic = taosArrayGetP(pConsumer->rebNewTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, newTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(rebSubHash, key);
taosArrayPush(pRebSub->newConsumers, &pConsumer->consumerId);
}
int32_t removedTopicNum = taosArrayGetSize(pConsumer->rebRemovedTopics);
for (int32_t i = 0; i < removedTopicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN];
char *removedTopic = taosArrayGetP(pConsumer->rebRemovedTopics, i);
mndMakeSubscribeKey(key, pConsumer->cgroup, removedTopic);
SMqRebInfo *pRebSub = mndGetOrCreateRebSub(rebSubHash, key);
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
}
if (newTopicNum == 0 && removedTopicNum == 0 && taosArrayGetSize(pConsumer->assignedTopics) == 0) { // unsubscribe or close
mndDropConsumerFromSdb(pMnode, pConsumer->consumerId, &pMsg->info);
}
taosRUnLockLatch(&pConsumer->lock);
}
mndReleaseConsumer(pMnode, pConsumer);
}
return 0;
}
bool mndRebTryStart() {
int32_t old = atomic_val_compare_exchange_32(&mqRebInExecCnt, 0, 1);
mInfo("rebalance counter old val:%d", old);
return old == 0;
}
void mndRebCntInc() {
int32_t val = atomic_add_fetch_32(&mqRebInExecCnt, 1);
mInfo("rebalance cnt inc, value:%d", val);
}
void mndRebCntDec() {
int32_t val = atomic_sub_fetch_32(&mqRebInExecCnt, 1);
mInfo("rebalance cnt sub, value:%d", val);
}
static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
int code = 0;
mInfo("start to process mq timer");
if (!mndRebTryStart()) {
mInfo("mq rebalance already in progress, do nothing");
return code;
}
SHashObj *rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK);
if (rebSubHash == NULL) {
mError("failed to create rebalance hashmap");
terrno = TSDB_CODE_OUT_OF_MEMORY;
code = -1;
goto END;
}
taosHashSetFreeFp(rebSubHash, freeRebalanceItem);
mndCheckConsumer(pMsg, rebSubHash);
mInfo("mq re-balance start, total required re-balanced trans:%d", taosHashGetSize(rebSubHash));
// here we only handle one topic rebalance requirement to ensure the atomic execution of this transaction.
void *pIter = NULL;
SMnode *pMnode = pMsg->info.node;
while (1) {
pIter = taosHashIterate(pReq->rebSubHash, pIter);
pIter = taosHashIterate(rebSubHash, pIter);
if (pIter == NULL) {
break;
}
@ -756,12 +893,11 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
taosArrayDestroy(rebOutput.modifyConsumers);
taosArrayDestroy(rebOutput.rebVgs);
taosHashCancelIterate(pReq->rebSubHash, pIter);
taosHashCancelIterate(rebSubHash, pIter);
terrno = TSDB_CODE_OUT_OF_MEMORY;
mInfo("mq re-balance failed, due to out of memory");
taosHashCleanup(pReq->rebSubHash);
mndRebEnd();
return -1;
mError("mq re-balance failed, due to out of memory");
code = -1;
goto END;
}
SMqRebInfo *pRebInfo = (SMqRebInfo *)pIter;
@ -829,10 +965,12 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
// reset flag
mInfo("mq re-balance completed successfully");
taosHashCleanup(pReq->rebSubHash);
mndRebEnd();
return 0;
END:
taosHashCleanup(rebSubHash);
mndRebCntDec();
return code;
}
static int32_t sendDeleteSubToVnode(SMqSubscribeObj *pSub, STrans *pTrans){
@ -1215,7 +1353,7 @@ int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName)
return -1;
}
if (mndSetDropSubRedoLogs(pMnode, pTrans, pSub) < 0) {
if (mndSetDropSubCommitLogs(pMnode, pTrans, pSub) < 0) {
sdbRelease(pSdb, pSub);
sdbCancelFetch(pSdb, pIter);
return -1;

View File

@ -180,7 +180,7 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
goto _OUT;
}
mInfo("trans:%d, is proposed, saved:%d code:0x%x, apply index:%" PRId64 " term:%" PRIu64 " config:%" PRId64
mInfo("trans:%d, process sync proposal, saved:%d code:0x%x, apply index:%" PRId64 " term:%" PRIu64 " config:%" PRId64
" role:%s raw:%p sec:%d seq:%" PRId64,
transId, pMgmt->transId, pMeta->code, pMeta->index, pMeta->term, pMeta->lastConfigIndex, syncStr(pMeta->state),
pRaw, pMgmt->transSec, pMgmt->transSeq);
@ -208,15 +208,11 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
}
if (pTrans->stage == TRN_STAGE_PREPARE) {
bool continueExec = mndTransPerformPrepareStage(pMnode, pTrans);
bool continueExec = mndTransPerformPrepareStage(pMnode, pTrans, false);
if (!continueExec) goto _OUT;
}
if (pTrans->id != pMgmt->transId) {
mInfo("trans:%d, execute in mnode which not leader or sync timeout, createTime:%" PRId64 " saved trans:%d",
pTrans->id, pTrans->createdTime, pMgmt->transId);
mndTransRefresh(pMnode, pTrans);
}
sdbSetApplyInfo(pMnode->pSdb, pMeta->index, pMeta->term, pMeta->lastConfigIndex);
sdbWriteFile(pMnode->pSdb, tsMndSdbWriteDelta);
@ -234,6 +230,7 @@ static int32_t mndPostMgmtCode(SMnode *pMnode, int32_t code) {
goto _OUT;
}
int32_t transId = pMgmt->transId;
pMgmt->transId = 0;
pMgmt->transSec = 0;
pMgmt->transSeq = 0;
@ -241,9 +238,9 @@ static int32_t mndPostMgmtCode(SMnode *pMnode, int32_t code) {
tsem_post(&pMgmt->syncSem);
if (pMgmt->errCode != 0) {
mError("trans:%d, failed to propose since %s, post sem", pMgmt->transId, tstrerror(pMgmt->errCode));
mError("trans:%d, failed to propose since %s, post sem", transId, tstrerror(pMgmt->errCode));
} else {
mInfo("trans:%d, is proposed and post sem, seq:%" PRId64, pMgmt->transId, pMgmt->transSeq);
mInfo("trans:%d, is proposed and post sem, seq:%" PRId64, transId, pMgmt->transSeq);
}
_OUT:
@ -542,7 +539,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
taosThreadMutexLock(&pMgmt->lock);
pMgmt->errCode = 0;
if (pMgmt->transId != 0 /* && pMgmt->transId != transId*/) {
if (pMgmt->transId != 0) {
mError("trans:%d, can't be proposed since trans:%d already waiting for confirm", transId, pMgmt->transId);
taosThreadMutexUnlock(&pMgmt->lock);
rpcFreeCont(req.pCont);

View File

@ -15,7 +15,7 @@
#define _DEFAULT_SOURCE
#include "mndTrans.h"
#include "mndConsumer.h"
#include "mndSubscribe.h"
#include "mndDb.h"
#include "mndPrivilege.h"
#include "mndShow.h"
@ -36,21 +36,25 @@ static int32_t mndTransAppendLog(SArray *pArray, SSdbRaw *pRaw);
static int32_t mndTransAppendAction(SArray *pArray, STransAction *pAction);
static void mndTransDropLogs(SArray *pArray);
static void mndTransDropActions(SArray *pArray);
static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pArray);
static int32_t mndTransExecuteRedoLogs(SMnode *pMnode, STrans *pTrans);
static int32_t mndTransExecuteUndoLogs(SMnode *pMnode, STrans *pTrans);
static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans);
static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans);
static int32_t mndTransExecuteCommitActions(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformRedoLogStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformUndoLogStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans);
static bool mndTransPerformFinishStage(SMnode *pMnode, STrans *pTrans);
static bool mndCannotExecuteTransAction(SMnode *pMnode) { return !pMnode->deploy && !mndIsLeader(pMnode); }
static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pArray, bool topHalf);
static int32_t mndTransExecuteRedoLogs(SMnode *pMnode, STrans *pTrans, bool topHalf);
static int32_t mndTransExecuteUndoLogs(SMnode *pMnode, STrans *pTrans, bool topHalf);
static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans, bool topHalf);
static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans, bool topHalf);
static int32_t mndTransExecuteCommitActions(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformRedoLogStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformUndoLogStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndTransPerformFinishStage(SMnode *pMnode, STrans *pTrans, bool topHalf);
static bool mndCannotExecuteTransAction(SMnode *pMnode, bool topHalf) {
return (!pMnode->deploy && !mndIsLeader(pMnode)) || !topHalf;
}
static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans);
static int32_t mndProcessTransTimer(SRpcMsg *pReq);
@ -1090,8 +1094,9 @@ static void mndTransResetActions(SMnode *pMnode, STrans *pTrans, SArray *pArray)
}
}
static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransAction *pAction) {
static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) {
if (pAction->rawWritten) return 0;
if (topHalf) return TSDB_CODE_MND_TRANS_CTX_SWITCH;
int32_t code = sdbWriteWithoutFree(pMnode->pSdb, pAction->pRaw);
if (code == 0 || terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) {
@ -1112,9 +1117,12 @@ static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransActi
return code;
}
static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction) {
static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) {
if (pAction->msgSent) return 0;
if (mndCannotExecuteTransAction(pMnode)) return -1;
if (mndCannotExecuteTransAction(pMnode, topHalf)) {
terrno = TSDB_CODE_MND_TRANS_CTX_SWITCH;
return TSDB_CODE_MND_TRANS_CTX_SWITCH;
}
int64_t signature = pTrans->id;
signature = (signature << 32);
@ -1159,7 +1167,8 @@ static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransActio
return code;
}
static int32_t mndTransExecNullMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction) {
static int32_t mndTransExecNullMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) {
if (!topHalf) return TSDB_CODE_MND_TRANS_CTX_SWITCH;
pAction->rawWritten = 0;
pAction->errCode = 0;
mInfo("trans:%d, %s:%d confirm action executed", pTrans->id, mndTransStr(pAction->stage), pAction->id);
@ -1168,34 +1177,39 @@ static int32_t mndTransExecNullMsg(SMnode *pMnode, STrans *pTrans, STransAction
return 0;
}
static int32_t mndTransExecSingleAction(SMnode *pMnode, STrans *pTrans, STransAction *pAction) {
static int32_t mndTransExecSingleAction(SMnode *pMnode, STrans *pTrans, STransAction *pAction, bool topHalf) {
if (pAction->actionType == TRANS_ACTION_RAW) {
return mndTransWriteSingleLog(pMnode, pTrans, pAction);
return mndTransWriteSingleLog(pMnode, pTrans, pAction, topHalf);
} else if (pAction->actionType == TRANS_ACTION_MSG) {
return mndTransSendSingleMsg(pMnode, pTrans, pAction);
return mndTransSendSingleMsg(pMnode, pTrans, pAction, topHalf);
} else {
return mndTransExecNullMsg(pMnode, pTrans, pAction);
return mndTransExecNullMsg(pMnode, pTrans, pAction, topHalf);
}
}
static int32_t mndTransExecSingleActions(SMnode *pMnode, STrans *pTrans, SArray *pArray) {
static int32_t mndTransExecSingleActions(SMnode *pMnode, STrans *pTrans, SArray *pArray, bool topHalf) {
int32_t numOfActions = taosArrayGetSize(pArray);
int32_t code = 0;
for (int32_t action = 0; action < numOfActions; ++action) {
STransAction *pAction = taosArrayGet(pArray, action);
code = mndTransExecSingleAction(pMnode, pTrans, pAction);
if (code != 0) break;
code = mndTransExecSingleAction(pMnode, pTrans, pAction, topHalf);
if (code != 0) {
mInfo("trans:%d, action:%d not executed since %s. numOfActions:%d", pTrans->id, action, tstrerror(code),
numOfActions);
break;
}
}
return code;
}
static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pArray) {
static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pArray, bool topHalf) {
int32_t numOfActions = taosArrayGetSize(pArray);
int32_t code = 0;
if (numOfActions == 0) return 0;
if (mndTransExecSingleActions(pMnode, pTrans, pArray) != 0) {
if ((code = mndTransExecSingleActions(pMnode, pTrans, pArray, topHalf)) != 0) {
return -1;
}
@ -1248,31 +1262,31 @@ static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pA
}
}
static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->redoActions);
static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans, bool topHalf) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->redoActions, topHalf);
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
mError("failed to execute redoActions since:%s, code:0x%x", terrstr(), terrno);
}
return code;
}
static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->undoActions);
static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans, bool topHalf) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->undoActions, topHalf);
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
mError("failed to execute undoActions since %s", terrstr());
}
return code;
}
static int32_t mndTransExecuteCommitActions(SMnode *pMnode, STrans *pTrans) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->commitActions);
static int32_t mndTransExecuteCommitActions(SMnode *pMnode, STrans *pTrans, bool topHalf) {
int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->commitActions, topHalf);
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
mError("failed to execute commitActions since %s", terrstr());
}
return code;
}
static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) {
static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans, bool topHalf) {
int32_t code = 0;
int32_t numOfActions = taosArrayGetSize(pTrans->redoActions);
if (numOfActions == 0) return code;
@ -1289,7 +1303,7 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans)
for (int32_t action = pTrans->redoActionPos; action < numOfActions; ++action) {
STransAction *pAction = taosArrayGet(pTrans->redoActions, pTrans->redoActionPos);
code = mndTransExecSingleAction(pMnode, pTrans, pAction);
code = mndTransExecSingleAction(pMnode, pTrans, pAction, topHalf);
if (code == 0) {
if (pAction->msgSent) {
if (pAction->msgReceived) {
@ -1317,14 +1331,16 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans)
}
mndSetTransLastAction(pTrans, pAction);
if (mndCannotExecuteTransAction(pMnode)) break;
if (mndCannotExecuteTransAction(pMnode, topHalf)) break;
if (code == 0) {
pTrans->code = 0;
pTrans->redoActionPos++;
mInfo("trans:%d, %s:%d is executed and need sync to other mnodes", pTrans->id, mndTransStr(pAction->stage),
pAction->id);
taosThreadMutexUnlock(&pTrans->mutex);
code = mndTransSync(pMnode, pTrans);
taosThreadMutexLock(&pTrans->mutex);
if (code != 0) {
pTrans->redoActionPos--;
pTrans->code = terrno;
@ -1357,7 +1373,7 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans)
return code;
}
bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans) {
bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
bool continueExec = true;
int32_t code = 0;
@ -1368,7 +1384,7 @@ bool mndTransPerformPrepareStage(SMnode *pMnode, STrans *pTrans) {
for (int32_t action = 0; action < numOfActions; ++action) {
STransAction *pAction = taosArrayGet(pTrans->prepareActions, action);
code = mndTransExecSingleAction(pMnode, pTrans, pAction);
code = mndTransExecSingleAction(pMnode, pTrans, pAction, topHalf);
if (code != 0) {
mError("trans:%d, failed to execute prepare action:%d, numOfActions:%d", pTrans->id, action, numOfActions);
return false;
@ -1381,17 +1397,17 @@ _OVER:
return continueExec;
}
static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans) {
static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
bool continueExec = true;
int32_t code = 0;
if (pTrans->exec == TRN_EXEC_SERIAL) {
code = mndTransExecuteRedoActionsSerial(pMnode, pTrans);
code = mndTransExecuteRedoActionsSerial(pMnode, pTrans, topHalf);
} else {
code = mndTransExecuteRedoActions(pMnode, pTrans);
code = mndTransExecuteRedoActions(pMnode, pTrans, topHalf);
}
if (mndCannotExecuteTransAction(pMnode)) return false;
if (mndCannotExecuteTransAction(pMnode, topHalf)) return false;
terrno = code;
if (code == 0) {
@ -1431,8 +1447,8 @@ static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans) {
if (mndCannotExecuteTransAction(pMnode)) return false;
static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
if (mndCannotExecuteTransAction(pMnode, topHalf)) return false;
bool continueExec = true;
int32_t code = mndTransCommit(pMnode, pTrans);
@ -1452,9 +1468,9 @@ static bool mndTransPerformCommitStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans) {
static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
bool continueExec = true;
int32_t code = mndTransExecuteCommitActions(pMnode, pTrans);
int32_t code = mndTransExecuteCommitActions(pMnode, pTrans, topHalf);
if (code == 0) {
pTrans->code = 0;
@ -1471,9 +1487,9 @@ static bool mndTransPerformCommitActionStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans) {
static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
bool continueExec = true;
int32_t code = mndTransExecuteUndoActions(pMnode, pTrans);
int32_t code = mndTransExecuteUndoActions(pMnode, pTrans, topHalf);
if (code == 0) {
pTrans->stage = TRN_STAGE_PRE_FINISH;
@ -1491,8 +1507,8 @@ static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans) {
if (mndCannotExecuteTransAction(pMnode)) return false;
static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
if (mndCannotExecuteTransAction(pMnode, topHalf)) return false;
bool continueExec = true;
int32_t code = mndTransRollback(pMnode, pTrans);
@ -1510,8 +1526,8 @@ static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformPreFinishStage(SMnode *pMnode, STrans *pTrans) {
if (mndCannotExecuteTransAction(pMnode)) return false;
static bool mndTransPerformPreFinishStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
if (mndCannotExecuteTransAction(pMnode, topHalf)) return false;
bool continueExec = true;
int32_t code = mndTransPreFinish(pMnode, pTrans);
@ -1529,8 +1545,9 @@ static bool mndTransPerformPreFinishStage(SMnode *pMnode, STrans *pTrans) {
return continueExec;
}
static bool mndTransPerformFinishStage(SMnode *pMnode, STrans *pTrans) {
static bool mndTransPerformFinishStage(SMnode *pMnode, STrans *pTrans, bool topHalf) {
bool continueExec = false;
if (topHalf) return continueExec;
SSdbRaw *pRaw = mndTransEncode(pTrans);
if (pRaw == NULL) {
@ -1558,43 +1575,28 @@ void mndTransExecuteImp(SMnode *pMnode, STrans *pTrans, bool topHalf) {
pTrans->lastExecTime = taosGetTimestampMs();
switch (pTrans->stage) {
case TRN_STAGE_PREPARE:
continueExec = mndTransPerformPrepareStage(pMnode, pTrans);
continueExec = mndTransPerformPrepareStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_REDO_ACTION:
continueExec = mndTransPerformRedoActionStage(pMnode, pTrans);
continueExec = mndTransPerformRedoActionStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_COMMIT:
if (topHalf) {
continueExec = mndTransPerformCommitStage(pMnode, pTrans);
} else {
mInfo("trans:%d, can not commit since not leader", pTrans->id);
continueExec = false;
}
continueExec = mndTransPerformCommitStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_COMMIT_ACTION:
continueExec = mndTransPerformCommitActionStage(pMnode, pTrans);
continueExec = mndTransPerformCommitActionStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_ROLLBACK:
if (topHalf) {
continueExec = mndTransPerformRollbackStage(pMnode, pTrans);
} else {
mInfo("trans:%d, can not rollback since not leader", pTrans->id);
continueExec = false;
}
continueExec = mndTransPerformRollbackStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_UNDO_ACTION:
continueExec = mndTransPerformUndoActionStage(pMnode, pTrans);
continueExec = mndTransPerformUndoActionStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_PRE_FINISH:
if (topHalf) {
continueExec = mndTransPerformPreFinishStage(pMnode, pTrans);
} else {
mInfo("trans:%d, can not pre-finish since not leader", pTrans->id);
continueExec = false;
}
continueExec = mndTransPerformPreFinishStage(pMnode, pTrans, topHalf);
break;
case TRN_STAGE_FINISH:
continueExec = mndTransPerformFinishStage(pMnode, pTrans);
continueExec = mndTransPerformFinishStage(pMnode, pTrans, topHalf);
break;
default:
continueExec = false;

View File

@ -2707,8 +2707,16 @@ int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgro
if (newVg1.replica == 1) {
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg1, pArray) != 0) goto _OVER;
newVg1.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId) != 0) goto _OVER;
if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg1, &newVg1.vnodeGid[1]) != 0) goto _OVER;
newVg1.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
if (mndAddAlterVnodeTypeAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[1].dnodeId) != 0) goto _OVER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg1, newVg1.vnodeGid[0].dnodeId) != 0) goto _OVER;
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER;
} else if (newVg1.replica == 3) {
SVnodeGid del1 = {0};
if (mndRemoveVnodeFromVgroup(pMnode, pTrans, &newVg1, pArray, &del1) != 0) goto _OVER;

View File

@ -19,26 +19,24 @@
#include "tqCommon.h"
#include "tuuid.h"
#define sndError(...) \
do { \
if (sndDebugFlag & DEBUG_ERROR) { \
taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
// clang-format off
#define sndError(...) do { if (sndDebugFlag & DEBUG_ERROR) {taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__);}} while (0)
#define sndInfo(...) do { if (sndDebugFlag & DEBUG_INFO) { taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__);}} while (0)
#define sndDebug(...) do { if (sndDebugFlag & DEBUG_DEBUG) { taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__);}} while (0)
// clang-format on
#define sndInfo(...) \
do { \
if (sndDebugFlag & DEBUG_INFO) { \
taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
#define sndDebug(...) \
do { \
if (sndDebugFlag & DEBUG_DEBUG) { \
taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
static STaskId replaceStreamTaskId(SStreamTask *pTask) {
ASSERT(pTask->info.fillHistory);
STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
pTask->id.streamId = pTask->streamTaskId.streamId;
pTask->id.taskId = pTask->streamTaskId.taskId;
return id;
}
static void restoreStreamTaskId(SStreamTask *pTask, STaskId *pId) {
ASSERT(pTask->info.fillHistory);
pTask->id.taskId = pId->taskId;
pTask->id.streamId = pId->streamId;
}
int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->upstreamInfo.pList) != 0);
@ -50,16 +48,12 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
streamTaskOpenAllUpstreamInput(pTask);
SStreamTask *pSateTask = pTask;
SStreamTask task = {0};
STaskId taskId = {0};
if (pTask->info.fillHistory) {
task.id.streamId = pTask->streamTaskId.streamId;
task.id.taskId = pTask->streamTaskId.taskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
taskId = replaceStreamTaskId(pTask);
}
pTask->pState = streamStateOpen(pSnode->path, pSateTask, false, -1, -1);
pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1);
if (pTask->pState == NULL) {
sndError("s-task:%s failed to open state for task", pTask->id.idStr);
return -1;
@ -67,6 +61,9 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
sndDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState);
}
if (pTask->info.fillHistory) {
restoreStreamTaskId(pTask, &taskId);
}
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList);
SReadHandle handle = {
@ -90,8 +87,8 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
// checkpoint ver is the kept version, handled data should be the next version.
if (pTask->chkInfo.checkpointId != 0) {
pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1;
sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr,
pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64,
pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
}
char *p = NULL;
@ -128,7 +125,7 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) {
}
pSnode->msgCb = pOption->msgCb;
pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs());
pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs(), tqStartTaskCompleteCallback);
if (pSnode->pMeta == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto FAIL;
@ -150,8 +147,8 @@ FAIL:
}
int32_t sndInit(SSnode *pSnode) {
resetStreamTaskStatus(pSnode->pMeta);
startStreamTasks(pSnode->pMeta);
tqStreamTaskResetStatus(pSnode->pMeta);
streamMetaStartAllTasks(pSnode->pMeta);
return 0;
}
@ -198,13 +195,19 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) {
case TDMT_STREAM_TASK_DEPLOY: {
void * pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return tqStreamTaskProcessDeployReq(pSnode->pMeta, -1, pReq, len, true, true);
return tqStreamTaskProcessDeployReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pReq, len, true, true);
}
case TDMT_STREAM_TASK_DROP:
return tqStreamTaskProcessDropReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen);
case TDMT_VND_STREAM_TASK_UPDATE:
return tqStreamTaskProcessUpdateReq(pSnode->pMeta, &pSnode->msgCb, pMsg, true);
case TDMT_VND_STREAM_TASK_RESET:
return tqStreamTaskProcessTaskResetReq(pSnode->pMeta, pMsg);
case TDMT_STREAM_TASK_PAUSE:
return tqStreamTaskProcessTaskPauseReq(pSnode->pMeta, pMsg->pCont);
case TDMT_STREAM_TASK_RESUME:
return tqStreamTaskProcessTaskResumeReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pMsg->pCont, false);
default:
ASSERT(0);
}

View File

@ -152,9 +152,6 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data);
char* tqOffsetBuildFName(const char* path, int32_t fVer);
int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname);
// tqStream
int32_t tqStopStreamTasks(STQ* pTq);
// tq util
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type);
int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);

View File

@ -256,31 +256,8 @@ void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]);
// tsdbFS.c ==============================================================================================
int32_t tsdbFSOpen(STsdb *pTsdb, int8_t rollback);
int32_t tsdbFSClose(STsdb *pTsdb);
int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS);
void tsdbFSDestroy(STsdbFS *pFS);
int32_t tDFileSetCmprFn(const void *p1, const void *p2);
int32_t tsdbFSCommit(STsdb *pTsdb);
int32_t tsdbFSRollback(STsdb *pTsdb);
int32_t tsdbFSPrepareCommit(STsdb *pTsdb, STsdbFS *pFS);
int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS);
void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS);
void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t);
int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet);
int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile);
// tsdbReaderWriter.c ==============================================================================================
// SDataFWriter
int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet);
int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync);
int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter);
int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx);
int32_t tsdbWriteDataBlk(SDataFWriter *pWriter, SMapData *mDataBlk, SBlockIdx *pBlockIdx);
int32_t tsdbWriteSttBlk(SDataFWriter *pWriter, SArray *aSttBlk);
int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo,
int8_t cmprAlg, int8_t toLast);
int32_t tsdbWriteDiskData(SDataFWriter *pWriter, const SDiskData *pDiskData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo);
int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo);
// SDataFReader
int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet);
int32_t tsdbDataFReaderClose(SDataFReader **ppReader);
@ -292,12 +269,6 @@ int32_t tsdbReadDataBlock(SDataFReader *pReader, SDataBlk *pBlock, SBlockData *p
int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData);
int32_t tsdbReadSttBlock(SDataFReader *pReader, int32_t iStt, SSttBlk *pSttBlk, SBlockData *pBlockData);
int32_t tsdbReadSttBlockEx(SDataFReader *pReader, int32_t iStt, SSttBlk *pSttBlk, SBlockData *pBlockData);
// SDelFWriter
int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb);
int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync);
int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx);
int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx);
int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter);
// SDelFReader
int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb);
int32_t tsdbDelFReaderClose(SDelFReader **ppReader);
@ -426,6 +397,7 @@ struct STbData {
tb_uid_t uid;
TSKEY minKey;
TSKEY maxKey;
SRWLatch lock;
SDelData *pHead;
SDelData *pTail;
SMemSkipList sl;
@ -737,7 +709,6 @@ struct STsdbReadSnap {
SMemTable *pIMem;
SQueryNode *pINode;
TFileSetArray *pfSetArray;
STsdbFS fs;
};
struct SDataFWriter {

View File

@ -216,8 +216,6 @@ int32_t tsdbBegin(STsdb* pTsdb);
int32_t tsdbCacheCommit(STsdb* pTsdb);
int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo);
int32_t tsdbRetention(STsdb* tsdb, int64_t now, int32_t sync);
// int32_t tsdbFinishCommit(STsdb* pTsdb);
// int32_t tsdbRollbackCommit(STsdb* pTsdb);
int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg);
int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmitRsp2* pRsp);
int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows);
@ -232,6 +230,7 @@ int tqPushMsg(STQ*, tmsg_t msgType);
int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg);
int tqUnregisterPushHandle(STQ* pTq, void* pHandle);
int tqScanWalAsync(STQ* pTq, bool ckPause);
int32_t tqStopStreamTasksAsync(STQ* pTq);
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp);
int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg);

View File

@ -36,7 +36,6 @@ static int metaDeleteBtimeIdx(SMeta *pMeta, const SMetaEntry *pME);
static int metaUpdateNcolIdx(SMeta *pMeta, const SMetaEntry *pME);
static int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME);
static void metaGetEntryInfo(const SMetaEntry *pEntry, SMetaInfo *pInfo) {
pInfo->uid = pEntry->uid;
pInfo->version = pEntry->version;
@ -562,6 +561,7 @@ int metaAddIndexToSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) {
tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn);
metaULock(pMeta);
metaDestroyTagIdxKey(pTagIdxKey);
pTagIdxKey = NULL;
}
nStbEntry.version = version;
@ -692,6 +692,7 @@ int metaDropIndexFromSTable(SMeta *pMeta, int64_t version, SDropIndexReq *pReq)
tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, pMeta->txn);
metaULock(pMeta);
metaDestroyTagIdxKey(pTagIdxKey);
pTagIdxKey = NULL;
}
// clear idx flag
@ -1146,6 +1147,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *p
tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, pMeta->txn);
}
metaDestroyTagIdxKey(pTagIdxKey);
pTagIdxKey = NULL;
}
}
tDecoderClear(&tdc);
@ -1865,6 +1867,7 @@ static int metaAddTagIndex(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTb
}
tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn);
metaDestroyTagIdxKey(pTagIdxKey);
pTagIdxKey = NULL;
}
tdbTbcClose(pCtbIdxc);
return 0;
@ -2228,15 +2231,14 @@ static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry) {
nTagData = tDataTypes[pTagColumn->type].bytes;
}
if (pTagData != NULL) {
if (metaCreateTagIdxKey(pCtbEntry->ctbEntry.suid, pTagColumn->colId, pTagData, nTagData, pTagColumn->type,
pCtbEntry->uid, &pTagIdxKey, &nTagIdxKey) < 0) {
ret = -1;
goto end;
}
tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn);
}
metaDestroyTagIdxKey(pTagIdxKey);
pTagIdxKey = NULL;
}
}
end:

View File

@ -240,23 +240,23 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui
static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTaskId *pId) {
STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId};
taosRLockLatch(&pMeta->lock);
streamMetaRLock(pMeta);
SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id));
if (ppTask && *ppTask) {
pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer;
pItem->fetchResultVer = (*ppTask)->info.triggerParam;
}
taosRUnLockLatch(&pMeta->lock);
streamMetaRUnLock(pMeta);
}
static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) {
streamMetaUnregisterTask(pMeta, streamId, taskId);
taosWLockLatch(&pMeta->lock);
streamMetaWLock(pMeta);
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta);
if (streamMetaCommit(pMeta) < 0) {
// persist to disk
}
taosWUnLockLatch(&pMeta->lock);
streamMetaWUnLock(pMeta);
smaDebug("vgId:%d, rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks);
}
@ -750,6 +750,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma
}
tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE);
taosMemoryFree(pReq);
pReq = NULL;
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -1301,14 +1302,14 @@ _checkpoint:
checkpointBuilt = true;
}
taosWLockLatch(&pMeta->lock);
streamMetaWLock(pMeta);
if (streamMetaSaveTask(pMeta, pTask)) {
taosWUnLockLatch(&pMeta->lock);
streamMetaWUnLock(pMeta);
code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY;
taosHashCancelIterate(pInfoHash, infoHash);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosWUnLockLatch(&pMeta->lock);
streamMetaWUnLock(pMeta);
smaDebug("vgId:%d, rsma commit, succeed to commit task:%p, submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64
", table:%" PRIi64 ", level:%d",
TD_VID(pVnode), pTask, pItem->submitReqVer, pItem->fetchResultVer, pRSmaInfo->suid, i + 1);
@ -1316,13 +1317,13 @@ _checkpoint:
}
}
if (pMeta) {
taosWLockLatch(&pMeta->lock);
streamMetaWLock(pMeta);
if (streamMetaCommit(pMeta)) {
taosWUnLockLatch(&pMeta->lock);
streamMetaWUnLock(pMeta);
code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
taosWUnLockLatch(&pMeta->lock);
streamMetaWUnLock(pMeta);
}
if (checkpointBuilt) {
smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint:%" PRIi64, TD_VID(pVnode), checkpointId);

View File

@ -96,7 +96,8 @@ int32_t tqInitialize(STQ* pTq) {
return -1;
}
pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, pTq->pVnode->config.vgId, -1);
int32_t vgId = TD_VID(pTq->pVnode);
pTq->pStreamMeta = streamMetaOpen(pTq->path, pTq, (FTaskExpand*)tqExpandTask, vgId, -1, tqStartTaskCompleteCallback);
if (pTq->pStreamMeta == NULL) {
return -1;
}
@ -1036,15 +1037,16 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
streamMetaReleaseTask(pMeta, pStreamTask);
} else {
STimeWindow* pWindow = &pTask->dataRange.window;
ASSERT(HAS_RELATED_FILLHISTORY_TASK(pTask) || streamTaskShouldStop(pTask));
// Not update the fill-history time window until the state transfer is completed.
tqDebug("s-task:%s scan-history in stream time window completed, start to handle data from WAL, startVer:%" PRId64
", window:%" PRId64 " - %" PRId64,
id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey);
code = streamTaskScanHistoryDataComplete(pTask);
ASSERT(0);
// STimeWindow* pWindow = &pTask->dataRange.window;
// ASSERT(HAS_RELATED_FILLHISTORY_TASK(pTask) || streamTaskShouldStop(pTask));
//
// // Not update the fill-history time window until the state transfer is completed.
// tqDebug("s-task:%s scan-history in stream time window completed, start to handle data from WAL, startVer:%" PRId64
// ", window:%" PRId64 " - %" PRId64,
// id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey);
//
// code = streamTaskScanHistoryDataComplete(pTask);
}
atomic_store_32(&pTask->status.inScanHistorySentinel, 0);
@ -1070,10 +1072,12 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
tqScanWal(pTq);
return 0;
}
int32_t code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode));
if(code == 0 && taskId > 0){
tqScanWalAsync(pTq, false);
}
return code;
}
@ -1090,108 +1094,11 @@ int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) {
}
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg;
SStreamMeta* pMeta = pTq->pStreamMeta;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);
if (pTask == NULL) {
tqError("vgId:%d process pause req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId,
pReq->taskId);
// since task is in [STOP|DROPPING] state, it is safe to assume the pause is active
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s receive pause msg from mnode", pTask->id.idStr);
streamTaskPause(pTask, pMeta);
SStreamTask* pHistoryTask = NULL;
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
pHistoryTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId);
if (pHistoryTask == NULL) {
tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64
", it may have been dropped already",
pMeta->vgId, pTask->hTaskInfo.id.taskId);
streamMetaReleaseTask(pMeta, pTask);
// since task is in [STOP|DROPPING] state, it is safe to assume the pause is active
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr);
streamTaskPause(pHistoryTask, pMeta);
streamMetaReleaseTask(pMeta, pHistoryTask);
}
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
}
int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int8_t igUntreated) {
int32_t vgId = pTq->pStreamMeta->vgId;
if (pTask == NULL) {
return -1;
}
streamTaskResume(pTask);
ETaskStatus status = streamTaskGetStatus(pTask, NULL);
int32_t level = pTask->info.taskLevel;
if (level == TASK_LEVEL__SINK) {
if (status == TASK_STATUS__UNINIT) {
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
}
if (status == TASK_STATUS__READY || status == TASK_STATUS__SCAN_HISTORY || status == TASK_STATUS__CK) {
// no lock needs to secure the access of the version
if (igUntreated && level == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus);
}
if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && status == TASK_STATUS__SCAN_HISTORY) {
streamStartScanHistoryAsync(pTask, igUntreated);
} else if (level == TASK_LEVEL__SOURCE && (streamQueueGetNumOfItems(pTask->inputq.queue) == 0)) {
tqScanWalAsync(pTq, false);
} else {
streamSchedExec(pTask);
}
} else if (status == TASK_STATUS__UNINIT) {
// todo: fill-history task init ?
if (pTask->info.fillHistory == 0) {
EStreamTaskEvent event = HAS_RELATED_FILLHISTORY_TASK(pTask) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT;
streamTaskHandleEvent(pTask->status.pSM, event);
}
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
return tqStreamTaskProcessTaskPauseReq(pTq->pStreamMeta, msg);
}
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId);
int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated);
if (code != 0) {
return code;
}
STaskId* pHTaskId = &pTask->hTaskInfo.id;
SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pHTaskId->streamId, pHTaskId->taskId);
if (pHistoryTask) {
code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated);
}
return code;
return tqStreamTaskProcessTaskResumeReq(pTq, sversion, msg, true);
}
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
@ -1256,10 +1163,11 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
if (pTask->status.downstreamReady != 1) {
pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id
pTask->chkInfo.checkpointingId = req.checkpointId;
pTask->chkInfo.transId = req.transId;
tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64
", set it failure",
pTask->id.idStr, req.checkpointId);
", transId:%d set it failed",
pTask->id.idStr, req.checkpointId, req.transId);
streamMetaReleaseTask(pMeta, pTask);
SRpcMsg rsp = {0};
@ -1288,10 +1196,9 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
// check if the checkpoint msg already sent or not.
if (status == TASK_STATUS__CK) {
ASSERT(pTask->chkInfo.checkpointingId == req.checkpointId);
tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64
" already received, ignore this msg and continue process checkpoint",
pTask->id.idStr, pTask->chkInfo.checkpointingId);
" transId:%d already received, ignore this msg and continue process checkpoint",
pTask->id.idStr, pTask->chkInfo.checkpointingId, req.transId);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask);
@ -1335,28 +1242,10 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
}
int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont;
SStreamMeta* pMeta = pTq->pStreamMeta;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);
if (pTask == NULL) {
tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already",
pMeta->vgId, pReq->taskId);
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr);
// clear flag set during do checkpoint, and open inputQ for all upstream tasks
if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) {
streamTaskClearCheckInfo(pTask, true);
streamTaskSetStatusReady(pTask);
}
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg);
}
// NOTE: here we may receive this message more than once, so need to handle this case
int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) {
SVDropHTaskReq* pReq = (SVDropHTaskReq*)pMsg->pCont;
@ -1375,14 +1264,17 @@ int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) {
return TSDB_CODE_SUCCESS;
}
taosThreadMutexLock(&pTask->lock);
ETaskStatus status = streamTaskGetStatus(pTask, NULL);
ASSERT(status == TASK_STATUS__STREAM_SCAN_HISTORY);
streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
// if (status == TASK_STATUS__STREAM_SCAN_HISTORY) {
// streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
// }
SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId};
streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id);
taosThreadMutexUnlock(&pTask->lock);
// clear the scheduler status
streamTaskSetSchedStatusInactive(pTask);
tqDebug("s-task:%s set scheduler status:%d after drop fill-history task", pTask->id.idStr, pTask->status.schedStatus);

View File

@ -195,7 +195,8 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, uint64_t
int64_t committedVer = walGetCommittedVer(pHandle->pWalReader->pWal);
int64_t appliedVer = walGetAppliedVer(pHandle->pWalReader->pWal);
wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 ", applied index:%" PRId64", 0x%"PRIx64,
wDebug("vgId:%d, start to fetch wal, index:%" PRId64 ", last:%" PRId64 " commit:%" PRId64 ", applied:%" PRId64
", 0x%" PRIx64,
vgId, offset, lastVer, committedVer, appliedVer, id);
while (offset <= appliedVer) {

View File

@ -39,7 +39,7 @@ int32_t tqScanWal(STQ* pTq) {
bool shouldIdle = true;
doScanWalForAllTasks(pTq->pStreamMeta, &shouldIdle);
if (shouldIdle) {
// if (shouldIdle) {
streamMetaWLock(pMeta);
int32_t times = (--pMeta->walScanCounter);
ASSERT(pMeta->walScanCounter >= 0);
@ -50,7 +50,7 @@ int32_t tqScanWal(STQ* pTq) {
} else {
tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION);
}
}
// }
taosMsleep(SCAN_WAL_IDLE_DURATION);
}
@ -109,8 +109,8 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
return -1;
}
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, restored:%d", vgId, numOfTasks,
alreadyRestored);
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, vnd restored:%d", vgId,
numOfTasks, alreadyRestored);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
@ -123,33 +123,25 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
return 0;
}
int32_t tqStopStreamTasks(STQ* pTq) {
int32_t tqStopStreamTasksAsync(STQ* pTq) {
SStreamMeta* pMeta = pTq->pStreamMeta;
int32_t vgId = TD_VID(pTq->pVnode);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
int32_t vgId = pMeta->vgId;
tqDebug("vgId:%d stop all %d stream task(s)", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to stop tasks, code:%s", vgId, terrstr());
return -1;
}
SArray* pTaskList = NULL;
streamMetaWLock(pMeta);
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
streamMetaWUnLock(pMeta);
tqDebug("vgId:%d create msg to stop tasks", vgId);
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId);
if (pTask == NULL) {
continue;
}
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = STREAM_EXEC_STOP_ALL_TASKS_ID;
streamTaskStop(pTask);
streamMetaReleaseTask(pMeta, pTask);
}
taosArrayDestroy(pTaskList);
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
return 0;
}

View File

@ -196,7 +196,7 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) {
int32_t code = 0;
STQ* pTq = pWriter->pTq;
taosWLockLatch(&pTq->pStreamMeta->lock);
streamMetaWLock(pTq->pStreamMeta);
tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode));
if (rollback) {
tdbAbort(pTq->pStreamMeta->db, pTq->pStreamMeta->txn);
@ -212,14 +212,14 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) {
taosMemoryFree(pWriter);
goto _err;
}
taosWUnLockLatch(&pTq->pStreamMeta->lock);
streamMetaWUnLock(pTq->pStreamMeta);
taosMemoryFree(pWriter);
return code;
_err:
tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode),
tstrerror(code));
taosWUnLockLatch(&pTq->pStreamMeta->lock);
streamMetaWUnLock(pTq->pStreamMeta);
return code;
}
@ -240,13 +240,13 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t
tDecoderClear(&decoder);
int64_t key[2] = {taskId.streamId, taskId.taskId};
taosWLockLatch(&pTq->pStreamMeta->lock);
streamMetaWLock(pTq->pStreamMeta);
if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, key, sizeof(int64_t) << 1, (uint8_t*)pData + sizeof(SSnapDataHdr),
nData - sizeof(SSnapDataHdr), pTq->pStreamMeta->txn) < 0) {
taosWUnLockLatch(&pTq->pStreamMeta->lock);
streamMetaWUnLock(pTq->pStreamMeta);
return -1;
}
taosWUnLockLatch(&pTq->pStreamMeta->lock);
streamMetaWUnLock(pTq->pStreamMeta);
} else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) {
// do nothing
}

View File

@ -36,19 +36,7 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) {
void tqUpdateNodeStage(STQ* pTq, bool isLeader) {
SSyncState state = syncGetState(pTq->pVnode->sync);
SStreamMeta* pMeta = pTq->pStreamMeta;
int64_t stage = pMeta->stage;
pMeta->stage = state.term;
pMeta->role = (isLeader)? NODE_ROLE_LEADER:NODE_ROLE_FOLLOWER;
if (isLeader) {
tqInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId,
state.term, stage, isLeader);
streamMetaStartHb(pMeta);
} else {
tqInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d", pMeta->vgId, state.term, stage,
isLeader);
}
streamMetaUpdateStageRole(pTq->pStreamMeta, state.term, isLeader);
}
static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) {

View File

@ -106,14 +106,15 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM
return rsp.code;
}
streamMetaWUnLock(pMeta);
// streamMetaWUnLock(pMeta);
// todo for test purpose
// the following two functions should not be executed within the scope of meta lock to avoid deadlock
streamTaskUpdateEpsetInfo(pTask, req.pNodeList);
streamTaskResetStatus(pTask);
// continue after lock the meta again
streamMetaWLock(pMeta);
// streamMetaWLock(pMeta);
SStreamTask** ppHTask = NULL;
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
@ -166,65 +167,17 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM
streamMetaWUnLock(pMeta);
} else {
if (!restored) {
tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag",
vgId);
tqDebug("vgId:%d vnode restore not completed, not start the tasks, clear the start after nodeUpdate flag", vgId);
pMeta->startInfo.tasksWillRestart = 0;
streamMetaWUnLock(pMeta);
} else {
tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks);
#if 1
#if 0
// for test purpose, to trigger the leader election
taosMSleep(5000);
#endif
tqStreamTaskStartAsync(pMeta, cb, true);
streamMetaWUnLock(pMeta);
#else
streamMetaWUnLock(pMeta);
// For debug purpose.
// the following procedure consume many CPU resource, result in the re-election of leader
// with high probability. So we employ it as a test case for the stream processing framework, with
// checkpoint/restart/nodeUpdate etc.
while (1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
}
tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
while (streamMetaTaskInTimer(pMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
streamMetaWLock(pMeta);
int32_t code = streamMetaReopen(pMeta);
if (code != 0) {
tqError("vgId:%d failed to reopen stream meta", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
streamMetaInitBackend(pMeta);
if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) {
tqError("vgId:%d failed to load stream tasks", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) {
tqInfo("vgId:%d start all stream tasks after all being updated", vgId);
resetStreamTaskStatus(pTq->pStreamMeta);
tqStartStreamTaskAsync(pTq, false);
} else {
tqInfo("vgId:%d, follower node not start stream tasks", vgId);
}
streamMetaWUnLock(pMeta);
#endif
}
}
@ -592,7 +545,7 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char*
SStreamTask* p = streamMetaAcquireTask(pMeta, streamId, taskId);
if (p != NULL && restored && p->info.fillHistory == 0) {
EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(p)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT;
EStreamTaskEvent event = TASK_EVENT_INIT;
streamTaskHandleEvent(p->status.pSM, event);
} else if (!restored) {
tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId);
@ -645,65 +598,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen
return 0;
}
int32_t startStreamTasks(SStreamMeta* pMeta) {
int32_t code = TSDB_CODE_SUCCESS;
int32_t vgId = pMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
SArray* pTaskList = NULL;
streamMetaWLock(pMeta);
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
taosHashClear(pMeta->startInfo.pReadyTaskSet);
taosHashClear(pMeta->startInfo.pFailedTaskSet);
pMeta->startInfo.startTs = taosGetTimestampMs();
streamMetaWUnLock(pMeta);
// broadcast the check downstream tasks msg
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId);
if (pTask == NULL) {
continue;
}
// fill-history task can only be launched by related stream tasks.
if (pTask->info.fillHistory == 1) {
streamMetaReleaseTask(pMeta, pTask);
continue;
}
if (pTask->status.downstreamReady == 1) {
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
tqDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task",
pTask->id.idStr);
streamLaunchFillHistoryTask(pTask);
}
streamMetaUpdateTaskDownstreamStatus(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, pTask->execInfo.init,
pTask->execInfo.start, true);
streamMetaReleaseTask(pMeta, pTask);
continue;
}
EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT;
int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event);
if (ret != TSDB_CODE_SUCCESS) {
code = ret;
}
streamMetaReleaseTask(pMeta, pTask);
}
taosArrayDestroy(pTaskList);
return code;
}
int32_t resetStreamTaskStatus(SStreamMeta* pMeta) {
int32_t tqStreamTaskResetStatus(SStreamMeta* pMeta) {
int32_t vgId = pMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
@ -728,15 +623,17 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) {
int32_t code = 0;
int64_t st = taosGetTimestampMs();
while (1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
streamMetaWLock(pMeta);
if (pMeta->startInfo.taskStarting == 1) {
pMeta->startInfo.restartCount += 1;
tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId,
pMeta->startInfo.restartCount);
streamMetaWUnLock(pMeta);
return TSDB_CODE_SUCCESS;
}
tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
pMeta->startInfo.taskStarting = 1;
streamMetaWUnLock(pMeta);
terrno = 0;
tqInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId,
@ -762,11 +659,9 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) {
}
if (isLeader && !tsDisableStream) {
resetStreamTaskStatus(pMeta);
tqStreamTaskResetStatus(pMeta);
streamMetaWUnLock(pMeta);
tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId);
startStreamTasks(pMeta);
streamMetaStartAllTasks(pMeta);
} else {
streamMetaResetStartInfo(&pMeta->startInfo);
streamMetaWUnLock(pMeta);
@ -784,11 +679,14 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead
int32_t vgId = pMeta->vgId;
if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) {
startStreamTasks(pMeta);
streamMetaStartAllTasks(pMeta);
return 0;
} else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) {
restartStreamTasks(pMeta, isLeader);
return 0;
} else if (taskId == STREAM_EXEC_STOP_ALL_TASKS_ID) {
streamMetaStopAllTasks(pMeta);
return 0;
}
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, taskId);
@ -812,3 +710,151 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead
return -1;
}
}
int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) {
STaskStartInfo* pStartInfo = &pMeta->startInfo;
streamMetaWLock(pMeta);
if (pStartInfo->restartCount > 0) {
pStartInfo->restartCount -= 1;
ASSERT(pStartInfo->taskStarting == 0);
tqDebug("vgId:%d role:%d need to restart all tasks again, restartCounter:%d", pMeta->vgId, pMeta->role,
pStartInfo->restartCount);
streamMetaWUnLock(pMeta);
restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER));
} else {
streamMetaWUnLock(pMeta);
tqDebug("vgId:%d start all tasks completed", pMeta->vgId);
}
return TSDB_CODE_SUCCESS;
}
int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) {
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);
if (pTask == NULL) {
tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already",
pMeta->vgId, pReq->taskId);
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr);
// clear flag set during do checkpoint, and open inputQ for all upstream tasks
if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) {
streamTaskClearCheckInfo(pTask, true);
streamTaskSetStatusReady(pTask);
}
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
}
int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg){
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);
if (pTask == NULL) {
tqError("vgId:%d process pause req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId,
pReq->taskId);
// since task is in [STOP|DROPPING] state, it is safe to assume the pause is active
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s receive pause msg from mnode", pTask->id.idStr);
streamTaskPause(pTask, pMeta);
SStreamTask* pHistoryTask = NULL;
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
pHistoryTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId);
if (pHistoryTask == NULL) {
tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64
", it may have been dropped already",
pMeta->vgId, pTask->hTaskInfo.id.taskId);
streamMetaReleaseTask(pMeta, pTask);
// since task is in [STOP|DROPPING] state, it is safe to assume the pause is active
return TSDB_CODE_SUCCESS;
}
tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr);
streamTaskPause(pHistoryTask, pMeta);
streamMetaReleaseTask(pMeta, pHistoryTask);
}
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
}
static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t sversion, int8_t igUntreated, bool fromVnode) {
SStreamMeta *pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle;
int32_t vgId = pMeta->vgId;
if (pTask == NULL) {
return -1;
}
streamTaskResume(pTask);
ETaskStatus status = streamTaskGetStatus(pTask, NULL);
int32_t level = pTask->info.taskLevel;
if (level == TASK_LEVEL__SINK) {
if (status == TASK_STATUS__UNINIT) {
}
streamMetaReleaseTask(pMeta, pTask);
return 0;
}
if (status == TASK_STATUS__READY || status == TASK_STATUS__SCAN_HISTORY || status == TASK_STATUS__CK) {
// no lock needs to secure the access of the version
if (igUntreated && level == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus);
}
if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && status == TASK_STATUS__SCAN_HISTORY) {
streamStartScanHistoryAsync(pTask, igUntreated);
} else if (level == TASK_LEVEL__SOURCE && (streamQueueGetNumOfItems(pTask->inputq.queue) == 0)) {
tqScanWalAsync((STQ*)handle, false);
} else {
streamSchedExec(pTask);
}
} else if (status == TASK_STATUS__UNINIT) {
// todo: fill-history task init ?
if (pTask->info.fillHistory == 0) {
EStreamTaskEvent event = /*HAS_RELATED_FILLHISTORY_TASK(pTask) ? TASK_EVENT_INIT_STREAM_SCANHIST : */TASK_EVENT_INIT;
streamTaskHandleEvent(pTask->status.pSM, event);
}
}
streamMetaReleaseTask(pMeta, pTask);
return 0;
}
int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* msg, bool fromVnode){
SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
SStreamMeta *pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);
int32_t code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode);
if (code != 0) {
return code;
}
STaskId* pHTaskId = &pTask->hTaskInfo.id;
SStreamTask* pHistoryTask = streamMetaAcquireTask(pMeta, pHTaskId->streamId, pHTaskId->taskId);
if (pHistoryTask) {
code = tqProcessTaskResumeImpl(handle, pHistoryTask, sversion, pReq->igUntreated, fromVnode);
}
return code;
}

View File

@ -885,6 +885,14 @@ static int32_t tsdbCacheLoadFromRaw(STsdb *pTsdb, tb_uid_t uid, SArray *pLastArr
int32_t code = 0;
rocksdb_writebatch_t *wb = NULL;
SArray *pTmpColArray = NULL;
SIdxKey *idxKey = taosArrayGet(remainCols, 0);
if (idxKey->key.cid != PRIMARYKEY_TIMESTAMP_COL_ID) {
SLastKey *key = &(SLastKey){.ltype = ltype, .uid = uid, .cid = PRIMARYKEY_TIMESTAMP_COL_ID};
taosArrayInsert(remainCols, 0, &(SIdxKey){0, *key});
}
int num_keys = TARRAY_SIZE(remainCols);
int16_t *aCols = taosMemoryMalloc(num_keys * sizeof(int16_t));
int16_t *slotIds = taosMemoryMalloc(num_keys * sizeof(int16_t));
@ -1175,8 +1183,10 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE
LRUHandle *h = taosLRUCacheLookup(pTsdb->lruCache, keys_list[i], klen);
if (h) {
SLastCol *pLastCol = (SLastCol *)taosLRUCacheValue(pTsdb->lruCache, h);
if (pLastCol->dirty && (pLastCol->ts <= eKey && pLastCol->ts >= sKey)) {
if (pLastCol->dirty) {
pLastCol->dirty = 0;
}
if (pLastCol->ts <= eKey && pLastCol->ts >= sKey) {
erase = true;
}
taosLRUCacheRelease(pTsdb->lruCache, h, erase);
@ -1189,8 +1199,10 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE
h = taosLRUCacheLookup(pTsdb->lruCache, keys_list[num_keys + i], klen);
if (h) {
SLastCol *pLastCol = (SLastCol *)taosLRUCacheValue(pTsdb->lruCache, h);
if (pLastCol->dirty && (pLastCol->ts <= eKey && pLastCol->ts >= sKey)) {
if (pLastCol->dirty) {
pLastCol->dirty = 0;
}
if (pLastCol->ts <= eKey && pLastCol->ts >= sKey) {
erase = true;
}
taosLRUCacheRelease(pTsdb->lruCache, h, erase);
@ -1943,6 +1955,7 @@ typedef struct SFSNextRowIter {
SArray *pIndexList;
int32_t iBrinIndex;
SBrinBlock brinBlock;
SBrinBlock *pBrinBlock;
int32_t iBrinRecord;
SBrinRecord brinRecord;
SBlockData blockData;
@ -2131,6 +2144,11 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlie
pBrinBlk = taosArrayGet(state->pIndexList, state->iBrinIndex);
}
if (!state->pBrinBlock) {
state->pBrinBlock = &state->brinBlock;
} else {
tBrinBlockClear(&state->brinBlock);
}
code = tsdbDataFileReadBrinBlock(state->pr->pFileReader, pBrinBlk, &state->brinBlock);
if (code != TSDB_CODE_SUCCESS) {
goto _err;
@ -2408,6 +2426,16 @@ int32_t clearNextRowFromFS(void *iter) {
state->pBlockData = NULL;
}
if (state->pBrinBlock) {
tBrinBlockDestroy(state->pBrinBlock);
state->pBrinBlock = NULL;
}
if (state->pIndexList) {
taosArrayDestroy(state->pIndexList);
state->pIndexList = NULL;
}
if (state->pTSRow) {
taosMemoryFree(state->pTSRow);
state->pTSRow = NULL;

File diff suppressed because it is too large Load Diff

View File

@ -702,7 +702,7 @@ _exit:
}
// EXPOSED APIS ====================================================================================
int32_t tsdbFSCommit(STsdb *pTsdb) {
static int32_t tsdbFSCommit(STsdb *pTsdb) {
int32_t code = 0;
int32_t lino = 0;
STsdbFS fs = {0};
@ -738,7 +738,7 @@ _exit:
return code;
}
int32_t tsdbFSRollback(STsdb *pTsdb) {
static int32_t tsdbFSRollback(STsdb *pTsdb) {
int32_t code = 0;
int32_t lino = 0;
@ -833,312 +833,3 @@ int32_t tsdbFSClose(STsdb *pTsdb) {
return code;
}
int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) {
int32_t code = 0;
int32_t lino = 0;
pFS->pDelFile = NULL;
if (pFS->aDFileSet) {
taosArrayClear(pFS->aDFileSet);
} else {
pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet));
if (pFS->aDFileSet == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
if (pTsdb->fs.pDelFile) {
pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pFS->pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
*pFS->pDelFile = *pTsdb->fs.pDelFile;
}
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid};
// head
fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
*fSet.pHeadF = *pSet->pHeadF;
// data
fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
*fSet.pDataF = *pSet->pDataF;
// sma
fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
*fSet.pSmaF = *pSet->pSmaF;
// stt
for (fSet.nSttF = 0; fSet.nSttF < pSet->nSttF; fSet.nSttF++) {
fSet.aSttF[fSet.nSttF] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile));
if (fSet.aSttF[fSet.nSttF] == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
*fSet.aSttF[fSet.nSttF] = *pSet->aSttF[fSet.nSttF];
}
if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) {
int32_t code = 0;
if (pDelFile) {
if (pFS->pDelFile == NULL) {
pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pFS->pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
}
*pFS->pDelFile = *pDelFile;
} else {
if (pFS->pDelFile) {
taosMemoryFree(pFS->pDelFile);
pFS->pDelFile = NULL;
}
}
_exit:
return code;
}
int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) {
int32_t code = 0;
int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE);
if (idx < 0) {
idx = taosArrayGetSize(pFS->aDFileSet);
} else {
SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx);
int32_t c = tDFileSetCmprFn(pSet, pDFileSet);
if (c == 0) {
*pDFileSet->pHeadF = *pSet->pHeadF;
*pDFileSet->pDataF = *pSet->pDataF;
*pDFileSet->pSmaF = *pSet->pSmaF;
// stt
if (pSet->nSttF > pDFileSet->nSttF) {
ASSERT(pSet->nSttF == pDFileSet->nSttF + 1);
pDFileSet->aSttF[pDFileSet->nSttF] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile));
if (pDFileSet->aSttF[pDFileSet->nSttF] == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
*pDFileSet->aSttF[pDFileSet->nSttF] = *pSet->aSttF[pSet->nSttF - 1];
pDFileSet->nSttF++;
} else if (pSet->nSttF < pDFileSet->nSttF) {
ASSERT(pSet->nSttF == 1);
for (int32_t iStt = 1; iStt < pDFileSet->nSttF; iStt++) {
taosMemoryFree(pDFileSet->aSttF[iStt]);
}
*pDFileSet->aSttF[0] = *pSet->aSttF[0];
pDFileSet->nSttF = 1;
} else {
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
*pDFileSet->aSttF[iStt] = *pSet->aSttF[iStt];
}
}
pDFileSet->diskId = pSet->diskId;
goto _exit;
}
}
ASSERT(pSet->nSttF == 1);
SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid, .nSttF = 1};
// head
fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
*fSet.pHeadF = *pSet->pHeadF;
// data
fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
*fSet.pDataF = *pSet->pDataF;
// sma
fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
*fSet.pSmaF = *pSet->pSmaF;
// stt
fSet.aSttF[0] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile));
if (fSet.aSttF[0] == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
*fSet.aSttF[0] = *pSet->aSttF[0];
if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
_exit:
return code;
}
int32_t tsdbFSPrepareCommit(STsdb *pTsdb, STsdbFS *pFSNew) {
int32_t code = 0;
int32_t lino = 0;
char tfname[TSDB_FILENAME_LEN];
tsdbGetCurrentFName(pTsdb, NULL, tfname);
// gnrt CURRENT.t
code = tsdbSaveFSToFile(pFSNew, tfname);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) {
int32_t code = 0;
int32_t nRef;
pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet));
if (pFS->aDFileSet == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
pFS->pDelFile = pTsdb->fs.pDelFile;
if (pFS->pDelFile) {
nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1);
ASSERT(nRef > 0);
}
SDFileSet fSet;
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
fSet = *pSet;
nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1);
ASSERT(nRef > 0);
nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1);
ASSERT(nRef > 0);
nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1);
ASSERT(nRef > 0);
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
nRef = atomic_fetch_add_32(&pSet->aSttF[iStt]->nRef, 1);
ASSERT(nRef > 0);
}
if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
}
_exit:
return code;
}
void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) {
int32_t nRef;
char fname[TSDB_FILENAME_LEN];
if (pFS->pDelFile) {
nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbDelFileName(pTsdb, pFS->pDelFile, fname);
(void)taosRemoveFile(fname);
taosMemoryFree(pFS->pDelFile);
}
}
for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet);
// head
nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
(void)taosRemoveFile(fname);
taosMemoryFree(pSet->pHeadF);
}
// data
nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
(void)taosRemoveFile(fname);
taosMemoryFree(pSet->pDataF);
}
// sma
nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
(void)taosRemoveFile(fname);
taosMemoryFree(pSet->pSmaF);
}
// stt
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
nRef = atomic_sub_fetch_32(&pSet->aSttF[iStt]->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbSttFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSttF[iStt], fname);
(void)taosRemoveFile(fname);
taosMemoryFree(pSet->aSttF[iStt]);
/* code */
}
}
}
taosArrayDestroy(pFS->aDFileSet);
}

View File

@ -805,6 +805,12 @@ int64_t tsdbFSAllocEid(STFileSystem *fs) {
return cid;
}
void tsdbFSUpdateEid(STFileSystem *fs, int64_t cid) {
taosThreadMutexLock(&fs->tsdb->mutex);
fs->neid = TMAX(fs->neid, cid);
taosThreadMutexUnlock(&fs->tsdb->mutex);
}
int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype) {
int32_t code = 0;
int32_t lino;

View File

@ -52,6 +52,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev
int32_t tsdbFSDestroyRefRangedSnapshot(TFileSetRangeArray **fsrArr);
// txn
int64_t tsdbFSAllocEid(STFileSystem *fs);
void tsdbFSUpdateEid(STFileSystem *fs, int64_t cid);
int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype);
int32_t tsdbFSEditCommit(STFileSystem *fs);
int32_t tsdbFSEditAbort(STFileSystem *fs);

View File

@ -14,6 +14,7 @@
*/
#include "tsdbFSetRAW.h"
#include "tsdbFS2.h"
// SFSetRAWWriter ==================================================
typedef struct SFSetRAWWriter {
@ -76,7 +77,7 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA
.szPage = writer->config->szPage,
.fid = bHdr->file.fid,
.did = writer->config->did,
.cid = writer->config->cid,
.cid = bHdr->file.cid,
.level = writer->config->level,
.file =
@ -84,7 +85,7 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA
.type = bHdr->file.type,
.fid = bHdr->file.fid,
.did = writer->config->did,
.cid = writer->config->cid,
.cid = bHdr->file.cid,
.size = bHdr->file.size,
.minVer = bHdr->file.minVer,
.maxVer = bHdr->file.maxVer,
@ -94,6 +95,7 @@ static int32_t tsdbFSetRAWWriteFileDataBegin(SFSetRAWWriter *writer, STsdbDataRA
},
};
tsdbFSUpdateEid(config.tsdb->pFS, config.cid);
writer->ctx->offset = 0;
writer->ctx->file = config.file;

View File

@ -181,6 +181,7 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid
pDelData->sKey = sKey;
pDelData->eKey = eKey;
pDelData->pNext = NULL;
taosWLockLatch(&pTbData->lock);
if (pTbData->pHead == NULL) {
ASSERT(pTbData->pTail == NULL);
pTbData->pHead = pTbData->pTail = pDelData;
@ -188,6 +189,7 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid
pTbData->pTail->pNext = pDelData;
pTbData->pTail = pDelData;
}
taosWUnLockLatch(&pTbData->lock);
pMemTable->nDel++;
pMemTable->minVer = TMIN(pMemTable->minVer, version);
@ -401,6 +403,7 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid
SL_NODE_BACKWARD(pTbData->sl.pHead, iLevel) = NULL;
SL_NODE_FORWARD(pTbData->sl.pTail, iLevel) = NULL;
}
taosInitRWLatch(&pTbData->lock);
taosWLockLatch(&pMemTable->latch);

View File

@ -120,7 +120,7 @@ static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInf
} else if (pTCol->colId < pSupInfo->colId[j]) { // do nothing
i += 1;
} else {
return TSDB_CODE_INVALID_PARA;
return TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER;
}
}
@ -676,6 +676,10 @@ static int32_t doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int
}
static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) {
if (pBlockIter->blockList == NULL) {
return NULL;
}
size_t num = TARRAY_SIZE(pBlockIter->blockList);
if (num == 0) {
ASSERT(pBlockIter->numOfBlocks == num);
@ -882,6 +886,21 @@ static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo
}
}
static void blockInfoToRecord(SBrinRecord* record, SFileDataBlockInfo* pBlockInfo){
record->uid = pBlockInfo->uid;
record->firstKey = pBlockInfo->firstKey;
record->lastKey = pBlockInfo->lastKey;
record->minVer = pBlockInfo->minVer;
record->maxVer = pBlockInfo->maxVer;
record->blockOffset = pBlockInfo->blockOffset;
record->smaOffset = pBlockInfo->smaOffset;
record->blockSize = pBlockInfo->blockSize;
record->blockKeySize = pBlockInfo->blockKeySize;
record->smaSize = pBlockInfo->smaSize;
record->numRow = pBlockInfo->numRow;
record->count = pBlockInfo->count;
}
static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) {
SReaderStatus* pStatus = &pReader->status;
SDataBlockIter* pBlockIter = &pStatus->blockIter;
@ -899,7 +918,9 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) {
bool asc = ASCENDING_TRAVERSE(pReader->info.order);
int32_t step = asc ? 1 : -1;
SBrinRecord* pRecord = &pBlockInfo->record;
SBrinRecord tmp;
blockInfoToRecord(&tmp, pBlockInfo);
SBrinRecord* pRecord = &tmp;
// no data exists, return directly.
if (pBlockData->nRow == 0 || pBlockData->aTSKEY == 0) {
@ -1092,14 +1113,16 @@ static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockI
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter);
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
SBrinRecord* pRecord = &pBlockInfo->record;
SBrinRecord tmp;
blockInfoToRecord(&tmp, pBlockInfo);
SBrinRecord* pRecord = &tmp;
code = tsdbDataFileReadBlockDataByColumn(pReader->pFileReader, pRecord, pBlockData, pSchema, &pSup->colId[1],
pSup->numOfCols - 1);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64
", rows:%d, code:%s %s",
pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.firstKey,
pBlockInfo->record.lastKey, pBlockInfo->record.numRow, tstrerror(code), pReader->idStr);
pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->firstKey,
pBlockInfo->lastKey, pBlockInfo->numRow, tstrerror(code), pReader->idStr);
return code;
}
@ -1120,10 +1143,10 @@ static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockI
* This is an two rectangles overlap cases.
*/
static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SFileDataBlockInfo* pBlock) {
return (pWindow->ekey < pBlock->record.lastKey && pWindow->ekey >= pBlock->record.firstKey) ||
(pWindow->skey > pBlock->record.firstKey && pWindow->skey <= pBlock->record.lastKey) ||
(pVerRange->minVer > pBlock->record.minVer && pVerRange->minVer <= pBlock->record.maxVer) ||
(pVerRange->maxVer < pBlock->record.maxVer && pVerRange->maxVer >= pBlock->record.minVer);
return (pWindow->ekey < pBlock->lastKey && pWindow->ekey >= pBlock->firstKey) ||
(pWindow->skey > pBlock->firstKey && pWindow->skey <= pBlock->lastKey) ||
(pVerRange->minVer > pBlock->minVer && pVerRange->minVer <= pBlock->maxVer) ||
(pVerRange->maxVer < pBlock->maxVer && pVerRange->maxVer >= pBlock->minVer);
}
static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pBlockInfo,
@ -1142,7 +1165,7 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo
STableDataBlockIdx* pTableDataBlockIdx =
taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step);
SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex);
memcpy(pRecord, &p->record, sizeof(SBrinRecord));
blockInfoToRecord(pRecord, p);
*nextIndex = pBlockInfo->tbBlockIdx + step;
return true;
@ -1211,9 +1234,9 @@ static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIte
static bool overlapWithNeighborBlock2(SFileDataBlockInfo* pBlock, SBrinRecord* pRec, int32_t order) {
// it is the last block in current file, no chance to overlap with neighbor blocks.
if (ASCENDING_TRAVERSE(order)) {
return pBlock->record.lastKey == pRec->firstKey;
return pBlock->lastKey == pRec->firstKey;
} else {
return pBlock->record.firstKey == pRec->lastKey;
return pBlock->firstKey == pRec->lastKey;
}
}
@ -1223,9 +1246,9 @@ static int64_t getBoarderKeyInFiles(SFileDataBlockInfo* pBlock, STableBlockScanI
int64_t key = 0;
if (pScanInfo->sttKeyInfo.status == STT_FILE_HAS_DATA) {
int64_t keyInStt = pScanInfo->sttKeyInfo.nextProcKey;
key = ascScan ? TMIN(pBlock->record.firstKey, keyInStt) : TMAX(pBlock->record.lastKey, keyInStt);
key = ascScan ? TMIN(pBlock->firstKey, keyInStt) : TMAX(pBlock->lastKey, keyInStt);
} else {
key = ascScan ? pBlock->record.firstKey : pBlock->record.lastKey;
key = ascScan ? pBlock->firstKey : pBlock->lastKey;
}
return key;
@ -1241,8 +1264,8 @@ static bool bufferDataInFileBlockGap(TSDBKEY keyInBuf, SFileDataBlockInfo* pBloc
}
static bool keyOverlapFileBlock(TSDBKEY key, SFileDataBlockInfo* pBlock, SVersionRange* pVerRange) {
return (key.ts >= pBlock->record.firstKey && key.ts <= pBlock->record.lastKey) &&
(pBlock->record.maxVer >= pVerRange->minVer) && (pBlock->record.minVer <= pVerRange->maxVer);
return (key.ts >= pBlock->firstKey && key.ts <= pBlock->lastKey) &&
(pBlock->maxVer >= pVerRange->minVer) && (pBlock->minVer <= pVerRange->maxVer);
}
static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo,
@ -1258,18 +1281,20 @@ static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo*
pInfo->overlapWithNeighborBlock = overlapWithNeighborBlock2(pBlockInfo, &rec, pReader->info.order);
}
SBrinRecord pRecord;
blockInfoToRecord(&pRecord, pBlockInfo);
// has duplicated ts of different version in this block
pInfo->hasDupTs = (pBlockInfo->record.numRow > pBlockInfo->record.count) || (pBlockInfo->record.count <= 0);
pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, &pBlockInfo->record, pReader->info.order);
pInfo->hasDupTs = (pBlockInfo->numRow > pBlockInfo->count) || (pBlockInfo->count <= 0);
pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, &pRecord, pReader->info.order);
ASSERT(pScanInfo->sttKeyInfo.status != STT_FILE_READER_UNINIT);
if (pScanInfo->sttKeyInfo.status == STT_FILE_HAS_DATA) {
int64_t nextProcKeyInStt = pScanInfo->sttKeyInfo.nextProcKey;
pInfo->overlapWithSttBlock =
!(pBlockInfo->record.lastKey < nextProcKeyInStt || pBlockInfo->record.firstKey > nextProcKeyInStt);
!(pBlockInfo->lastKey < nextProcKeyInStt || pBlockInfo->firstKey > nextProcKeyInStt);
}
pInfo->moreThanCapcity = pBlockInfo->record.numRow > pReader->resBlockInfo.capacity;
pInfo->moreThanCapcity = pBlockInfo->numRow > pReader->resBlockInfo.capacity;
pInfo->partiallyRequired = dataBlockPartiallyRequired(&pReader->info.window, &pReader->info.verRange, pBlockInfo);
pInfo->overlapWithKeyInBuf = keyOverlapFileBlock(keyInBuf, pBlockInfo, &pReader->info.verRange);
}
@ -1325,8 +1350,8 @@ static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo*
tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64
" - %" PRId64 ", uid:%" PRIu64 ", %s",
pReader, el, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey,
pBlockScanInfo->uid, pReader->idStr);
pReader, el, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, pBlockScanInfo->uid,
pReader->idStr);
pReader->cost.buildmemBlock += el;
return code;
@ -1360,7 +1385,8 @@ static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pB
static bool nextRowFromSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockScanInfo* pScanInfo,
SVersionRange* pVerRange) {
int32_t step = ASCENDING_TRAVERSE(pSttBlockReader->order) ? 1 : -1;
int32_t order = pSttBlockReader->order;
int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1;
while (1) {
bool hasVal = tMergeTreeNext(&pSttBlockReader->mergeTree);
@ -1377,21 +1403,21 @@ static bool nextRowFromSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockSc
pSttBlockReader->currentKey = key;
pScanInfo->sttKeyInfo.nextProcKey = key;
if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, pSttBlockReader->order,
pVerRange)) {
if (pScanInfo->delSkyline != NULL && TARRAY_SIZE(pScanInfo->delSkyline) > 0) {
if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, order, pVerRange)) {
pScanInfo->sttKeyInfo.status = STT_FILE_HAS_DATA;
return true;
}
} else {
pScanInfo->sttKeyInfo.status = STT_FILE_HAS_DATA;
return true;
}
}
}
static void doPinSttBlock(SSttBlockReader* pSttBlockReader) {
tMergeTreePinSttBlock(&pSttBlockReader->mergeTree);
}
static void doPinSttBlock(SSttBlockReader* pSttBlockReader) { tMergeTreePinSttBlock(&pSttBlockReader->mergeTree); }
static void doUnpinSttBlock(SSttBlockReader* pSttBlockReader) {
tMergeTreeUnpinSttBlock(&pSttBlockReader->mergeTree);
}
static void doUnpinSttBlock(SSttBlockReader* pSttBlockReader) { tMergeTreeUnpinSttBlock(&pSttBlockReader->mergeTree); }
static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SSttBlockReader* pSttBlockReader,
STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader,
@ -1468,7 +1494,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
int64_t tsLast = INT64_MIN;
if (hasDataInSttBlock(pBlockScanInfo)) {
if (hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
tsLast = getCurrentKeyInSttBlock(pSttBlockReader);
}
@ -1487,7 +1513,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
int64_t minKey = 0;
if (pReader->info.order == TSDB_ORDER_ASC) {
minKey = INT64_MAX; // chosen the minimum value
if (minKey > tsLast && hasDataInSttBlock(pBlockScanInfo)) {
if (minKey > tsLast && hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
minKey = tsLast;
}
@ -1500,7 +1526,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
}
} else {
minKey = INT64_MIN;
if (minKey < tsLast && hasDataInSttBlock(pBlockScanInfo)) {
if (minKey < tsLast && hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
minKey = tsLast;
}
@ -1530,8 +1556,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
if (code != TSDB_CODE_SUCCESS) {
return code;
}
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange,
pReader->idStr);
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr);
}
if (minKey == k.ts) {
@ -1580,8 +1605,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
if (code != TSDB_CODE_SUCCESS) {
return code;
}
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange,
pReader->idStr);
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr);
}
if (minKey == key) {
@ -1606,91 +1630,6 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
return code;
}
static int32_t doMergeFileBlockAndLastBlock(SSttBlockReader* pSttBlockReader, STsdbReader* pReader,
STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData,
bool mergeBlockData) {
SRowMerger* pMerger = &pReader->status.merger;
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
int64_t tsLastBlock = getCurrentKeyInSttBlock(pSttBlockReader);
bool copied = false;
int32_t code = TSDB_CODE_SUCCESS;
SRow* pTSRow = NULL;
TSDBROW* pRow = tMergeTreeGetRow(&pSttBlockReader->mergeTree);
// create local variable to hold the row value
TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData};
tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", ts:%" PRId64 " %s", pRow->pBlockData, pRow->iRow, pSttBlockReader->uid,
fRow.pBlockData->aTSKEY[fRow.iRow], pReader->idStr);
// only stt block exists
if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) {
code = tryCopyDistinctRowFromSttBlock(&fRow, pSttBlockReader, pBlockScanInfo, tsLastBlock, pReader, &copied);
if (code) {
return code;
}
if (copied) {
pBlockScanInfo->lastProcKey = tsLastBlock;
return TSDB_CODE_SUCCESS;
} else {
code = tsdbRowMergerAdd(pMerger, &fRow, NULL);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
TSDBROW* pRow1 = tMergeTreeGetRow(&pSttBlockReader->mergeTree);
tsdbRowMergerAdd(pMerger, pRow1, NULL);
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange,
pReader->idStr);
code = tsdbRowMergerGetRow(pMerger, &pTSRow);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo);
taosMemoryFree(pTSRow);
tsdbRowMergerClear(pMerger);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
} else { // not merge block data
code = tsdbRowMergerAdd(pMerger, &fRow, NULL);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange,
pReader->idStr);
// merge with block data if ts == key
if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) {
doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader);
}
code = tsdbRowMergerGetRow(pMerger, &pTSRow);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo);
taosMemoryFree(pTSRow);
tsdbRowMergerClear(pMerger);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t mergeFileBlockAndSttBlock(STsdbReader* pReader, SSttBlockReader* pSttBlockReader, int64_t key,
STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
@ -1714,6 +1653,9 @@ static int32_t mergeFileBlockAndSttBlock(STsdbReader* pReader, SSttBlockReader*
} else if ((!dataInDataFile) && dataInSttFile) {
// no data ile block exists
return mergeRowsInSttBlocks(pSttBlockReader, pBlockScanInfo, pReader);
} else if (pBlockScanInfo->cleanSttBlocks && pReader->info.execMode == READER_EXEC_ROWS) {
// opt model for count data in stt file, which is not overlap with data blocks in files.
return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader);
} else {
// row in both stt file blocks and data file blocks
TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
@ -1793,7 +1735,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo*
TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader);
int64_t tsLast = INT64_MIN;
if (hasDataInSttBlock(pBlockScanInfo)) {
if (hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
tsLast = getCurrentKeyInSttBlock(pSttBlockReader);
}
@ -1842,7 +1784,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo*
minKey = key;
}
if (minKey > tsLast && hasDataInSttBlock(pBlockScanInfo)) {
if (minKey > tsLast && hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
minKey = tsLast;
}
} else {
@ -1859,7 +1801,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo*
minKey = key;
}
if (minKey < tsLast && hasDataInSttBlock(pBlockScanInfo)) {
if (minKey < tsLast && hasDataInSttBlock(pBlockScanInfo) && (!pBlockScanInfo->cleanSttBlocks)) {
minKey = tsLast;
}
}
@ -1884,8 +1826,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo*
return code;
}
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange,
pReader->idStr);
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr);
}
if (minKey == ik.ts) {
@ -1943,8 +1884,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo*
return code;
}
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange,
pReader->idStr);
doMergeRowsInSttBlock(pSttBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr);
}
if (minKey == key) {
@ -2033,32 +1973,39 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea
return TSDB_CODE_SUCCESS;
}
static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo,
STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
static bool isValidFileBlockRow(SBlockData* pBlockData, int32_t rowIndex, STableBlockScanInfo* pBlockScanInfo, bool asc,
STsdbReaderInfo* pInfo) {
// it is an multi-table data block
if (pBlockData->aUid != NULL) {
uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex];
uint64_t uid = pBlockData->aUid[rowIndex];
if (uid != pBlockScanInfo->uid) { // move to next row
return false;
}
}
// check for version and time range
int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex];
if (ver > pReader->info.verRange.maxVer || ver < pReader->info.verRange.minVer) {
int64_t ver = pBlockData->aVersion[rowIndex];
if (ver > pInfo->verRange.maxVer || ver < pInfo->verRange.minVer) {
return false;
}
int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex];
if (ts > pReader->info.window.ekey || ts < pReader->info.window.skey) {
int64_t ts = pBlockData->aTSKEY[rowIndex];
if (ts > pInfo->window.ekey || ts < pInfo->window.skey) {
return false;
}
if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->info.order,
&pReader->info.verRange)) {
if ((asc && (ts <= pBlockScanInfo->lastProcKey)) || ((!asc) && (ts >= pBlockScanInfo->lastProcKey))) {
return false;
}
if (pBlockScanInfo->delSkyline != NULL && TARRAY_SIZE(pBlockScanInfo->delSkyline) > 0) {
bool dropped = hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver,
pInfo->order, &pInfo->verRange);
if (dropped) {
return false;
}
}
return true;
}
@ -2142,12 +2089,18 @@ static bool initSttBlockReader(SSttBlockReader* pSttBlockReader, STableBlockScan
}
pScanInfo->sttKeyInfo.status = taosArrayGetSize(info.pTimeWindowList) ? STT_FILE_HAS_DATA : STT_FILE_NO_DATA;
pScanInfo->sttKeyInfo.nextProcKey = ASCENDING_TRAVERSE(pReader->info.order)? pScanInfo->sttWindow.skey:pScanInfo->sttWindow.ekey;
pScanInfo->sttKeyInfo.nextProcKey =
ASCENDING_TRAVERSE(pReader->info.order) ? pScanInfo->sttWindow.skey : pScanInfo->sttWindow.ekey;
hasData = true;
} else {
} else { // not clean stt blocks
INIT_TIMEWINDOW(&pScanInfo->sttWindow); //reset the time window
pScanInfo->sttBlockReturned = false;
hasData = nextRowFromSttBlocks(pSttBlockReader, pScanInfo, &pReader->info.verRange);
}
} else {
pScanInfo->cleanSttBlocks = false;
INIT_TIMEWINDOW(&pScanInfo->sttWindow); //reset the time window
pScanInfo->sttBlockReturned = false;
hasData = nextRowFromSttBlocks(pSttBlockReader, pScanInfo, &pReader->info.verRange);
}
@ -2160,9 +2113,7 @@ static bool initSttBlockReader(SSttBlockReader* pSttBlockReader, STableBlockScan
return hasData;
}
static bool hasDataInSttBlock(STableBlockScanInfo *pInfo) {
return pInfo->sttKeyInfo.status == STT_FILE_HAS_DATA;
}
static bool hasDataInSttBlock(STableBlockScanInfo* pInfo) { return pInfo->sttKeyInfo.status == STT_FILE_HAS_DATA; }
bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) {
if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) {
@ -2217,7 +2168,8 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc
}
}
int32_t mergeRowsInSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) {
int32_t mergeRowsInSttBlocks(SSttBlockReader* pSttBlockReader, STableBlockScanInfo* pBlockScanInfo,
STsdbReader* pReader) {
bool copied = false;
SRow* pTSRow = NULL;
int64_t tsLastBlock = getCurrentKeyInSttBlock(pSttBlockReader);
@ -2358,17 +2310,14 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
SSttBlockReader* pSttBlockReader = pReader->status.fileIter.pSttBlockReader;
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
SBrinRecord* pRecord = NULL;
STableBlockScanInfo* pBlockScanInfo = NULL;
if (pBlockInfo == NULL) {
return 0;
}
pRecord = &pBlockInfo->record;
if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) {
setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order);
setBlockAllDumped(pDumpInfo, pBlockInfo->lastKey, pReader->info.order);
return code;
}
@ -2377,13 +2326,12 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
goto _end;
}
pRecord = &pBlockInfo->record;
TSDBKEY keyInBuf = getCurrentKeyInBuf(pBlockScanInfo, pReader);
// it is a clean block, load it directly
int64_t cap = pReader->resBlockInfo.capacity;
if (isCleanFileDataBlock(pReader, pBlockInfo, pBlockScanInfo, keyInBuf) && (pRecord->numRow <= cap)) {
if (((asc && (pRecord->firstKey < keyInBuf.ts)) || (!asc && (pRecord->lastKey > keyInBuf.ts))) &&
if (isCleanFileDataBlock(pReader, pBlockInfo, pBlockScanInfo, keyInBuf) && (pBlockInfo->numRow <= cap)) {
if (((asc && (pBlockInfo->firstKey < keyInBuf.ts)) || (!asc && (pBlockInfo->lastKey > keyInBuf.ts))) &&
(pBlockScanInfo->sttKeyInfo.status == STT_FILE_NO_DATA)) {
code = copyBlockDataToSDataBlock(pReader);
if (code) {
@ -2391,7 +2339,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
}
// record the last key value
pBlockScanInfo->lastProcKey = asc ? pRecord->lastKey : pRecord->firstKey;
pBlockScanInfo->lastProcKey = asc ? pBlockInfo->lastKey : pBlockInfo->firstKey;
goto _end;
}
}
@ -2404,7 +2352,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
{
while (pBlockData->nRow > 0 && pBlockData->uid == pBlockScanInfo->uid) {
// find the first qualified row in data block
if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) {
if (isValidFileBlockRow(pBlockData, pDumpInfo->rowIndex, pBlockScanInfo, asc, &pReader->info)) {
hasBlockData = true;
break;
}
@ -2419,7 +2367,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
bool loadNeighbor = false;
code = loadNeighborIfOverlap(pBlockInfo, pBlockScanInfo, pReader, &loadNeighbor);
if ((!loadNeighbor) || (code != 0)) {
setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order);
setBlockAllDumped(pDumpInfo, pBlockInfo->lastKey, pReader->info.order);
break;
}
}
@ -2438,7 +2386,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
// currently loaded file data block is consumed
if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) {
setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order);
setBlockAllDumped(pDumpInfo, pBlockInfo->lastKey, pReader->info.order);
break;
}
@ -2798,7 +2746,7 @@ static bool notOverlapWithSttFiles(SFileDataBlockInfo* pBlockInfo, STableBlockSc
return true;
} else {
int64_t keyInStt = pScanInfo->sttKeyInfo.nextProcKey;
return (asc && pBlockInfo->record.lastKey < keyInStt) || (!asc && pBlockInfo->record.firstKey > keyInStt);
return (asc && pBlockInfo->lastKey < keyInStt) || (!asc && pBlockInfo->firstKey > keyInStt);
}
}
@ -2812,7 +2760,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
int32_t code = TSDB_CODE_SUCCESS;
if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) {
setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order);
setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->lastKey, pReader->info.order);
return code;
}
@ -2847,20 +2795,20 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
if (notOverlapWithSttFiles(pBlockInfo, pScanInfo, asc)) {
// whole block is required, return it directly
SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info;
pInfo->rows = pBlockInfo->record.numRow;
pInfo->rows = pBlockInfo->numRow;
pInfo->id.uid = pScanInfo->uid;
pInfo->dataLoad = 0;
pInfo->window = (STimeWindow){.skey = pBlockInfo->record.firstKey, .ekey = pBlockInfo->record.lastKey};
pInfo->window = (STimeWindow){.skey = pBlockInfo->firstKey, .ekey = pBlockInfo->lastKey};
setComposedBlockFlag(pReader, false);
setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order);
setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->lastKey, pReader->info.order);
// update the last key for the corresponding table
pScanInfo->lastProcKey = asc ? pInfo->window.ekey : pInfo->window.skey;
tsdbDebug("%p uid:%" PRIu64
" clean file block retrieved from file, global index:%d, "
"table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s",
pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.numRow,
pBlockInfo->record.firstKey, pBlockInfo->record.lastKey, pReader->idStr);
pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->numRow,
pBlockInfo->firstKey, pBlockInfo->lastKey, pReader->idStr);
} else {
SBlockData* pBData = &pReader->status.fileBlockData;
tBlockDataReset(pBData);
@ -2889,10 +2837,10 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
// data in stt now overlaps with current active file data block, need to composed with file data block.
int64_t lastKeyInStt = getCurrentKeyInSttBlock(pSttBlockReader);
if ((lastKeyInStt >= pBlockInfo->record.firstKey && asc) ||
(lastKeyInStt <= pBlockInfo->record.lastKey && (!asc))) {
if ((lastKeyInStt >= pBlockInfo->firstKey && asc) ||
(lastKeyInStt <= pBlockInfo->lastKey && (!asc))) {
tsdbDebug("%p lastKeyInStt:%" PRId64 ", overlap with file block, brange:%" PRId64 "-%" PRId64 " %s", pReader,
lastKeyInStt, pBlockInfo->record.firstKey, pBlockInfo->record.lastKey, pReader->idStr);
lastKeyInStt, pBlockInfo->firstKey, pBlockInfo->lastKey, pReader->idStr);
break;
}
}
@ -2915,7 +2863,8 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
static int32_t buildBlockFromBufferSeqForPreFileset(STsdbReader* pReader, int64_t endKey) {
SReaderStatus* pStatus = &pReader->status;
tsdbDebug("seq load data blocks from cache that preceeds fileset %d, %s", pReader->status.pCurrentFileset->fid, pReader->idStr);
tsdbDebug("seq load data blocks from cache that preceeds fileset %d, %s", pReader->status.pCurrentFileset->fid,
pReader->idStr);
while (1) {
if (pReader->code != TSDB_CODE_SUCCESS) {
@ -3008,8 +2957,8 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter)
lastKey = pScanInfo->lastProcKey;
}
pDumpInfo->totalRows = pBlockInfo->record.numRow;
pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->info.order) ? 0 : pBlockInfo->record.numRow - 1;
pDumpInfo->totalRows = pBlockInfo->numRow;
pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->info.order) ? 0 : pBlockInfo->numRow - 1;
} else {
pDumpInfo->totalRows = 0;
pDumpInfo->rowIndex = 0;
@ -3219,7 +3168,7 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_
bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order,
SVersionRange* pVerRange) {
if (pDelList == NULL || (taosArrayGetSize(pDelList) == 0)) {
if (pDelList == NULL || (TARRAY_SIZE(pDelList) == 0)) {
return false;
}
@ -3327,16 +3276,22 @@ TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* p
TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter);
TSDBKEY key = TSDBROW_KEY(pRow);
int32_t order = pReader->info.order;
if (outOfTimeWindow(key.ts, &pReader->info.window)) {
pIter->hasVal = false;
return NULL;
}
// it is a valid data version
if ((key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) &&
(!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) {
if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) {
if (pDelList == NULL || TARRAY_SIZE(pDelList) == 0) {
return pRow;
} else {
bool dropped = hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, order, &pReader->info.verRange);
if (!dropped) {
return pRow;
}
}
}
while (1) {
@ -3353,9 +3308,15 @@ TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* p
return NULL;
}
if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer &&
(!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) {
if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) {
if (pDelList == NULL || TARRAY_SIZE(pDelList) == 0) {
return pRow;
} else {
bool dropped = hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, order, &pReader->info.verRange);
if (!dropped) {
return pRow;
}
}
}
}
}
@ -3827,7 +3788,6 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e
pBlockScanInfo->lastProcKey = row.pBlockData->aTSKEY[row.iRow];
}
// no data in buffer, return immediately
if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) {
break;
@ -4178,37 +4138,21 @@ void tsdbReaderClose2(STsdbReader* pReader) {
taosMemoryFreeClear(pReader);
}
int32_t tsdbReaderSuspend2(STsdbReader* pReader) {
// save reader's base state & reset top state to be reconstructed from base state
int32_t code = 0;
SReaderStatus* pStatus = &pReader->status;
STableBlockScanInfo* pBlockScanInfo = NULL;
pReader->status.suspendInvoked = true; // record the suspend status
static int32_t doSuspendCurrentReader(STsdbReader* pCurrentReader) {
SReaderStatus* pStatus = &pCurrentReader->status;
if (pStatus->loadFromFile) {
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
if (pBlockInfo != NULL) {
pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr);
if (pBlockScanInfo == NULL) {
goto _err;
}
} else {
pBlockScanInfo = *pStatus->pTableIter;
}
tsdbDataFileReaderClose(&pCurrentReader->pFileReader);
tsdbDataFileReaderClose(&pReader->pFileReader);
SReadCostSummary* pCost = &pReader->cost;
pReader->status.pLDataIterArray = destroySttBlockReader(pReader->status.pLDataIterArray, &pCost->sttCost);
pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES);
SReadCostSummary* pCost = &pCurrentReader->cost;
pStatus->pLDataIterArray = destroySttBlockReader(pStatus->pLDataIterArray, &pCost->sttCost);
pStatus->pLDataIterArray = taosArrayInit(4, POINTER_BYTES);
}
// resetDataBlockScanInfo excluding lastKey
STableBlockScanInfo** p = NULL;
int32_t step = ASCENDING_TRAVERSE(pReader->info.order)? 1:-1;
int32_t step = ASCENDING_TRAVERSE(pCurrentReader->info.order) ? 1 : -1;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) {
STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p;
@ -4219,6 +4163,26 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) {
pStatus->uidList.currentIndex = 0;
initReaderStatus(pStatus);
return TSDB_CODE_SUCCESS;
}
int32_t tsdbReaderSuspend2(STsdbReader* pReader) {
// save reader's base state & reset top state to be reconstructed from base state
int32_t code = 0;
pReader->status.suspendInvoked = true; // record the suspend status
if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) {
if (pReader->step == EXTERNAL_ROWS_PREV) {
doSuspendCurrentReader(pReader->innerReader[0]);
} else if (pReader->step == EXTERNAL_ROWS_MAIN) {
doSuspendCurrentReader(pReader);
} else {
doSuspendCurrentReader(pReader->innerReader[1]);
}
} else {
doSuspendCurrentReader(pReader);
}
tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, false);
pReader->pReadSnap = NULL;
if (pReader->bFilesetDelimited) {
@ -4227,16 +4191,16 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) {
}
pReader->flag = READER_STATUS_SUSPEND;
if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) {
clearSharedPtr(pReader->innerReader[0]);
clearSharedPtr(pReader->innerReader[1]);
}
#if SUSPEND_RESUME_TEST
tsem_post(&pReader->resumeAfterSuspend);
#endif
tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? pBlockScanInfo->uid : 0,
pReader->idStr);
return code;
_err:
tsdbError("failed to suspend data reader, code:%s %s", tstrerror(code), pReader->idStr);
tsdbDebug("reader: %p suspended in this query %s, step:%d", pReader, pReader->idStr, pReader->step);
return code;
}
@ -4267,8 +4231,7 @@ int32_t tsdbReaderResume2(STsdbReader* pReader) {
int32_t code = 0;
STableBlockScanInfo** pBlockScanInfo = pReader->status.pTableIter;
// restore reader's state
// task snapshot
// restore reader's state, task snapshot
int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap);
if (numOfTables > 0) {
qTrace("tsdb/reader: %p, take snapshot", pReader);
@ -4293,7 +4256,14 @@ int32_t tsdbReaderResume2(STsdbReader* pReader) {
pNextReader->resBlockInfo.capacity = 1;
setSharedPtr(pNextReader, pReader);
if (pReader->step == 0 || pReader->step == EXTERNAL_ROWS_PREV) {
code = doOpenReaderImpl(pPrevReader);
} else if (pReader->step == EXTERNAL_ROWS_MAIN) {
code = doOpenReaderImpl(pReader);
} else {
code = doOpenReaderImpl(pNextReader);
}
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -4355,8 +4325,8 @@ static int32_t doTsdbNextDataBlockFilesetDelimited(STsdbReader* pReader) {
return code;
}
tsdbTrace("block from file rows: %"PRId64", will process pre-file set buffer: %d. %s",
pBlock->info.rows, pStatus->bProcMemFirstFileset, pReader->idStr);
tsdbTrace("block from file rows: %" PRId64 ", will process pre-file set buffer: %d. %s", pBlock->info.rows,
pStatus->bProcMemFirstFileset, pReader->idStr);
if (pStatus->bProcMemPreFileset) {
if (pBlock->info.rows > 0) {
if (pReader->notifyFn) {
@ -4617,7 +4587,9 @@ int32_t tsdbRetrieveDatablockSMA2(STsdbReader* pReader, SSDataBlock* pDataBlock,
// int64_t st = taosGetTimestampUs();
TARRAY2_CLEAR(&pSup->colAggArray, 0);
code = tsdbDataFileReadBlockSma(pReader->pFileReader, &pFBlock->record, &pSup->colAggArray);
SBrinRecord pRecord;
blockInfoToRecord(&pRecord, pFBlock);
code = tsdbDataFileReadBlockSma(pReader->pFileReader, &pRecord, &pSup->colAggArray);
if (code != TSDB_CODE_SUCCESS) {
tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code),
pReader->idStr);
@ -4648,7 +4620,7 @@ int32_t tsdbRetrieveDatablockSMA2(STsdbReader* pReader, SSDataBlock* pDataBlock,
}
// do fill all null column value SMA info
doFillNullColSMA(pSup, pFBlock->record.numRow, numOfCols, pTsAgg);
doFillNullColSMA(pSup, pFBlock->numRow, numOfCols, pTsAgg);
size_t size = pSup->colAggArray.size;
@ -4869,7 +4841,7 @@ int32_t tsdbGetFileBlocksDistInfo2(STsdbReader* pReader, STableBlockDistInfo* pT
while (true) {
if (hasNext) {
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter);
int32_t numOfRows = pBlockInfo->record.numRow;
int32_t numOfRows = pBlockInfo->numRow;
pTableBlockInfo->totalRows += numOfRows;
@ -4881,7 +4853,7 @@ int32_t tsdbGetFileBlocksDistInfo2(STsdbReader* pReader, STableBlockDistInfo* pT
pTableBlockInfo->minRows = numOfRows;
}
pTableBlockInfo->totalSize += pBlockInfo->record.blockSize;
pTableBlockInfo->totalSize += pBlockInfo->blockSize;
int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows, numOfBuckets);
pTableBlockInfo->blockRowsHisto[bucketIndex]++;
@ -5124,7 +5096,6 @@ void tsdbUntakeReadSnap2(STsdbReader* pReader, STsdbReadSnap* pSnap, bool proact
tsdbUnrefMemTable(pSnap->pIMem, pSnap->pINode, proactive);
}
tsdbFSUnref(pTsdb, &pSnap->fs);
if (pSnap->pNode) taosMemoryFree(pSnap->pNode);
if (pSnap->pINode) taosMemoryFree(pSnap->pINode);
@ -5145,9 +5116,7 @@ void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) {
void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/
}
void tsdbSetFilesetDelimited(STsdbReader* pReader) {
pReader->bFilesetDelimited = true;
}
void tsdbSetFilesetDelimited(STsdbReader* pReader) { pReader->bFilesetDelimited = true; }
void tsdbReaderSetNotifyCb(STsdbReader* pReader, TsdReaderNotifyCbFn notifyFn, void* param) {
pReader->notifyFn = notifyFn;

View File

@ -22,9 +22,8 @@
#include "tsdbUtil2.h"
#include "tsimplehash.h"
#define INIT_TIMEWINDOW(_w) do { (_w)->skey = INT64_MAX; (_w)->ekey = INT64_MIN;} while(0);
static bool overlapWithDelSkylineWithoutVer(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order);
static bool overlapWithDelSkylineWithoutVer(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord,
int32_t order);
static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) {
int32_t num = numOfTables / pBuf->numPerBucket;
@ -160,6 +159,9 @@ SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf
INIT_TIMEWINDOW(&pScanInfo->sttWindow);
INIT_TIMEWINDOW(&pScanInfo->filesetWindow);
pScanInfo->cleanSttBlocks = false;
pScanInfo->sttBlockReturned = false;
pUidList->tableUidList[j] = idList[j].uid;
if (ASCENDING_TRAVERSE(pTsdbReader->info.order)) {
@ -354,6 +356,21 @@ static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, v
return pLeftBlock->offset > pRightBlock->offset ? 1 : -1;
}
static void recordToBlockInfo(SFileDataBlockInfo* pBlockInfo, SBrinRecord* record){
pBlockInfo->uid = record->uid;
pBlockInfo->firstKey = record->firstKey;
pBlockInfo->lastKey = record->lastKey;
pBlockInfo->minVer = record->minVer;
pBlockInfo->maxVer = record->maxVer;
pBlockInfo->blockOffset = record->blockOffset;
pBlockInfo->smaOffset = record->smaOffset;
pBlockInfo->blockSize = record->blockSize;
pBlockInfo->blockKeySize = record->blockKeySize;
pBlockInfo->smaSize = record->smaSize;
pBlockInfo->numRow = record->numRow;
pBlockInfo->count = record->count;
}
int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) {
bool asc = ASCENDING_TRAVERSE(pReader->info.order);
@ -411,8 +428,9 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3
pTableScanInfo->pBlockIdxList = taosArrayInit(numOfBlocks, sizeof(STableDataBlockIdx));
}
for (int32_t i = 0; i < numOfBlocks; ++i) {
SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i);
SFileDataBlockInfo blockInfo = {.tbBlockIdx = i};
SBrinRecord* record = (SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i);
recordToBlockInfo(&blockInfo, record);
taosArrayPush(pBlockIter->blockList, &blockInfo);
STableDataBlockIdx tableDataBlockIdx = {.globalIndex = i};
@ -445,8 +463,9 @@ int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int3
int32_t pos = tMergeTreeGetChosenIndex(pTree);
int32_t index = sup.indexPerTable[pos]++;
SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index);
SFileDataBlockInfo blockInfo = {.tbBlockIdx = index};
SBrinRecord* record = (SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index);
recordToBlockInfo(&blockInfo, record);
taosArrayPush(pBlockIter->blockList, &blockInfo);
STableBlockScanInfo* pTableScanInfo = sup.pDataBlockInfo[pos][index].pInfo;
@ -650,6 +669,7 @@ void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemT
SDelData* p = NULL;
if (pMemTbData != NULL) {
taosRLockLatch(&pMemTbData->lock);
p = pMemTbData->pHead;
while (p) {
if (p->version <= ver) {
@ -658,6 +678,7 @@ void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemT
p = p->pNext;
}
taosRUnLockLatch(&pMemTbData->lock);
}
if (piMemTbData != NULL) {
@ -903,7 +924,8 @@ static int32_t sortUidComparFn(const void* p1, const void* p2) {
}
}
bool isCleanSttBlock(SArray* pTimewindowList, STimeWindow* pQueryWindow, STableBlockScanInfo *pScanInfo, int32_t order) {
bool isCleanSttBlock(SArray* pTimewindowList, STimeWindow* pQueryWindow, STableBlockScanInfo* pScanInfo,
int32_t order) {
// check if it overlap with del skyline
taosArraySort(pTimewindowList, sortUidComparFn);

View File

@ -26,6 +26,12 @@ extern "C" {
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
#define INIT_TIMEWINDOW(_w) \
do { \
(_w)->skey = INT64_MAX; \
(_w)->ekey = INT64_MIN; \
} while (0);
typedef enum {
READER_STATUS_SUSPEND = 0x1,
READER_STATUS_NORMAL = 0x2,
@ -175,9 +181,22 @@ typedef struct SFilesetIter {
typedef struct SFileDataBlockInfo {
// index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
uint64_t uid;
// int64_t suid;
int64_t uid;
int64_t firstKey;
// int64_t firstKeyVer;
int64_t lastKey;
// int64_t lastKeyVer;
int64_t minVer;
int64_t maxVer;
int64_t blockOffset;
int64_t smaOffset;
int32_t blockSize;
int32_t blockKeySize;
int32_t smaSize;
int32_t numRow;
int32_t count;
int32_t tbBlockIdx;
SBrinRecord record;
} SFileDataBlockInfo;
typedef struct SDataBlockIter {

View File

@ -433,635 +433,6 @@ _exit:
return code;
}
// SDataFWriter ====================================================
int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) {
int32_t code = 0;
int32_t flag;
int64_t n;
int32_t szPage = pTsdb->pVnode->config.tsdbPageSize;
SDataFWriter *pWriter = NULL;
char fname[TSDB_FILENAME_LEN];
char hdr[TSDB_FHDR_SIZE] = {0};
// alloc
pWriter = taosMemoryCalloc(1, sizeof(*pWriter));
if (pWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
pWriter->pTsdb = pTsdb;
pWriter->wSet = (SDFileSet){.diskId = pSet->diskId,
.fid = pSet->fid,
.pHeadF = &pWriter->fHead,
.pDataF = &pWriter->fData,
.pSmaF = &pWriter->fSma,
.nSttF = pSet->nSttF};
pWriter->fHead = *pSet->pHeadF;
pWriter->fData = *pSet->pDataF;
pWriter->fSma = *pSet->pSmaF;
for (int8_t iStt = 0; iStt < pSet->nSttF; iStt++) {
pWriter->wSet.aSttF[iStt] = &pWriter->fStt[iStt];
pWriter->fStt[iStt] = *pSet->aSttF[iStt];
}
// head
flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname);
code = tsdbOpenFile(fname, pTsdb, flag, &pWriter->pHeadFD);
if (code) goto _err;
code = tsdbWriteFile(pWriter->pHeadFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
pWriter->fHead.size += TSDB_FHDR_SIZE;
// data
if (pWriter->fData.size == 0) {
flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
} else {
flag = TD_FILE_READ | TD_FILE_WRITE;
}
tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname);
code = tsdbOpenFile(fname, pTsdb, flag, &pWriter->pDataFD);
if (code) goto _err;
if (pWriter->fData.size == 0) {
code = tsdbWriteFile(pWriter->pDataFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
pWriter->fData.size += TSDB_FHDR_SIZE;
}
// sma
if (pWriter->fSma.size == 0) {
flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
} else {
flag = TD_FILE_READ | TD_FILE_WRITE;
}
tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname);
code = tsdbOpenFile(fname, pTsdb, flag, &pWriter->pSmaFD);
if (code) goto _err;
if (pWriter->fSma.size == 0) {
code = tsdbWriteFile(pWriter->pSmaFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
pWriter->fSma.size += TSDB_FHDR_SIZE;
}
// stt
ASSERT(pWriter->fStt[pSet->nSttF - 1].size == 0);
flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
tsdbSttFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fStt[pSet->nSttF - 1], fname);
code = tsdbOpenFile(fname, pTsdb, flag, &pWriter->pSttFD);
if (code) goto _err;
code = tsdbWriteFile(pWriter->pSttFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
pWriter->fStt[pWriter->wSet.nSttF - 1].size += TSDB_FHDR_SIZE;
*ppWriter = pWriter;
return code;
_err:
tsdbError("vgId:%d, tsdb data file writer open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
*ppWriter = NULL;
return code;
}
int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) {
int32_t code = 0;
STsdb *pTsdb = NULL;
if (*ppWriter == NULL) goto _exit;
pTsdb = (*ppWriter)->pTsdb;
if (sync) {
code = tsdbFsyncFile((*ppWriter)->pHeadFD);
if (code) goto _err;
code = tsdbFsyncFile((*ppWriter)->pDataFD);
if (code) goto _err;
code = tsdbFsyncFile((*ppWriter)->pSmaFD);
if (code) goto _err;
code = tsdbFsyncFile((*ppWriter)->pSttFD);
if (code) goto _err;
}
tsdbCloseFile(&(*ppWriter)->pHeadFD);
tsdbCloseFile(&(*ppWriter)->pDataFD);
tsdbCloseFile(&(*ppWriter)->pSmaFD);
tsdbCloseFile(&(*ppWriter)->pSttFD);
for (int32_t iBuf = 0; iBuf < sizeof((*ppWriter)->aBuf) / sizeof(uint8_t *); iBuf++) {
tFree((*ppWriter)->aBuf[iBuf]);
}
taosMemoryFree(*ppWriter);
_exit:
*ppWriter = NULL;
return code;
_err:
tsdbError("vgId:%d, data file writer close failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) {
int32_t code = 0;
int64_t n;
char hdr[TSDB_FHDR_SIZE];
// head ==============
memset(hdr, 0, TSDB_FHDR_SIZE);
tPutHeadFile(hdr, &pWriter->fHead);
code = tsdbWriteFile(pWriter->pHeadFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
// data ==============
memset(hdr, 0, TSDB_FHDR_SIZE);
tPutDataFile(hdr, &pWriter->fData);
code = tsdbWriteFile(pWriter->pDataFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
// sma ==============
memset(hdr, 0, TSDB_FHDR_SIZE);
tPutSmaFile(hdr, &pWriter->fSma);
code = tsdbWriteFile(pWriter->pSmaFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
// stt ==============
memset(hdr, 0, TSDB_FHDR_SIZE);
tPutSttFile(hdr, &pWriter->fStt[pWriter->wSet.nSttF - 1]);
code = tsdbWriteFile(pWriter->pSttFD, 0, hdr, TSDB_FHDR_SIZE);
if (code) goto _err;
return code;
_err:
tsdbError("vgId:%d, update DFileSet header failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) {
int32_t code = 0;
SHeadFile *pHeadFile = &pWriter->fHead;
int64_t size;
int64_t n;
// check
if (taosArrayGetSize(aBlockIdx) == 0) {
pHeadFile->offset = pHeadFile->size;
goto _exit;
}
// prepare
size = 0;
for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
size += tPutBlockIdx(NULL, taosArrayGet(aBlockIdx, iBlockIdx));
}
// alloc
code = tRealloc(&pWriter->aBuf[0], size);
if (code) goto _err;
// build
n = 0;
for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
n += tPutBlockIdx(pWriter->aBuf[0] + n, taosArrayGet(aBlockIdx, iBlockIdx));
}
ASSERT(n == size);
// write
code = tsdbWriteFile(pWriter->pHeadFD, pHeadFile->size, pWriter->aBuf[0], size);
if (code) goto _err;
// update
pHeadFile->offset = pHeadFile->size;
pHeadFile->size += size;
_exit:
// tsdbTrace("vgId:%d, write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d",
// TD_VID(pWriter->pTsdb->pVnode),
// pHeadFile->offset, size, taosArrayGetSize(aBlockIdx));
return code;
_err:
tsdbError("vgId:%d, write block idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteDataBlk(SDataFWriter *pWriter, SMapData *mDataBlk, SBlockIdx *pBlockIdx) {
int32_t code = 0;
SHeadFile *pHeadFile = &pWriter->fHead;
int64_t size;
int64_t n;
ASSERT(mDataBlk->nItem > 0);
// alloc
size = tPutMapData(NULL, mDataBlk);
code = tRealloc(&pWriter->aBuf[0], size);
if (code) goto _err;
// build
n = tPutMapData(pWriter->aBuf[0], mDataBlk);
// write
code = tsdbWriteFile(pWriter->pHeadFD, pHeadFile->size, pWriter->aBuf[0], size);
if (code) goto _err;
// update
pBlockIdx->offset = pHeadFile->size;
pBlockIdx->size = size;
pHeadFile->size += size;
tsdbTrace("vgId:%d, write block, file ID:%d commit ID:%" PRId64 " suid:%" PRId64 " uid:%" PRId64 " offset:%" PRId64
" size:%" PRId64 " nItem:%d",
TD_VID(pWriter->pTsdb->pVnode), pWriter->wSet.fid, pHeadFile->commitID, pBlockIdx->suid, pBlockIdx->uid,
pBlockIdx->offset, pBlockIdx->size, mDataBlk->nItem);
return code;
_err:
tsdbError("vgId:%d, write block failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteSttBlk(SDataFWriter *pWriter, SArray *aSttBlk) {
int32_t code = 0;
SSttFile *pSttFile = &pWriter->fStt[pWriter->wSet.nSttF - 1];
int64_t size = 0;
int64_t n;
// check
if (taosArrayGetSize(aSttBlk) == 0) {
pSttFile->offset = pSttFile->size;
goto _exit;
}
// size
size = 0;
for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSttBlk); iBlockL++) {
size += tPutSttBlk(NULL, taosArrayGet(aSttBlk, iBlockL));
}
// alloc
code = tRealloc(&pWriter->aBuf[0], size);
if (code) goto _err;
// encode
n = 0;
for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSttBlk); iBlockL++) {
n += tPutSttBlk(pWriter->aBuf[0] + n, taosArrayGet(aSttBlk, iBlockL));
}
// write
code = tsdbWriteFile(pWriter->pSttFD, pSttFile->size, pWriter->aBuf[0], size);
if (code) goto _err;
// update
pSttFile->offset = pSttFile->size;
pSttFile->size += size;
_exit:
tsdbTrace("vgId:%d, tsdb write stt block, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode),
pSttFile->offset, size);
return code;
_err:
tsdbError("vgId:%d, tsdb write blockl failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, SSmaInfo *pSmaInfo) {
int32_t code = 0;
pSmaInfo->offset = 0;
pSmaInfo->size = 0;
// encode
for (int32_t iColData = 0; iColData < pBlockData->nColData; iColData++) {
SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData);
if ((!pColData->smaOn) || ((pColData->flag & HAS_VALUE) == 0)) continue;
SColumnDataAgg sma = {.colId = pColData->cid};
tColDataCalcSMA[pColData->type](pColData, &sma.sum, &sma.max, &sma.min, &sma.numOfNull);
code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size + tPutColumnDataAgg(NULL, &sma));
if (code) goto _err;
pSmaInfo->size += tPutColumnDataAgg(pWriter->aBuf[0] + pSmaInfo->size, &sma);
}
// write
if (pSmaInfo->size) {
code = tsdbWriteFile(pWriter->pSmaFD, pWriter->fSma.size, pWriter->aBuf[0], pSmaInfo->size);
if (code) goto _err;
pSmaInfo->offset = pWriter->fSma.size;
pWriter->fSma.size += pSmaInfo->size;
}
return code;
_err:
tsdbError("vgId:%d, tsdb write block sma failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo,
int8_t cmprAlg, int8_t toLast) {
int32_t code = 0;
ASSERT(pBlockData->nRow > 0);
if (toLast) {
pBlkInfo->offset = pWriter->fStt[pWriter->wSet.nSttF - 1].size;
} else {
pBlkInfo->offset = pWriter->fData.size;
}
pBlkInfo->szBlock = 0;
pBlkInfo->szKey = 0;
int32_t aBufN[4] = {0};
code = tCmprBlockData(pBlockData, cmprAlg, NULL, NULL, pWriter->aBuf, aBufN);
if (code) goto _err;
// write =================
STsdbFD *pFD = toLast ? pWriter->pSttFD : pWriter->pDataFD;
pBlkInfo->szKey = aBufN[3] + aBufN[2];
pBlkInfo->szBlock = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3];
int64_t offset = pBlkInfo->offset;
code = tsdbWriteFile(pFD, offset, pWriter->aBuf[3], aBufN[3]);
if (code) goto _err;
offset += aBufN[3];
code = tsdbWriteFile(pFD, offset, pWriter->aBuf[2], aBufN[2]);
if (code) goto _err;
offset += aBufN[2];
if (aBufN[1]) {
code = tsdbWriteFile(pFD, offset, pWriter->aBuf[1], aBufN[1]);
if (code) goto _err;
offset += aBufN[1];
}
if (aBufN[0]) {
code = tsdbWriteFile(pFD, offset, pWriter->aBuf[0], aBufN[0]);
if (code) goto _err;
}
// update info
if (toLast) {
pWriter->fStt[pWriter->wSet.nSttF - 1].size += pBlkInfo->szBlock;
} else {
pWriter->fData.size += pBlkInfo->szBlock;
}
// ================= SMA ====================
if (pSmaInfo) {
code = tsdbWriteBlockSma(pWriter, pBlockData, pSmaInfo);
if (code) goto _err;
}
_exit:
tsdbTrace("vgId:%d, tsdb write block data, suid:%" PRId64 " uid:%" PRId64 " nRow:%d, offset:%" PRId64 " size:%d",
TD_VID(pWriter->pTsdb->pVnode), pBlockData->suid, pBlockData->uid, pBlockData->nRow, pBlkInfo->offset,
pBlkInfo->szBlock);
return code;
_err:
tsdbError("vgId:%d, tsdb write block data failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteDiskData(SDataFWriter *pWriter, const SDiskData *pDiskData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo) {
int32_t code = 0;
int32_t lino = 0;
STsdbFD *pFD = NULL;
if (pSmaInfo) {
pFD = pWriter->pDataFD;
pBlkInfo->offset = pWriter->fData.size;
} else {
pFD = pWriter->pSttFD;
pBlkInfo->offset = pWriter->fStt[pWriter->wSet.nSttF - 1].size;
}
pBlkInfo->szBlock = 0;
pBlkInfo->szKey = 0;
// hdr
int32_t n = tPutDiskDataHdr(NULL, &pDiskData->hdr);
code = tRealloc(&pWriter->aBuf[0], n);
TSDB_CHECK_CODE(code, lino, _exit);
tPutDiskDataHdr(pWriter->aBuf[0], &pDiskData->hdr);
code = tsdbWriteFile(pFD, pBlkInfo->offset, pWriter->aBuf[0], n);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szKey += n;
pBlkInfo->szBlock += n;
// uid + ver + key
if (pDiskData->pUid) {
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskData->pUid, pDiskData->hdr.szUid);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szKey += pDiskData->hdr.szUid;
pBlkInfo->szBlock += pDiskData->hdr.szUid;
}
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskData->pVer, pDiskData->hdr.szVer);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szKey += pDiskData->hdr.szVer;
pBlkInfo->szBlock += pDiskData->hdr.szVer;
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskData->pKey, pDiskData->hdr.szKey);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szKey += pDiskData->hdr.szKey;
pBlkInfo->szBlock += pDiskData->hdr.szKey;
// aBlockCol
if (pDiskData->hdr.szBlkCol) {
code = tRealloc(&pWriter->aBuf[0], pDiskData->hdr.szBlkCol);
TSDB_CHECK_CODE(code, lino, _exit);
n = 0;
for (int32_t iDiskCol = 0; iDiskCol < taosArrayGetSize(pDiskData->aDiskCol); iDiskCol++) {
SDiskCol *pDiskCol = (SDiskCol *)taosArrayGet(pDiskData->aDiskCol, iDiskCol);
n += tPutBlockCol(pWriter->aBuf[0] + n, pDiskCol);
}
ASSERT(n == pDiskData->hdr.szBlkCol);
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pWriter->aBuf[0], pDiskData->hdr.szBlkCol);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szBlock += pDiskData->hdr.szBlkCol;
}
// aDiskCol
for (int32_t iDiskCol = 0; iDiskCol < taosArrayGetSize(pDiskData->aDiskCol); iDiskCol++) {
SDiskCol *pDiskCol = (SDiskCol *)taosArrayGet(pDiskData->aDiskCol, iDiskCol);
if (pDiskCol->pBit) {
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskCol->pBit, pDiskCol->bCol.szBitmap);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szBlock += pDiskCol->bCol.szBitmap;
}
if (pDiskCol->pOff) {
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskCol->pOff, pDiskCol->bCol.szOffset);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szBlock += pDiskCol->bCol.szOffset;
}
if (pDiskCol->pVal) {
code = tsdbWriteFile(pFD, pBlkInfo->offset + pBlkInfo->szBlock, pDiskCol->pVal, pDiskCol->bCol.szValue);
TSDB_CHECK_CODE(code, lino, _exit);
pBlkInfo->szBlock += pDiskCol->bCol.szValue;
}
}
if (pSmaInfo) {
pWriter->fData.size += pBlkInfo->szBlock;
} else {
pWriter->fStt[pWriter->wSet.nSttF - 1].size += pBlkInfo->szBlock;
goto _exit;
}
pSmaInfo->offset = 0;
pSmaInfo->size = 0;
for (int32_t iDiskCol = 0; iDiskCol < taosArrayGetSize(pDiskData->aDiskCol); iDiskCol++) {
SDiskCol *pDiskCol = (SDiskCol *)taosArrayGet(pDiskData->aDiskCol, iDiskCol);
if (IS_VAR_DATA_TYPE(pDiskCol->bCol.type)) continue;
if (pDiskCol->bCol.flag == HAS_NULL || pDiskCol->bCol.flag == (HAS_NULL | HAS_NONE)) continue;
if (!pDiskCol->bCol.smaOn) continue;
code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size + tPutColumnDataAgg(NULL, &pDiskCol->agg));
TSDB_CHECK_CODE(code, lino, _exit);
pSmaInfo->size += tPutColumnDataAgg(pWriter->aBuf[0] + pSmaInfo->size, &pDiskCol->agg);
}
if (pSmaInfo->size) {
pSmaInfo->offset = pWriter->fSma.size;
code = tsdbWriteFile(pWriter->pSmaFD, pSmaInfo->offset, pWriter->aBuf[0], pSmaInfo->size);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->fSma.size += pSmaInfo->size;
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) {
int32_t code = 0;
int64_t n;
int64_t size;
TdFilePtr pOutFD = NULL;
TdFilePtr PInFD = NULL;
int32_t szPage = pTsdb->pVnode->config.tsdbPageSize;
char fNameFrom[TSDB_FILENAME_LEN];
char fNameTo[TSDB_FILENAME_LEN];
// head
tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom);
tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo);
pOutFD = taosCreateFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (pOutFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
if (PInFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
n = taosFSendFile(pOutFD, PInFD, 0, tsdbLogicToFileSize(pSetFrom->pHeadF->size, szPage));
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
taosCloseFile(&pOutFD);
taosCloseFile(&PInFD);
// data
tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom);
tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo);
pOutFD = taosCreateFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (pOutFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
if (PInFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
n = taosFSendFile(pOutFD, PInFD, 0, tsdbLogicToFileSize(pSetFrom->pDataF->size, szPage));
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
taosCloseFile(&pOutFD);
taosCloseFile(&PInFD);
// sma
tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom);
tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo);
pOutFD = taosCreateFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (pOutFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
if (PInFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
n = taosFSendFile(pOutFD, PInFD, 0, tsdbLogicToFileSize(pSetFrom->pSmaF->size, szPage));
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
taosCloseFile(&pOutFD);
taosCloseFile(&PInFD);
// stt
for (int8_t iStt = 0; iStt < pSetFrom->nSttF; iStt++) {
tsdbSttFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->aSttF[iStt], fNameFrom);
tsdbSttFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->aSttF[iStt], fNameTo);
pOutFD = taosCreateFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (pOutFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
if (PInFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
n = taosFSendFile(pOutFD, PInFD, 0, tsdbLogicToFileSize(pSetFrom->aSttF[iStt]->size, szPage));
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
taosCloseFile(&pOutFD);
taosCloseFile(&PInFD);
}
return code;
_err:
tsdbError("vgId:%d, tsdb DFileSet copy failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
// SDataFReader ====================================================
int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) {
int32_t code = 0;
@ -1478,173 +849,6 @@ _exit:
return code;
}
// SDelFWriter ====================================================
int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) {
int32_t code = 0;
int32_t lino = 0;
char fname[TSDB_FILENAME_LEN];
uint8_t hdr[TSDB_FHDR_SIZE] = {0};
SDelFWriter *pDelFWriter = NULL;
int64_t n;
// alloc
pDelFWriter = (SDelFWriter *)taosMemoryCalloc(1, sizeof(*pDelFWriter));
if (pDelFWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pDelFWriter->pTsdb = pTsdb;
pDelFWriter->fDel = *pFile;
tsdbDelFileName(pTsdb, pFile, fname);
code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE, &pDelFWriter->pWriteH);
TSDB_CHECK_CODE(code, lino, _exit);
// update header
code = tsdbWriteFile(pDelFWriter->pWriteH, 0, hdr, TSDB_FHDR_SIZE);
TSDB_CHECK_CODE(code, lino, _exit);
pDelFWriter->fDel.size = TSDB_FHDR_SIZE;
pDelFWriter->fDel.offset = 0;
*ppWriter = pDelFWriter;
_exit:
if (code) {
if (pDelFWriter) {
tsdbCloseFile(&pDelFWriter->pWriteH);
taosMemoryFree(pDelFWriter);
}
*ppWriter = NULL;
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(errno));
} else {
*ppWriter = pDelFWriter;
}
return code;
}
int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) {
int32_t code = 0;
SDelFWriter *pWriter = *ppWriter;
STsdb *pTsdb = pWriter->pTsdb;
// sync
if (sync) {
code = tsdbFsyncFile(pWriter->pWriteH);
if (code) goto _err;
}
// close
tsdbCloseFile(&pWriter->pWriteH);
for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) {
tFree(pWriter->aBuf[iBuf]);
}
taosMemoryFree(pWriter);
*ppWriter = NULL;
return code;
_err:
tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) {
int32_t code = 0;
int64_t size;
int64_t n;
// prepare
size = 0;
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData));
}
// alloc
code = tRealloc(&pWriter->aBuf[0], size);
if (code) goto _err;
// build
n = 0;
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData));
}
ASSERT(n == size);
// write
code = tsdbWriteFile(pWriter->pWriteH, pWriter->fDel.size, pWriter->aBuf[0], size);
if (code) goto _err;
// update
pDelIdx->offset = pWriter->fDel.size;
pDelIdx->size = size;
pWriter->fDel.size += size;
return code;
_err:
tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) {
int32_t code = 0;
int64_t size;
int64_t n;
SDelIdx *pDelIdx;
// prepare
size = 0;
for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx));
}
// alloc
code = tRealloc(&pWriter->aBuf[0], size);
if (code) goto _err;
// build
n = 0;
for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx));
}
ASSERT(n == size);
// write
code = tsdbWriteFile(pWriter->pWriteH, pWriter->fDel.size, pWriter->aBuf[0], size);
if (code) goto _err;
// update
pWriter->fDel.offset = pWriter->fDel.size;
pWriter->fDel.size += size;
return code;
_err:
tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter) {
int32_t code = 0;
char hdr[TSDB_FHDR_SIZE] = {0};
int64_t size = TSDB_FHDR_SIZE;
int64_t n;
// build
tPutDelFile(hdr, &pWriter->fDel);
// write
code = tsdbWriteFile(pWriter->pWriteH, 0, hdr, size);
if (code) goto _err;
return code;
_err:
tsdbError("vgId:%d, update del file hdr failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
return code;
}
// SDelFReader ====================================================
struct SDelFReader {
STsdb *pTsdb;

View File

@ -220,7 +220,7 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs)
isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle);
if (!pVnode->restored) {
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
TMSG_INFO(pMsg->msgType));
terrno = TSDB_CODE_SYN_RESTORING;
vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
@ -284,7 +284,7 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs)
vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle);
if (!pVnode->restored) {
vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg,
TMSG_INFO(pMsg->msgType));
vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING);
rpcFreeCont(pMsg->pCont);
@ -567,17 +567,29 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
if (vnodeIsRoleLeader(pVnode)) {
// start to restore all stream tasks
if (tsDisableStream) {
streamMetaWUnLock(pMeta);
vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId);
} else {
vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId);
resetStreamTaskStatus(pVnode->pTq->pStreamMeta);
tqStreamTaskResetStatus(pVnode->pTq->pStreamMeta);
{
if (pMeta->startInfo.taskStarting == 1) {
pMeta->startInfo.restartCount += 1;
tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId,
pMeta->startInfo.restartCount);
streamMetaWUnLock(pMeta);
} else {
pMeta->startInfo.taskStarting = 1;
streamMetaWUnLock(pMeta);
tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false);
}
}
}
} else {
streamMetaWUnLock(pMeta);
vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId);
}
streamMetaWUnLock(pMeta);
}
static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
@ -594,7 +606,7 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
if (pVnode->pTq) {
tqUpdateNodeStage(pVnode->pTq, false);
tqStopStreamTasks(pVnode->pTq);
tqStopStreamTasksAsync(pVnode->pTq);
}
}

View File

@ -105,6 +105,7 @@ typedef struct STableListInfo {
int32_t* groupOffset; // keep the offset value for each group in the tableList
SArray* pTableList;
SHashObj* map; // speedup acquire the tableQueryInfo by table uid
SHashObj* remainGroups; // remaining group has not yet processed the empty group
STableListIdInfo idInfo; // this maybe the super table or ordinary table
} STableListInfo;

View File

@ -217,6 +217,13 @@ enum {
TABLE_SCAN__BLOCK_ORDER = 2,
};
typedef enum ETableCountState {
TABLE_COUNT_STATE_NONE = 0, // before start scan
TABLE_COUNT_STATE_SCAN = 1, // cur group scanning
TABLE_COUNT_STATE_PROCESSED = 2, // cur group processed
TABLE_COUNT_STATE_END = 3, // finish or noneed to process
} ETableCountState;
typedef struct SAggSupporter {
SSHashObj* pResultRowHashTable; // quick locate the window object for each result
char* keyBuf; // window key buffer
@ -263,14 +270,16 @@ typedef struct STableScanInfo {
SSDataBlock* pResBlock;
SHashObj* pIgnoreTables;
SSampleExecInfo sample; // sample execution info
int32_t currentGroupId;
int32_t currentTable;
int32_t tableStartIndex; // current group scan start
int32_t tableEndIndex; // current group scan end
int32_t currentGroupIndex; // current group index of groupOffset
int8_t scanMode;
int8_t assignBlockUid;
uint8_t countState; // empty table count state
bool hasGroupByTag;
bool countOnly;
// TsdReader readerAPI;
bool filesetDelimited;
bool needCountEmptyTable;
} STableScanInfo;
typedef struct STableMergeScanInfo {
@ -298,7 +307,8 @@ typedef struct STableMergeScanInfo {
SHashObj* mSkipTables;
int64_t mergeLimit;
SSortExecInfo sortExecInfo;
bool needCountEmptyTable;
bool bGroupProcessed; // the group return data means processed
bool filesetDelimited;
bool bNewFilesetEvent;
bool bNextDurationBlockEvent;
@ -845,6 +855,8 @@ void resetWinRange(STimeWindow* winRange);
bool checkExpiredData(SStateStore* pAPI, SUpdateInfo* pUpdateInfo, STimeWindowAggSupp* pTwSup, uint64_t tableId, TSKEY ts);
int64_t getDeleteMark(SWindowPhysiNode* pWinPhyNode, int64_t interval);
void resetUnCloseSessionWinInfo(SSHashObj* winMap);
void setStreamOperatorCompleted(struct SOperatorInfo* pOperator);
void reloadAggSupFromDownStream(struct SOperatorInfo* downstream, SStreamAggSupporter* pAggSup);
int32_t encodeSSessionKey(void** buf, SSessionKey* key);
void* decodeSSessionKey(void* buf, SSessionKey* key);

View File

@ -54,10 +54,10 @@ static int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColM
#define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW)
static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SNodeList* pTargets) {
static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SLastRowScanPhysiNode* pScan) {
SNode* pNode;
int32_t idx = 0;
FOREACH(pNode, pTargets) {
FOREACH(pNode, pScan->pTargets) {
if (nodeType(pNode) == QUERY_NODE_COLUMN) {
SColumnNode* pCol = (SColumnNode*)pNode;
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, idx);
@ -65,6 +65,19 @@ static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SNodeList* pTargets)
}
idx++;
}
for (; idx < pBlock->pDataBlock->size; ++idx) {
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, idx);
if (pScan->scan.pScanPseudoCols) {
FOREACH(pNode, pScan->scan.pScanPseudoCols) {
STargetNode* pTarget = (STargetNode*)pNode;
if (pColInfo->info.slotId == pTarget->slotId) {
pColInfo->info.colId = 0;
break;
}
}
}
}
}
SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle,
@ -127,12 +140,12 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
capacity = TMIN(totalTables, 4096);
pInfo->pBufferredRes = createOneDataBlock(pInfo->pRes, false);
setColIdForCacheReadBlock(pInfo->pBufferredRes, pScanNode->pTargets);
setColIdForCacheReadBlock(pInfo->pBufferredRes, pScanNode);
blockDataEnsureCapacity(pInfo->pBufferredRes, capacity);
} else { // by tags
pInfo->retrieveType = CACHESCAN_RETRIEVE_TYPE_SINGLE | SCAN_ROW_TYPE(pScanNode->ignoreNull);
capacity = 1; // only one row output
setColIdForCacheReadBlock(pInfo->pRes, pScanNode->pTargets);
setColIdForCacheReadBlock(pInfo->pRes, pScanNode);
}
initResultSizeInfo(&pOperator->resultInfo, capacity);

View File

@ -16,16 +16,14 @@
#include "executorInt.h"
#include "filter.h"
#include "function.h"
#include "index.h"
#include "operator.h"
#include "os.h"
#include "query.h"
#include "querytask.h"
#include "tdatablock.h"
#include "thash.h"
#include "tmsg.h"
#include "tname.h"
#include "tref.h"
#include "trpc.h"
typedef struct SFetchRspHandleWrapper {
uint32_t exchangeId;
@ -131,14 +129,14 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn
if (pRsp->completed == 1) {
pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
" execId:%d index:%d completed, blocks:%d, numOfRows:%" PRId64 ", rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64
", total:%.2f Kb, try next %d/%" PRIzu,
" execId:%d index:%d completed, blocks:%d, numOfRows:%" PRId64 ", rowsOfSource:%" PRIu64
", totalRows:%" PRIu64 ", total:%.2f Kb, try next %d/%" PRIzu,
GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pRsp->numOfBlocks,
pRsp->numOfRows, pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize / 1024.0, i + 1,
totalSources);
} else {
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
" execId:%d blocks:%d, numOfRows:%" PRId64 ", totalRows:%" PRIu64 ", total:%.2f Kb",
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d blocks:%d, numOfRows:%" PRId64
", totalRows:%" PRIu64 ", total:%.2f Kb",
GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRsp->numOfBlocks,
pRsp->numOfRows, pLoadInfo->totalRows, pLoadInfo->totalSize / 1024.0);
}
@ -343,8 +341,8 @@ SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode
goto _error;
}
pOperator->fpSet =
createOperatorFpSet(prepareLoadRemoteData, loadRemoteData, NULL, destroyExchangeOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
pOperator->fpSet = createOperatorFpSet(prepareLoadRemoteData, loadRemoteData, NULL, destroyExchangeOperatorInfo,
optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
return pOperator;
_error:
@ -411,14 +409,19 @@ int32_t loadRemoteDataCallback(void* param, SDataBuf* pMsg, int32_t code) {
pRsp->useconds = htobe64(pRsp->useconds);
pRsp->numOfBlocks = htonl(pRsp->numOfBlocks);
qDebug("%s fetch rsp received, index:%d, blocks:%d, rows:%" PRId64 ", %p", pSourceDataInfo->taskId, index, pRsp->numOfBlocks,
pRsp->numOfRows, pExchangeInfo);
qDebug("%s fetch rsp received, index:%d, blocks:%d, rows:%" PRId64 ", %p", pSourceDataInfo->taskId, index,
pRsp->numOfBlocks, pRsp->numOfRows, pExchangeInfo);
} else {
taosMemoryFree(pMsg->pData);
pSourceDataInfo->code = code;
qDebug("%s fetch rsp received, index:%d, error:%s, %p", pSourceDataInfo->taskId, index, tstrerror(code),
pSourceDataInfo->code = rpcCvtErrCode(code);
if (pSourceDataInfo->code != code) {
qError("%s fetch rsp received, index:%d, error:%s, cvted error: %s, %p", pSourceDataInfo->taskId, index,
tstrerror(code), tstrerror(pSourceDataInfo->code), pExchangeInfo);
} else {
qError("%s fetch rsp received, index:%d, error:%s, %p", pSourceDataInfo->taskId, index, tstrerror(code),
pExchangeInfo);
}
}
pSourceDataInfo->status = EX_SOURCE_DATA_READY;
code = tsem_post(&pExchangeInfo->ready);
@ -459,7 +462,6 @@ int32_t buildTableScanOperatorParam(SOperatorParam** ppRes, SArray* pUidList, in
return TSDB_CODE_SUCCESS;
}
int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInfo* pTaskInfo, int32_t sourceIndex) {
SSourceDataInfo* pDataInfo = taosArrayGet(pExchangeInfo->pSourceDataInfo, sourceIndex);
if (EX_SOURCE_DATA_NOT_READY != pDataInfo->status) {
@ -490,7 +492,8 @@ int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInfo* pTas
req.queryId = pTaskInfo->id.queryId;
req.execId = pSource->execId;
if (pDataInfo->pSrcUidList) {
int32_t code = buildTableScanOperatorParam(&req.pOpParam, pDataInfo->pSrcUidList, pDataInfo->srcOpType, pDataInfo->tableSeq);
int32_t code =
buildTableScanOperatorParam(&req.pOpParam, pDataInfo->pSrcUidList, pDataInfo->srcOpType, pDataInfo->tableSeq);
taosArrayDestroy(pDataInfo->pSrcUidList);
pDataInfo->pSrcUidList = NULL;
if (TSDB_CODE_SUCCESS != code) {
@ -741,8 +744,8 @@ int32_t seqLoadRemoteData(SOperatorInfo* pOperator) {
SRetrieveTableRsp* pRetrieveRsp = pDataInfo->pRsp;
if (pRsp->completed == 1) {
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%" PRId64 ", rowsOfSource:%" PRIu64
", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu,
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%" PRId64
", rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu,
GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRetrieveRsp->numOfRows,
pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize, pExchangeInfo->current + 1,
totalSources);
@ -800,7 +803,6 @@ int32_t addSingleExchangeSource(SOperatorInfo* pOperator, SExchangeOperatorBasic
return TSDB_CODE_SUCCESS;
}
int32_t addDynamicExchangeSource(SOperatorInfo* pOperator) {
SExchangeInfo* pExchangeInfo = pOperator->info;
int32_t code = TSDB_CODE_SUCCESS;
@ -826,11 +828,11 @@ int32_t addDynamicExchangeSource(SOperatorInfo* pOperator) {
return TSDB_CODE_SUCCESS;
}
int32_t prepareLoadRemoteData(SOperatorInfo* pOperator) {
SExchangeInfo* pExchangeInfo = pOperator->info;
int32_t code = TSDB_CODE_SUCCESS;
if ((OPTR_IS_OPENED(pOperator) && !pExchangeInfo->dynamicOp) || (pExchangeInfo->dynamicOp && NULL == pOperator->pOperatorGetParam)) {
if ((OPTR_IS_OPENED(pOperator) && !pExchangeInfo->dynamicOp) ||
(pExchangeInfo->dynamicOp && NULL == pOperator->pOperatorGetParam)) {
return TSDB_CODE_SUCCESS;
}

View File

@ -456,7 +456,7 @@ static void genTbGroupDigest(const SNode* pGroup, uint8_t* filterDigest, T_MD5_C
}
int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInfo* pTableListInfo, uint8_t* digest,
SStorageAPI* pAPI) {
SStorageAPI* pAPI, bool initRemainGroups) {
int32_t code = TSDB_CODE_SUCCESS;
SArray* pBlockList = NULL;
SSDataBlock* pResBlock = NULL;
@ -590,6 +590,15 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf
goto end;
}
if (initRemainGroups) {
pTableListInfo->remainGroups =
taosHashInit(rows, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK);
if (pTableListInfo->remainGroups == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto end;
}
}
for (int i = 0; i < rows; i++) {
STableKeyInfo* info = taosArrayGet(pTableListInfo->pTableList, i);
@ -631,6 +640,14 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf
int32_t len = (int32_t)(pStart - (char*)keyBuf);
info->groupId = calcGroupId(keyBuf, len);
if (initRemainGroups) {
// groupId ~ table uid
taosHashPut(pTableListInfo->remainGroups, &(info->groupId), sizeof(info->groupId), &(info->uid), sizeof(info->uid));
}
}
if (initRemainGroups) {
pTableListInfo->numOfOuputGroups = taosHashGetSize(pTableListInfo->remainGroups);
}
if (tsTagFilterCache) {
@ -2025,6 +2042,7 @@ STableListInfo* tableListCreate() {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pListInfo->remainGroups = NULL;
pListInfo->pTableList = taosArrayInit(4, sizeof(STableKeyInfo));
if (pListInfo->pTableList == NULL) {
@ -2054,7 +2072,7 @@ void* tableListDestroy(STableListInfo* pTableListInfo) {
taosMemoryFreeClear(pTableListInfo->groupOffset);
taosHashCleanup(pTableListInfo->map);
taosHashCleanup(pTableListInfo->remainGroups);
pTableListInfo->pTableList = NULL;
pTableListInfo->map = NULL;
taosMemoryFree(pTableListInfo);
@ -2068,6 +2086,7 @@ void tableListClear(STableListInfo* pTableListInfo) {
taosArrayClear(pTableListInfo->pTableList);
taosHashClear(pTableListInfo->map);
taosHashClear(pTableListInfo->remainGroups);
taosMemoryFree(pTableListInfo->groupOffset);
pTableListInfo->numOfOuputGroups = 1;
pTableListInfo->oneTableForEachGroup = false;
@ -2122,6 +2141,9 @@ int32_t buildGroupIdMapForAllTables(STableListInfo* pTableListInfo, SReadHandle*
bool groupByTbname = groupbyTbname(group);
size_t numOfTables = taosArrayGetSize(pTableListInfo->pTableList);
if (!numOfTables) {
return code;
}
if (group == NULL || groupByTbname) {
for (int32_t i = 0; i < numOfTables; i++) {
STableKeyInfo* info = taosArrayGet(pTableListInfo->pTableList, i);
@ -2139,11 +2161,18 @@ int32_t buildGroupIdMapForAllTables(STableListInfo* pTableListInfo, SReadHandle*
pTableListInfo->numOfOuputGroups = 1;
}
} else {
code = getColInfoResultForGroupby(pHandle->vnode, group, pTableListInfo, digest, pAPI);
bool initRemainGroups = false;
if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == nodeType(pScanNode)) {
STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pScanNode;
if (tsCountAlwaysReturnValue && pTableScanNode->needCountEmptyTable && !(groupSort || pScanNode->groupOrderScan)) {
initRemainGroups = true;
}
}
code = getColInfoResultForGroupby(pHandle->vnode, group, pTableListInfo, digest, pAPI, initRemainGroups);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
if (pScanNode->groupOrderScan) pTableListInfo->numOfOuputGroups = taosArrayGetSize(pTableListInfo->pTableList);
if (groupSort || pScanNode->groupOrderScan) {
code = sortTableGroup(pTableListInfo);
@ -2244,8 +2273,11 @@ void printDataBlock(SSDataBlock* pBlock, const char* flag, const char* taskIdStr
}
void printSpecDataBlock(SSDataBlock* pBlock, const char* flag, const char* opStr, const char* taskIdStr) {
if (!pBlock || pBlock->info.rows == 0) {
qDebug("%s===stream===%s: Block is Null or Empty", taskIdStr, flag);
if (!pBlock) {
qDebug("%s===stream===%s %s: Block is Null", taskIdStr, flag, opStr);
return;
} else if (pBlock->info.rows == 0) {
qDebug("%s===stream===%s %s: Block is Empty. block type %d", taskIdStr, flag, opStr, pBlock->info.type);
return;
}
if (qDebugFlag & DEBUG_DEBUG) {

View File

@ -1209,7 +1209,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT
STableKeyInfo* pTableInfo = tableListGetInfo(pTableListInfo, 0);
uid = pTableInfo->uid;
ts = INT64_MIN;
pScanInfo->currentTable = 0;
pScanInfo->tableEndIndex = 0;
} else {
taosRUnLockLatch(&pTaskInfo->lock);
qError("no table in table list, %s", id);
@ -1223,16 +1223,16 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT
pInfo->pTableScanOp->resultInfo.totalRows = 0;
// start from current accessed position
// we cannot start from the pScanInfo->currentTable, since the commit offset may cause the rollback of the start
// we cannot start from the pScanInfo->tableEndIndex, since the commit offset may cause the rollback of the start
// position, let's find it from the beginning.
index = tableListFind(pTableListInfo, uid, 0);
taosRUnLockLatch(&pTaskInfo->lock);
if (index >= 0) {
pScanInfo->currentTable = index;
pScanInfo->tableEndIndex = index;
} else {
qError("vgId:%d uid:%" PRIu64 " not found in table list, total:%d, index:%d %s", pTaskInfo->id.vgId, uid,
numOfTables, pScanInfo->currentTable, id);
numOfTables, pScanInfo->tableEndIndex, id);
terrno = TSDB_CODE_PAR_INTERNAL_ERROR;
return -1;
}
@ -1255,12 +1255,12 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT
}
qDebug("tsdb reader created with offset(snapshot) uid:%" PRId64 " ts:%" PRId64 " table index:%d, total:%d, %s",
uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->currentTable, numOfTables, id);
uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->tableEndIndex, numOfTables, id);
} else {
pTaskInfo->storageAPI.tsdReader.tsdSetQueryTableList(pScanBaseInfo->dataReader, &keyInfo, 1);
pTaskInfo->storageAPI.tsdReader.tsdReaderResetStatus(pScanBaseInfo->dataReader, &pScanBaseInfo->cond);
qDebug("tsdb reader offset seek snapshot to uid:%" PRId64 " ts %" PRId64 " table index:%d numOfTable:%d, %s",
uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->currentTable, numOfTables, id);
uid, pScanBaseInfo->cond.twindows.skey, pScanInfo->tableEndIndex, numOfTables, id);
}
// restore the key value

View File

@ -655,6 +655,76 @@ void setTbNameColData(const SSDataBlock* pBlock, SColumnInfoData* pColInfoData,
colDataDestroy(&infoData);
}
static void initNextGroupScan(STableScanInfo* pInfo, STableKeyInfo** pKeyInfo, int32_t* size) {
pInfo->tableStartIndex = pInfo->tableEndIndex + 1;
STableListInfo* pTableListInfo = pInfo->base.pTableListInfo;
int32_t numOfTables = tableListGetSize(pTableListInfo);
STableKeyInfo* pStart = (STableKeyInfo*)tableListGetInfo(pTableListInfo, pInfo->tableStartIndex);
if (pTableListInfo->oneTableForEachGroup) {
pInfo->tableEndIndex = pInfo->tableStartIndex;
} else if (pTableListInfo->groupOffset) {
pInfo->currentGroupIndex++;
if (pInfo->currentGroupIndex + 1 < pTableListInfo->numOfOuputGroups) {
pInfo->tableEndIndex = pTableListInfo->groupOffset[pInfo->currentGroupIndex + 1] - 1;
} else {
pInfo->tableEndIndex = numOfTables - 1;
}
} else {
pInfo->tableEndIndex = numOfTables - 1;
}
if (!pInfo->needCountEmptyTable) {
pInfo->countState = TABLE_COUNT_STATE_END;
} else {
pInfo->countState = TABLE_COUNT_STATE_SCAN;
}
*pKeyInfo = pStart;
*size = pInfo->tableEndIndex - pInfo->tableStartIndex + 1;
}
void markGroupProcessed(STableScanInfo* pInfo, uint64_t groupId) {
if (pInfo->countState == TABLE_COUNT_STATE_END) {
return;
}
if (pInfo->base.pTableListInfo->oneTableForEachGroup || pInfo->base.pTableListInfo->groupOffset) {
pInfo->countState = TABLE_COUNT_STATE_PROCESSED;
} else {
taosHashRemove(pInfo->base.pTableListInfo->remainGroups, &groupId, sizeof(groupId));
}
}
static SSDataBlock* getOneRowResultBlock(SExecTaskInfo* pTaskInfo, STableScanBase* pBase, SSDataBlock* pBlock,
const STableKeyInfo* tbInfo) {
blockDataEmpty(pBlock);
pBlock->info.rows = 1;
pBlock->info.id.uid = tbInfo->uid;
pBlock->info.id.groupId = tbInfo->groupId;
// only one row: set all col data to null & hasNull
int32_t col_num = blockDataGetNumOfCols(pBlock);
for (int32_t i = 0; i < col_num; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
colDataSetNULL(pColInfoData, 0);
}
// set tag/tbname
doSetTagColumnData(pBase, pBlock, pTaskInfo, pBlock->info.rows);
return pBlock;
}
static SSDataBlock* getBlockForEmptyTable(SOperatorInfo* pOperator, const STableKeyInfo* tbInfo) {
STableScanInfo* pTableScanInfo = pOperator->info;
SSDataBlock* pBlock =
getOneRowResultBlock(pOperator->pTaskInfo, &pTableScanInfo->base, pTableScanInfo->pResBlock, tbInfo);
pOperator->resultInfo.totalRows++;
return pBlock;
}
static SSDataBlock* doTableScanImpl(SOperatorInfo* pOperator) {
STableScanInfo* pTableScanInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
@ -736,6 +806,7 @@ static SSDataBlock* doGroupedTableScan(SOperatorInfo* pOperator) {
while (pTableScanInfo->scanTimes < pTableScanInfo->scanInfo.numOfAsc) {
SSDataBlock* p = doTableScanImpl(pOperator);
if (p != NULL) {
markGroupProcessed(pTableScanInfo, p->info.id.groupId);
return p;
}
@ -764,6 +835,7 @@ static SSDataBlock* doGroupedTableScan(SOperatorInfo* pOperator) {
while (pTableScanInfo->scanTimes < total) {
SSDataBlock* p = doTableScanImpl(pOperator);
if (p != NULL) {
markGroupProcessed(pTableScanInfo, p->info.id.groupId);
return p;
}
@ -780,6 +852,33 @@ static SSDataBlock* doGroupedTableScan(SOperatorInfo* pOperator) {
}
}
if (pTableScanInfo->countState < TABLE_COUNT_STATE_END) {
STableListInfo* pTableListInfo = pTableScanInfo->base.pTableListInfo;
if (pTableListInfo->oneTableForEachGroup || pTableListInfo->groupOffset) { // group by tbname, group by tag + sort
if (pTableScanInfo->countState < TABLE_COUNT_STATE_PROCESSED) {
pTableScanInfo->countState = TABLE_COUNT_STATE_PROCESSED;
STableKeyInfo* pStart =
(STableKeyInfo*)tableListGetInfo(pTableScanInfo->base.pTableListInfo, pTableScanInfo->tableStartIndex);
return getBlockForEmptyTable(pOperator, pStart);
}
} else { // group by tag + no sort
int32_t numOfTables = tableListGetSize(pTableListInfo);
if (pTableScanInfo->tableEndIndex + 1 >= numOfTables) {
// get empty group, mark processed & rm from hash
void* pIte = taosHashIterate(pTableListInfo->remainGroups, NULL);
if (pIte != NULL) {
size_t keySize = 0;
uint64_t* pGroupId = taosHashGetKey(pIte, &keySize);
STableKeyInfo info = {.uid = *(uint64_t*)pIte, .groupId = *pGroupId};
taosHashCancelIterate(pTableListInfo->remainGroups, pIte);
markGroupProcessed(pTableScanInfo, *pGroupId);
return getBlockForEmptyTable(pOperator, &info);
}
}
}
pTableScanInfo->countState = TABLE_COUNT_STATE_END;
}
return NULL;
}
@ -839,8 +938,8 @@ static SSDataBlock* startNextGroupScan(SOperatorInfo* pOperator) {
STableScanInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStorageAPI* pAPI = &pTaskInfo->storageAPI;
if ((++pInfo->currentGroupId) >= tableListGetOutputGroups(pInfo->base.pTableListInfo)) {
int32_t numOfTables = tableListGetSize(pInfo->base.pTableListInfo);
if (pInfo->tableEndIndex + 1 >= numOfTables) {
setOperatorCompleted(pOperator);
if (pOperator->dynamicTask) {
taosArrayClear(pInfo->base.pTableListInfo->pTableList);
@ -855,7 +954,7 @@ static SSDataBlock* startNextGroupScan(SOperatorInfo* pOperator) {
int32_t num = 0;
STableKeyInfo* pList = NULL;
tableListGetGroupList(pInfo->base.pTableListInfo, pInfo->currentGroupId, &pList, &num);
initNextGroupScan(pInfo, &pList, &num);
pAPI->tsdReader.tsdSetQueryTableList(pInfo->base.dataReader, pList, num);
pAPI->tsdReader.tsdReaderResetStatus(pInfo->base.dataReader, &pInfo->base.cond);
@ -876,16 +975,17 @@ static SSDataBlock* groupSeqTableScan(SOperatorInfo* pOperator) {
STableScanInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStorageAPI* pAPI = &pTaskInfo->storageAPI;
int32_t num = 0;
STableKeyInfo* pList = NULL;
if (pInfo->currentGroupId == -1) {
if ((++pInfo->currentGroupId) >= tableListGetOutputGroups(pInfo->base.pTableListInfo)) {
if (pInfo->tableEndIndex == -1) {
int32_t numOfTables = tableListGetSize(pInfo->base.pTableListInfo);
if (pInfo->tableEndIndex + 1 == numOfTables) {
setOperatorCompleted(pOperator);
return NULL;
}
int32_t num = 0;
STableKeyInfo* pList = NULL;
tableListGetGroupList(pInfo->base.pTableListInfo, pInfo->currentGroupId, &pList, &num);
initNextGroupScan(pInfo, &pList, &num);
ASSERT(pInfo->base.dataReader == NULL);
int32_t code = pAPI->tsdReader.tsdReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, pInfo->pResBlock,
@ -934,7 +1034,7 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
T_LONG_JMP(pTaskInfo->env, code);
}
if (pOperator->status == OP_EXEC_DONE) {
pInfo->currentGroupId = -1;
pInfo->tableEndIndex = -1;
pOperator->status = OP_OPENED;
SSDataBlock* result = NULL;
while (true) {
@ -950,6 +1050,7 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
if (pInfo->scanMode == TABLE_SCAN__TABLE_ORDER) {
int32_t numOfTables = 0; // tableListGetSize(pTaskInfo->pTableListInfo);
STableKeyInfo tInfo = {0};
pInfo->countState = TABLE_COUNT_STATE_END;
while (1) {
SSDataBlock* result = doGroupedTableScan(pOperator);
@ -958,23 +1059,23 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) {
}
// if no data, switch to next table and continue scan
pInfo->currentTable++;
pInfo->tableEndIndex++;
taosRLockLatch(&pTaskInfo->lock);
numOfTables = tableListGetSize(pInfo->base.pTableListInfo);
if (pInfo->currentTable >= numOfTables) {
if (pInfo->tableEndIndex >= numOfTables) {
qDebug("all table checked in table list, total:%d, return NULL, %s", numOfTables, GET_TASKID(pTaskInfo));
taosRUnLockLatch(&pTaskInfo->lock);
return NULL;
}
tInfo = *(STableKeyInfo*)tableListGetInfo(pInfo->base.pTableListInfo, pInfo->currentTable);
tInfo = *(STableKeyInfo*)tableListGetInfo(pInfo->base.pTableListInfo, pInfo->tableEndIndex);
taosRUnLockLatch(&pTaskInfo->lock);
pAPI->tsdReader.tsdSetQueryTableList(pInfo->base.dataReader, &tInfo, 1);
qDebug("set uid:%" PRIu64 " into scanner, total tables:%d, index:%d/%d %s", tInfo.uid, numOfTables,
pInfo->currentTable, numOfTables, GET_TASKID(pTaskInfo));
pInfo->tableEndIndex, numOfTables, GET_TASKID(pTaskInfo));
pAPI->tsdReader.tsdReaderResetStatus(pInfo->base.dataReader, &pInfo->base.cond);
pInfo->scanTimes = 0;
@ -1067,7 +1168,8 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
goto _error;
}
pInfo->currentGroupId = -1;
pInfo->tableEndIndex = -1;
pInfo->currentGroupIndex = -1;
pInfo->assignBlockUid = pTableScanNode->assignBlockUid;
pInfo->hasGroupByTag = pTableScanNode->pGroupTags ? true : false;
@ -1075,6 +1177,8 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
pTaskInfo);
pOperator->exprSupp.numOfExprs = numOfCols;
pInfo->needCountEmptyTable = tsCountAlwaysReturnValue && pTableScanNode->needCountEmptyTable;
pInfo->base.pTableListInfo = pTableListInfo;
pInfo->base.metaCache.pTableMetaEntryCache = taosLRUCacheInit(1024 * 128, -1, .5);
if (pInfo->base.metaCache.pTableMetaEntryCache == NULL) {
@ -1160,7 +1264,7 @@ void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin, uint6
pTableScanInfo->base.cond.startVersion = 0;
pTableScanInfo->base.cond.endVersion = ver;
pTableScanInfo->scanTimes = 0;
pTableScanInfo->currentGroupId = -1;
pTableScanInfo->tableEndIndex = -1;
pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader);
pTableScanInfo->base.dataReader = NULL;
}
@ -1961,8 +2065,10 @@ static void setBlockGroupIdByUid(SStreamScanInfo* pInfo, SSDataBlock* pBlock) {
}
static void doCheckUpdate(SStreamScanInfo* pInfo, TSKEY endKey, SSDataBlock* pBlock) {
if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) {
if (pInfo->pUpdateInfo) {
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pBlock->info.version);
}
if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) {
checkUpdateData(pInfo, true, pBlock, true);
pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, endKey);
if (pInfo->pUpdateDataRes->info.rows > 0) {
@ -1997,7 +2103,6 @@ void streamScanOperatorSaveCheckpoint(SStreamScanInfo* pInfo) {
int32_t len = streamScanOperatorEncode(pInfo, &pBuf);
pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_CHECKPOINT_NAME, strlen(STREAM_SCAN_OP_CHECKPOINT_NAME), pBuf, len);
taosMemoryFree(pBuf);
pInfo->stateStore.streamStateCommit(pInfo->pState);
}
// other properties are recovered from the execution plan
@ -2062,7 +2167,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pInfo->pTableScanOp->status = OP_OPENED;
pTSInfo->scanTimes = 0;
pTSInfo->currentGroupId = -1;
pTSInfo->tableEndIndex = -1;
}
if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) {
@ -2316,6 +2421,7 @@ FETCH_NEXT_BLOCK:
doCheckUpdate(pInfo, pBlockInfo->window.ekey, pInfo->pRes);
doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL);
blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex);
int64_t numOfUpdateRes = pInfo->pUpdateDataRes->info.rows;
qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows, numOfUpdateRes);
@ -3477,7 +3583,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) {
}
pInfo->tableEndIndex = i - 1;
}
pInfo->bGroupProcessed = false;
int32_t tableStartIdx = pInfo->tableStartIndex;
int32_t tableEndIdx = pInfo->tableEndIndex;
@ -3599,9 +3705,14 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) {
pBlock = getSortedTableMergeScanBlockData(pInfo->pSortHandle, pInfo->pResBlock, pOperator->resultInfo.capacity,
pOperator);
if (pBlock == NULL && !pInfo->bGroupProcessed && pInfo->needCountEmptyTable) {
STableKeyInfo* tbInfo = tableListGetInfo(pInfo->base.pTableListInfo, pInfo->tableStartIndex);
pBlock = getOneRowResultBlock(pTaskInfo, &pInfo->base, pInfo->pResBlock, tbInfo);
}
if (pBlock != NULL) {
pBlock->info.id.groupId = pInfo->groupId;
pOperator->resultInfo.totalRows += pBlock->info.rows;
pInfo->bGroupProcessed = true;
return pBlock;
} else {
if (pInfo->bNewFilesetEvent) {
@ -3756,6 +3867,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
} else {
pInfo->filesetDelimited = pTableScanNode->filesetDelimited;
}
pInfo->needCountEmptyTable = tsCountAlwaysReturnValue && pTableScanNode->needCountEmptyTable;
setOperatorInfo(pOperator, "TableMergeScanOperator", QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN, false, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->exprSupp.numOfExprs = numOfCols;

View File

@ -480,18 +480,18 @@ static SSDataBlock* doStreamEventAgg(SOperatorInfo* pOperator) {
return resBlock;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
if (pInfo->reCkBlock) {
pInfo->reCkBlock = false;
printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo));
return pInfo->pCheckpointRes;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -561,7 +561,7 @@ static SSDataBlock* doStreamEventAgg(SOperatorInfo* pOperator) {
if (resBlock != NULL) {
return resBlock;
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -622,6 +622,7 @@ void streamEventReloadState(SOperatorInfo* pOperator) {
}
setEventWindowFlag(pAggSup, &curInfo);
if (!curInfo.pWinFlag->startFlag || curInfo.pWinFlag->endFlag) {
saveSessionOutputBuf(pAggSup, &curInfo.winInfo);
continue;
}
@ -653,6 +654,7 @@ void streamEventReloadState(SOperatorInfo* pOperator) {
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
reloadAggSupFromDownStream(downstream, &pInfo->streamAggSup);
}
SOperatorInfo* createStreamEventAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,

View File

@ -801,10 +801,24 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat
SRowBuffPos* pResPos = NULL;
SResultRow* pResult = NULL;
int32_t forwardRows = 0;
int32_t endRowId = pSDataBlock->info.rows - 1;
SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex);
tsCols = (int64_t*)pColDataInfo->pData;
if (pSDataBlock->info.window.skey != tsCols[0] || pSDataBlock->info.window.ekey != tsCols[endRowId]) {
qError("table uid %" PRIu64 " data block timestamp range may not be calculated! minKey %" PRId64
",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
blockDataUpdateTsWindow(pSDataBlock, pInfo->primaryTsIndex);
// timestamp of the data is incorrect
if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) {
qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
}
}
int32_t startPos = 0;
TSKEY ts = getStartTsKey(&pSDataBlock->info.window, tsCols);
STimeWindow nextWin = {0};
@ -902,19 +916,6 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat
pInfo->delKey = key;
}
int32_t prevEndPos = (forwardRows - 1) * step + startPos;
if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) {
qError("table uid %" PRIu64 " data block timestamp range may not be calculated! minKey %" PRId64
",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
blockDataUpdateTsWindow(pSDataBlock, 0);
// timestamp of the data is incorrect
if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) {
qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
}
}
if (IS_FINAL_INTERVAL_OP(pOperator)) {
startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos);
} else {
@ -1223,7 +1224,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) {
return pInfo->pCheckpointRes;
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
if (!IS_FINAL_INTERVAL_OP(pOperator)) {
clearFunctionContext(&pOperator->exprSupp);
// semi interval operator clear disk buffer
@ -1623,6 +1624,7 @@ void initDummyFunction(SqlFunctionCtx* pDummy, SqlFunctionCtx* pCtx, int32_t num
pDummy[i].functionId = pCtx[i].functionId;
pDummy[i].isNotNullFunc = pCtx[i].isNotNullFunc;
pDummy[i].isPseudoFunc = pCtx[i].isPseudoFunc;
pDummy[i].fpSet.init = pCtx[i].fpSet.init;
}
}
@ -2342,10 +2344,6 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
}
int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow);
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
ASSERT(pBlock->info.rows > 0);
break;
}
if (code == -1) {
// for history
@ -2362,6 +2360,11 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
continue;
}
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
ASSERT(pBlock->info.rows > 0);
break;
}
pGroupResInfo->index += 1;
for (int32_t j = 0; j < numOfExprs; ++j) {
@ -2608,18 +2611,18 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
return opRes;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
if (pInfo->reCkBlock) {
pInfo->reCkBlock = false;
printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo));
return pInfo->pCheckpointRes;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -2717,7 +2720,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
return opRes;
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -2758,6 +2761,18 @@ void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey,
}
}
void reloadAggSupFromDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup) {
SStateStore* pAPI = &downstream->pTaskInfo->storageAPI.stateStore;
if (downstream->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
reloadAggSupFromDownStream(downstream->pDownstream[0], pAggSup);
return;
}
SStreamScanInfo* pScanInfo = downstream->info;
pAggSup->pUpdateInfo = pScanInfo->pUpdateInfo;
}
void streamSessionSemiReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
@ -2774,6 +2789,9 @@ void streamSessionSemiReloadState(SOperatorInfo* pOperator) {
for (int32_t i = 0; i < num; i++) {
SResultWindowInfo winInfo = {0};
getSessionWindowInfoByKey(pAggSup, pSeKeyBuf + i, &winInfo);
if (!IS_VALID_SESSION_WIN(winInfo)) {
continue;
}
compactSessionSemiWindow(pOperator, &winInfo);
saveSessionOutputBuf(pAggSup, &winInfo);
}
@ -2786,6 +2804,7 @@ void streamSessionSemiReloadState(SOperatorInfo* pOperator) {
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
reloadAggSupFromDownStream(downstream, &pInfo->streamAggSup);
}
void streamSessionReloadState(SOperatorInfo* pOperator) {
@ -2838,6 +2857,7 @@ void streamSessionReloadState(SOperatorInfo* pOperator) {
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
reloadAggSupFromDownStream(downstream, &pInfo->streamAggSup);
}
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
@ -2997,7 +3017,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
clearFunctionContext(&pOperator->exprSupp);
// semi session operator clear disk buffer
clearStreamSessionOperator(pInfo);
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
pInfo->clearState = false;
return NULL;
}
@ -3070,7 +3090,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
clearFunctionContext(&pOperator->exprSupp);
// semi session operator clear disk buffer
clearStreamSessionOperator(pInfo);
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -3547,18 +3567,18 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
return resBlock;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
if (pInfo->reCkBlock) {
pInfo->reCkBlock = false;
printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo));
return pInfo->pCheckpointRes;
}
if (pInfo->recvGetAll) {
pInfo->recvGetAll = false;
resetUnCloseSessionWinInfo(pInfo->streamAggSup.pResultRows);
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -3624,7 +3644,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
if (resBlock != NULL) {
return resBlock;
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -3720,6 +3740,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) {
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
reloadAggSupFromDownStream(downstream, &pInfo->streamAggSup);
}
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
@ -3860,11 +3881,11 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) {
if (pInfo->reCkBlock) {
pInfo->reCkBlock = false;
// printDataBlock(pInfo->pCheckpointRes, "single interval ck");
printDataBlock(pInfo->pCheckpointRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo));
return pInfo->pCheckpointRes;
}
setOperatorCompleted(pOperator);
setStreamOperatorCompleted(pOperator);
return NULL;
}
@ -3896,7 +3917,6 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) {
doDeleteWindows(pOperator, &pInfo->interval, pBlock, pInfo->pDelWins, pInfo->pUpdatedMap);
continue;
} else if (pBlock->info.type == STREAM_GET_ALL) {
qDebug("===stream===%s recv|block type STREAM_GET_ALL", getStreamOpName(pOperator->operatorType));
pInfo->recvGetAll = true;
getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pInfo->pUpdatedMap);
continue;
@ -4079,3 +4099,8 @@ _error:
pTaskInfo->code = code;
return NULL;
}
void setStreamOperatorCompleted(SOperatorInfo* pOperator) {
setOperatorCompleted(pOperator);
qDebug("stask:%s %s status: %d. set completed", GET_TASKID(pOperator->pTaskInfo), getStreamOpName(pOperator->operatorType), pOperator->status);
}

View File

@ -15,9 +15,7 @@
#include "executorInt.h"
#include "filter.h"
#include "function.h"
#include "functionMgt.h"
#include "os.h"
#include "querynodes.h"
#include "systable.h"
#include "tname.h"
@ -32,6 +30,7 @@
#include "storageapi.h"
#include "tcompare.h"
#include "thash.h"
#include "trpc.h"
#include "ttypes.h"
typedef int (*__optSysFilter)(void* a, void* b, int16_t dtype);
@ -1789,8 +1788,8 @@ SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScan
setOperatorInfo(pOperator, "SysTableScanOperator", QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN, false, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pRes->pDataBlock);
pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, doSysTableScan, NULL, destroySysScanOperator, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doSysTableScan, NULL, destroySysScanOperator,
optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
return pOperator;
_error:
@ -1867,7 +1866,13 @@ int32_t loadSysTableCallback(void* param, SDataBuf* pMsg, int32_t code) {
pRsp->handle = htobe64(pRsp->handle);
pRsp->compLen = htonl(pRsp->compLen);
} else {
operator->pTaskInfo->code = code;
operator->pTaskInfo->code = rpcCvtErrCode(code);
if (operator->pTaskInfo->code != code) {
qError("load systable rsp received, error:%s, cvted error:%s", tstrerror(code),
tstrerror(operator->pTaskInfo->code));
} else {
qError("load systable rsp received, error:%s", tstrerror(code));
}
}
tsem_post(&pScanResInfo->ready);

View File

@ -169,13 +169,14 @@ static void doFillOneRow(SFillInfo* pFillInfo, SSDataBlock* pBlock, SSDataBlock*
setNotFillColumn(pFillInfo, pDstCol, index, i);
}
} else {
SGroupKeys* pKey = taosArrayGet(pFillInfo->prev.pRowVal, i);
SRowVal* pRVal = FILL_IS_ASC_FILL(pFillInfo) ? &pFillInfo->prev : &pFillInfo->next;
SGroupKeys* pKey = taosArrayGet(pRVal->pRowVal, i);
if (IS_VAR_DATA_TYPE(type) || type == TSDB_DATA_TYPE_BOOL || pKey->isNull) {
colDataSetNULL(pDstCol, index);
continue;
}
SGroupKeys* pKey1 = taosArrayGet(pFillInfo->prev.pRowVal, pFillInfo->tsSlotId);
SGroupKeys* pKey1 = taosArrayGet(pRVal->pRowVal, pFillInfo->tsSlotId);
int64_t prevTs = *(int64_t*)pKey1->pData;
int32_t srcSlotId = GET_DEST_SLOT_ID(pCol);
@ -346,9 +347,10 @@ static int32_t fillResultImpl(SFillInfo* pFillInfo, SSDataBlock* pBlock, int32_t
char* src = colDataGetData(pSrc, pFillInfo->index);
if (!colDataIsNull_s(pSrc, pFillInfo->index)) {
colDataSetVal(pDst, index, src, false);
saveColData(pFillInfo->prev.pRowVal, i, src, false);
SRowVal* pRVal = FILL_IS_ASC_FILL(pFillInfo) ? &pFillInfo->prev : &pFillInfo->next;
saveColData(pRVal->pRowVal, i, src, false);
if (pFillInfo->srcTsSlotId == dstSlotId) {
pFillInfo->prev.key = *(int64_t*)src;
pRVal->key = *(int64_t*)src;
}
} else { // the value is null
if (pDst->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
@ -361,7 +363,8 @@ static int32_t fillResultImpl(SFillInfo* pFillInfo, SSDataBlock* pBlock, int32_t
} else if (pFillInfo->type == TSDB_FILL_LINEAR) {
bool isNull = colDataIsNull_s(pSrc, pFillInfo->index);
colDataSetVal(pDst, index, src, isNull);
saveColData(pFillInfo->prev.pRowVal, i, src, isNull); // todo:
SArray* p = FILL_IS_ASC_FILL(pFillInfo) ? pFillInfo->prev.pRowVal : pFillInfo->next.pRowVal;
saveColData(p, i, src, isNull); // todo:
} else if (pFillInfo->type == TSDB_FILL_NULL || pFillInfo->type == TSDB_FILL_NULL_F) {
colDataSetNULL(pDst, index);
} else if (pFillInfo->type == TSDB_FILL_NEXT) {

View File

@ -475,6 +475,7 @@ static void appendOneRowToDataBlock(SSDataBlock* pBlock, const SSDataBlock* pSou
if (isNull) {
colDataSetVal(pColInfo, pBlock->info.rows, NULL, true);
} else {
if (!pSrcColInfo->pData) continue;
char* pData = colDataGetData(pSrcColInfo, *rowIndex);
colDataSetVal(pColInfo, pBlock->info.rows, pData, false);
}
@ -877,6 +878,9 @@ static int32_t blockCompareTsFn(const void* pLeft, const void* pRight, void* par
static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, SArray* aPgId) {
int32_t pageId = -1;
void* pPage = getNewBufPage(pHandle->pBuf, &pageId);
if (pPage == NULL) {
return terrno;
}
taosArrayPush(aPgId, &pageId);
int32_t size = blockDataGetSize(blk) + sizeof(int32_t) + taosArrayGetSize(blk->pDataBlock) * sizeof(int32_t);
@ -900,7 +904,7 @@ static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdx
for (int32_t i = 0; i < numCols; ++i) {
SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(blk->pDataBlock, i);
if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) {
if (pColInfoData->varmeta.offset[row] != -1) {
if ((pColInfoData->varmeta.offset[row] != -1) && (pColInfoData->pData)) {
char* p = colDataGetData(pColInfoData, row);
sz += varDataTLen(p);
}
@ -943,16 +947,15 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO
totalRows += blk->info.rows;
}
SArray* aPgId = taosArrayInit(8, sizeof(int32_t));
SMultiwayMergeTreeInfo* pTree = NULL;
code = tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn);
if (TSDB_CODE_SUCCESS != code) {
taosMemoryFree(sup.aRowIdx);
taosMemoryFree(sup.aTs);
return code;
}
SArray* aPgId = taosArrayInit(8, sizeof(int32_t));
int32_t nRows = 0;
int32_t nMergedRows = 0;
bool mergeLimitReached = false;
@ -968,9 +971,15 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO
if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) {
SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId);
lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1];
appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
code = appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
if (code != TSDB_CODE_SUCCESS) {
taosMemoryFree(pTree);
taosArrayDestroy(aPgId);
taosMemoryFree(sup.aRowIdx);
taosMemoryFree(sup.aTs);
return code;
}
nMergedRows += pHandle->pDataBlock->info.rows;
blockDataCleanup(pHandle->pDataBlock);
blkPgSz = pgHeaderSz;
bufInc = getPageBufIncForRow(minBlk, minRow, 0);
@ -1001,7 +1010,14 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO
if (!mergeLimitReached) {
SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId);
lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1];
appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
code = appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId);
if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroy(aPgId);
taosMemoryFree(pTree);
taosMemoryFree(sup.aRowIdx);
taosMemoryFree(sup.aTs);
return code;
}
nMergedRows += pHandle->pDataBlock->info.rows;
if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) {
mergeLimitReached = true;

View File

@ -28,8 +28,6 @@ extern "C" {
#include "tudf.h"
#include "tvariant.h"
bool topbot_datablock_filter(SqlFunctionCtx *pCtx, const char *minval, const char *maxval);
/**
* the numOfRes should be kept, since it may be used later
* and allow the ResultInfo to be re initialized

View File

@ -26,12 +26,12 @@ extern "C" {
typedef struct MinMaxEntry {
union {
double dMinVal;
int64_t i64MinVal;
//double i64MinVal;
uint64_t u64MinVal;
};
union {
double dMaxVal;
int64_t i64MaxVal;
//double i64MaxVal;
int64_t u64MaxVal;
};
} MinMaxEntry;

View File

@ -27,33 +27,3 @@
#include "tpercentile.h"
#include "ttszip.h"
#include "tudf.h"
int32_t getNumOfResult(SqlFunctionCtx* pCtx, int32_t num, SSDataBlock* pResBlock) {
int32_t maxRows = 0;
for (int32_t j = 0; j < num; ++j) {
SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[j]);
if (pResInfo != NULL && maxRows < pResInfo->numOfRes) {
maxRows = pResInfo->numOfRes;
}
}
blockDataEnsureCapacity(pResBlock, maxRows);
for (int32_t i = 0; i < num; ++i) {
SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, i);
SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[i]);
if (pResInfo->numOfRes == 0) {
for (int32_t j = 0; j < pResInfo->numOfRes; ++j) {
colDataSetVal(pCol, j, NULL, true); // TODO add set null data api
}
} else {
for (int32_t j = 0; j < pResInfo->numOfRes; ++j) {
colDataSetVal(pCol, j, GET_ROWCELL_INTERBUF(pResInfo), false);
}
}
}
pResBlock->info.rows = maxRows;
return maxRows;
}

View File

@ -63,8 +63,8 @@ static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx)
static void resetBoundingBox(MinMaxEntry *range, int32_t type) {
if (IS_SIGNED_NUMERIC_TYPE(type)) {
range->i64MaxVal = INT64_MIN;
range->i64MinVal = INT64_MAX;
range->dMaxVal = INT64_MIN;
range->dMinVal = INT64_MAX;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
range->u64MaxVal = 0;
range->u64MinVal = UINT64_MAX;
@ -80,8 +80,8 @@ static int32_t setBoundingBox(MinMaxEntry *range, int16_t type, double minval, d
}
if (IS_SIGNED_NUMERIC_TYPE(type)) {
range->i64MinVal = (int64_t)minval;
range->i64MaxVal = (int64_t)maxval;
range->dMinVal = (int64_t)minval;
range->dMaxVal = (int64_t)maxval;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
range->u64MinVal = (uint64_t)minval;
range->u64MaxVal = (uint64_t)maxval;
@ -137,21 +137,21 @@ int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) {
int32_t index = -1;
if (v > pBucket->range.i64MaxVal || v < pBucket->range.i64MinVal) {
if (v > pBucket->range.dMaxVal || v < pBucket->range.dMinVal) {
return index;
}
// divide the value range into 1024 buckets
uint64_t span = pBucket->range.i64MaxVal - pBucket->range.i64MinVal;
uint64_t span = pBucket->range.dMaxVal - pBucket->range.dMinVal;
if (span < pBucket->numOfSlots) {
int64_t delta = v - pBucket->range.i64MinVal;
int64_t delta = v - pBucket->range.dMinVal;
index = (delta % pBucket->numOfSlots);
} else {
double slotSpan = ((double)span) / pBucket->numOfSlots;
uint64_t delta = v - pBucket->range.i64MinVal;
uint64_t delta = (uint64_t)(v - pBucket->range.dMinVal);
index = (int32_t)(delta / slotSpan);
if (v == pBucket->range.i64MaxVal || index == pBucket->numOfSlots) {
index = delta / slotSpan;
if (v == pBucket->range.dMaxVal || index == pBucket->numOfSlots) {
index -= 1;
}
}
@ -318,23 +318,23 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataT
int64_t v = 0;
GET_TYPED_DATA(v, int64_t, dataType, data);
if (r->i64MinVal > v) {
r->i64MinVal = v;
if (r->dMinVal > v) {
r->dMinVal = v;
}
if (r->i64MaxVal < v) {
r->i64MaxVal = v;
if (r->dMaxVal < v) {
r->dMaxVal = v;
}
} else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) {
uint64_t v = 0;
GET_TYPED_DATA(v, uint64_t, dataType, data);
if (r->i64MinVal > v) {
r->i64MinVal = v;
if (r->u64MinVal > v) {
r->u64MinVal = v;
}
if (r->i64MaxVal < v) {
r->i64MaxVal = v;
if (r->u64MaxVal < v) {
r->u64MaxVal = v;
}
} else if (IS_FLOAT_TYPE(dataType)) {
double v = 0;
@ -438,7 +438,7 @@ static double getIdenticalDataVal(tMemBucket *pMemBucket, int32_t slotIndex) {
double finalResult = 0.0;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = (double)pSlot->range.i64MinVal;
finalResult = (double)pSlot->range.dMinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = (double)pSlot->range.u64MinVal;
} else {
@ -469,8 +469,8 @@ int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction
double maxOfThisSlot = 0;
double minOfNextSlot = 0;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double)pSlot->range.i64MaxVal;
minOfNextSlot = (double)next.i64MinVal;
maxOfThisSlot = (double)pSlot->range.dMaxVal;
minOfNextSlot = (double)next.dMinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = (double)pSlot->range.u64MaxVal;
minOfNextSlot = (double)next.u64MinVal;
@ -577,7 +577,7 @@ int32_t getPercentile(tMemBucket *pMemBucket, double percent, double *result) {
MinMaxEntry *pRange = &pMemBucket->range;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
*result = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal);
*result = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->dMaxVal : pRange->dMinVal);
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
*result = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal);
} else {
@ -603,6 +603,6 @@ bool isIdenticalData(tMemBucket *pMemBucket, int32_t index) {
if (IS_FLOAT_TYPE(pMemBucket->type)) {
return fabs(pSeg->range.dMaxVal - pSeg->range.dMinVal) < DBL_EPSILON;
} else {
return pSeg->range.i64MinVal == pSeg->range.i64MaxVal;
return pSeg->range.dMinVal == pSeg->range.dMaxVal;
}
}

View File

@ -424,6 +424,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) {
COPY_SCALAR_FIELD(groupOrderScan);
COPY_SCALAR_FIELD(onlyMetaCtbIdx);
COPY_SCALAR_FIELD(filesetDelimited);
COPY_SCALAR_FIELD(isCountByTag);
return TSDB_CODE_SUCCESS;
}
@ -652,6 +653,7 @@ static int32_t physiTableScanCopy(const STableScanPhysiNode* pSrc, STableScanPhy
COPY_SCALAR_FIELD(watermark);
COPY_SCALAR_FIELD(igExpired);
COPY_SCALAR_FIELD(filesetDelimited);
COPY_SCALAR_FIELD(needCountEmptyTable);
return TSDB_CODE_SUCCESS;
}

View File

@ -1843,6 +1843,7 @@ static const char* jkTableScanPhysiPlanSubtable = "Subtable";
static const char* jkTableScanPhysiPlanAssignBlockUid = "AssignBlockUid";
static const char* jkTableScanPhysiPlanIgnoreUpdate = "IgnoreUpdate";
static const char* jkTableScanPhysiPlanFilesetDelimited = "FilesetDelimited";
static const char* jkTableScanPhysiPlanNeedCountEmptyTable = "NeedCountEmptyTable";
static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) {
const STableScanPhysiNode* pNode = (const STableScanPhysiNode*)pObj;
@ -1914,7 +1915,9 @@ static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkTableScanPhysiPlanFilesetDelimited, pNode->filesetDelimited);
}
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkTableScanPhysiPlanNeedCountEmptyTable, pNode->needCountEmptyTable);
}
return code;
}
@ -1988,7 +1991,9 @@ static int32_t jsonToPhysiTableScanNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkTableScanPhysiPlanFilesetDelimited, &pNode->filesetDelimited);
}
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkTableScanPhysiPlanNeedCountEmptyTable, &pNode->needCountEmptyTable);
}
return code;
}

View File

@ -2170,6 +2170,9 @@ static int32_t physiTableScanNodeInlineToMsg(const void* pObj, STlvEncoder* pEnc
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeValueBool(pEncoder, pNode->filesetDelimited);
}
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeValueBool(pEncoder, pNode->needCountEmptyTable);
}
return code;
}
@ -2251,7 +2254,9 @@ static int32_t msgToPhysiTableScanNodeInline(STlvDecoder* pDecoder, void* pObj)
if (TSDB_CODE_SUCCESS == code) {
code = tlvDecodeValueBool(pDecoder, &pNode->filesetDelimited);
}
if (TSDB_CODE_SUCCESS == code) {
code = tlvDecodeValueBool(pDecoder, &pNode->needCountEmptyTable);
}
return code;
}

View File

@ -4,9 +4,6 @@ IF (TD_ENTERPRISE)
LIST(APPEND PARSER_SRC ${TD_ENTERPRISE_DIR}/src/plugins/view/src/parserView.c)
ENDIF ()
IF (TD_BI_SUPPORT)
LIST(APPEND PARSER_SRC ${TD_ENTERPRISE_DIR}/src/plugins/bi/src/biRewriteQuery.c)
ENDIF ()
add_library(parser STATIC ${PARSER_SRC})
target_include_directories(
parser

View File

@ -334,6 +334,13 @@ static int32_t calcConstSelectWithoutFrom(SCalcConstContext* pCxt, SSelectStmt*
static int32_t calcConstSelectFrom(SCalcConstContext* pCxt, SSelectStmt* pSelect, bool subquery) {
int32_t code = calcConstFromTable(pCxt, pSelect->pFromTable);
if (TSDB_CODE_SUCCESS == code && QUERY_NODE_TEMP_TABLE == nodeType(pSelect->pFromTable) &&
((STempTableNode*)pSelect->pFromTable)->pSubquery != NULL &&
QUERY_NODE_SELECT_STMT == nodeType(((STempTableNode*)pSelect->pFromTable)->pSubquery) &&
((SSelectStmt*)((STempTableNode*)pSelect->pFromTable)->pSubquery)->isEmptyResult){
pSelect->isEmptyResult = true;
return code;
}
if (TSDB_CODE_SUCCESS == code) {
code = calcConstProjections(pCxt, pSelect, subquery);
}

View File

@ -1104,11 +1104,192 @@ static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** p
return DEAL_RES_CONTINUE;
}
#ifndef TD_ENTERPRISE
static void biMakeAliasNameInMD5(char* pExprStr, int32_t len, char* pAlias) {
T_MD5_CTX ctx;
tMD5Init(&ctx);
tMD5Update(&ctx, pExprStr, len);
tMD5Final(&ctx);
char* p = pAlias;
for (uint8_t i = 0; i < tListLen(ctx.digest); ++i) {
sprintf(p, "%02x", ctx.digest[i]);
p += 2;
}
}
static SNode* biMakeTbnameProjectAstNode(char* funcName, char* tableAlias) {
SValueNode* valNode = NULL;
if (tableAlias != NULL) {
SValueNode* n = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE);
n->literal = tstrdup(tableAlias);
n->node.resType.type = TSDB_DATA_TYPE_BINARY;
n->node.resType.bytes = strlen(n->literal);
n->isDuration = false;
n->translate = false;
valNode = n;
}
SFunctionNode* tbNameFunc = (SFunctionNode*)nodesMakeNode(QUERY_NODE_FUNCTION);
tstrncpy(tbNameFunc->functionName, "tbname", TSDB_FUNC_NAME_LEN);
if (valNode != NULL) {
nodesListMakeAppend(&tbNameFunc->pParameterList, (SNode*)valNode);
}
snprintf(tbNameFunc->node.userAlias, sizeof(tbNameFunc->node.userAlias),
(tableAlias)? "%s.tbname" : "%stbname",
(tableAlias)? tableAlias : "");
strncpy(tbNameFunc->node.aliasName, tbNameFunc->functionName, TSDB_COL_NAME_LEN);
if (funcName == NULL) {
return (SNode*)tbNameFunc;
} else {
SFunctionNode* multiResFunc = (SFunctionNode*)nodesMakeNode(QUERY_NODE_FUNCTION);
tstrncpy(multiResFunc->functionName, funcName, TSDB_FUNC_NAME_LEN);
nodesListMakeAppend(&multiResFunc->pParameterList, (SNode*)tbNameFunc);
if (tsKeepColumnName) {
snprintf(multiResFunc->node.userAlias, sizeof(tbNameFunc->node.userAlias),
(tableAlias)? "%s.tbname" : "%stbname",
(tableAlias)? tableAlias : "");
strcpy(multiResFunc->node.aliasName, tbNameFunc->functionName);
} else {
snprintf(multiResFunc->node.userAlias, sizeof(multiResFunc->node.userAlias),
tableAlias? "%s(%s.tbname)" : "%s(%stbname)", funcName,
tableAlias? tableAlias: "");
biMakeAliasNameInMD5(multiResFunc->node.userAlias, strlen(multiResFunc->node.userAlias), multiResFunc->node.aliasName);
}
return (SNode*)multiResFunc;
}
}
static int32_t biRewriteSelectFuncParamStar(STranslateContext* pCxt, SSelectStmt* pSelect, SNode* pNode, SListCell* pSelectListCell) {
SNodeList* pTbnameNodeList = nodesMakeList();
SFunctionNode* pFunc = (SFunctionNode*)pNode;
if (strcasecmp(pFunc->functionName, "last") == 0 ||
strcasecmp(pFunc->functionName, "last_row") == 0 ||
strcasecmp(pFunc->functionName, "first") == 0) {
SNodeList* pParams = pFunc->pParameterList;
SNode* pPara = NULL;
FOREACH(pPara, pParams) {
if (nodesIsStar(pPara)) {
SArray* pTables = taosArrayGetP(pCxt->pNsLevel, pCxt->currLevel);
size_t n = taosArrayGetSize(pTables);
for (int32_t i = 0; i < n; ++i) {
STableNode* pTable = taosArrayGetP(pTables, i);
if (nodeType(pTable) == QUERY_NODE_REAL_TABLE && ((SRealTableNode*)pTable)->pMeta != NULL &&
((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
SNode* pTbnameNode = biMakeTbnameProjectAstNode(pFunc->functionName, NULL);
nodesListAppend(pTbnameNodeList, pTbnameNode);
}
}
if (LIST_LENGTH(pTbnameNodeList) > 0) {
nodesListInsertListAfterPos(pSelect->pProjectionList, pSelectListCell, pTbnameNodeList);
}
} else if (nodesIsTableStar(pPara)) {
char* pTableAlias = ((SColumnNode*)pPara)->tableAlias;
STableNode* pTable = NULL;
int32_t code = findTable(pCxt, pTableAlias, &pTable);
if (TSDB_CODE_SUCCESS == code && nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta != NULL &&
((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
SNode* pTbnameNode = biMakeTbnameProjectAstNode(pFunc->functionName, pTableAlias);
nodesListAppend(pTbnameNodeList, pTbnameNode);
}
if (LIST_LENGTH(pTbnameNodeList) > 0) {
nodesListInsertListAfterPos(pSelect->pProjectionList, pSelectListCell, pTbnameNodeList);
}
}
}
}
return TSDB_CODE_SUCCESS;
}
// after translate from
// before translate select list
int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) {
SNode* pNode = NULL;
SNodeList* pTbnameNodeList = nodesMakeList();
WHERE_EACH(pNode, pSelect->pProjectionList) {
if (nodesIsStar(pNode)) {
SArray* pTables = taosArrayGetP(pCxt->pNsLevel, pCxt->currLevel);
size_t n = taosArrayGetSize(pTables);
for (int32_t i = 0; i < n; ++i) {
STableNode* pTable = taosArrayGetP(pTables, i);
if (nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta != NULL &&
((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
SNode* pTbnameNode = biMakeTbnameProjectAstNode(NULL, NULL);
nodesListAppend(pTbnameNodeList, pTbnameNode);
}
}
if (LIST_LENGTH(pTbnameNodeList) > 0) {
nodesListInsertListAfterPos(pSelect->pProjectionList, cell, pTbnameNodeList);
}
} else if (nodesIsTableStar(pNode)) {
char* pTableAlias = ((SColumnNode*)pNode)->tableAlias;
STableNode* pTable = NULL;
int32_t code = findTable(pCxt, pTableAlias, &pTable);
if (TSDB_CODE_SUCCESS == code &&
nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta != NULL &&
((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
SNode* pTbnameNode = biMakeTbnameProjectAstNode(NULL, pTableAlias);
nodesListAppend(pTbnameNodeList, pTbnameNode);
}
if (LIST_LENGTH(pTbnameNodeList) > 0) {
nodesListInsertListAfterPos(pSelect->pProjectionList, cell, pTbnameNodeList);
}
} else if (nodeType(pNode) == QUERY_NODE_FUNCTION) {
biRewriteSelectFuncParamStar(pCxt, pSelect, pNode, cell);
}
WHERE_NEXT;
}
return TSDB_CODE_SUCCESS;
}
bool biRewriteToTbnameFunc(STranslateContext* pCxt, SNode** ppNode) {
SColumnNode* pCol = (SColumnNode*)(*ppNode);
if ((strcasecmp(pCol->colName, "tbname") == 0) &&
((SSelectStmt*)pCxt->pCurrStmt)->pFromTable &&
QUERY_NODE_REAL_TABLE == nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) {
SFunctionNode* tbnameFuncNode = NULL;
tbnameFuncNode = (SFunctionNode*)biMakeTbnameProjectAstNode(NULL, (pCol->tableAlias[0]!='\0') ? pCol->tableAlias : NULL);
tbnameFuncNode->node.resType = pCol->node.resType;
strcpy(tbnameFuncNode->node.aliasName, pCol->node.aliasName);
strcpy(tbnameFuncNode->node.userAlias, pCol->node.userAlias);
nodesDestroyNode(*ppNode);
*ppNode = (SNode*)tbnameFuncNode;
return true;
}
return false;
}
#endif
int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt) {
if (pStmt->pTags) {
SNode* pNode = NULL;
FOREACH(pNode, pStmt->pTags) {
SColumnDefNode* pTag = (SColumnDefNode*)pNode;
if (strcasecmp(pTag->colName, "tbname") == 0) {
int32_t code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAG_NAME, "tbname can not used for tags in BI mode");
return code;
}
}
}
if (pStmt->pCols) {
SNode* pNode = NULL;
FOREACH(pNode, pStmt->pCols) {
SColumnDefNode* pCol = (SColumnDefNode*)pNode;
if (strcasecmp(pCol->colName, "tbname") == 0) {
int32_t code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_COLUMN, "tbname can not used for columns in BI mode");
return code;
}
}
}
return TSDB_CODE_SUCCESS;
}
static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) {
if (NULL == pCxt->pCurrStmt ||
@ -1902,6 +2083,7 @@ static void setFuncClassification(SNode* pCurrStmt, SFunctionNode* pFunc) {
if (NULL != pCurrStmt && QUERY_NODE_SELECT_STMT == nodeType(pCurrStmt)) {
SSelectStmt* pSelect = (SSelectStmt*)pCurrStmt;
pSelect->hasAggFuncs = pSelect->hasAggFuncs ? true : fmIsAggFunc(pFunc->funcId);
pSelect->hasCountFunc = pSelect->hasCountFunc ? true : (FUNCTION_TYPE_COUNT == pFunc->funcType);
pSelect->hasRepeatScanFuncs = pSelect->hasRepeatScanFuncs ? true : fmIsRepeatScanFunc(pFunc->funcId);
if (fmIsIndefiniteRowsFunc(pFunc->funcId)) {
@ -3178,10 +3360,6 @@ static int32_t createTags(STranslateContext* pCxt, SNodeList** pOutput) {
return TSDB_CODE_SUCCESS;
}
#ifndef TD_ENTERPRISE
int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) { return TSDB_CODE_SUCCESS; }
#endif
static int32_t translateStar(STranslateContext* pCxt, SSelectStmt* pSelect) {
SNode* pNode = NULL;
WHERE_EACH(pNode, pSelect->pProjectionList) {
@ -5748,11 +5926,6 @@ static int32_t checkTableDeleteMarkOption(STranslateContext* pCxt, STableOptions
return code;
}
#ifndef TD_ENTERPRISE
int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt) {
return TSDB_CODE_SUCCESS;
}
#endif
static int32_t checkCreateTable(STranslateContext* pCxt, SCreateTableStmt* pStmt, bool createStable) {
if (NULL != strchr(pStmt->tableName, '.')) {
@ -6772,11 +6945,37 @@ static int32_t translateCreateFullTextIndex(STranslateContext* pCxt, SCreateInde
}
static int32_t translateCreateNormalIndex(STranslateContext* pCxt, SCreateIndexStmt* pStmt) {
int32_t code = 0;
SName name;
STableMeta* pMeta = NULL;
code = getTargetMeta(pCxt, toName(pCxt->pParseCxt->acctId, pStmt->dbName, pStmt->tableName, &name), &pMeta, false);
if (code) {
taosMemoryFree(pMeta);
return code;
}
if (LIST_LENGTH(pStmt->pCols) != 1) {
taosMemoryFree(pMeta);
return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAGS_NUM, "Only one tag is allowed");
}
SNode* pNode = NULL;
FOREACH(pNode, pStmt->pCols) {
const SSchema* pSchema = getTagSchema(pMeta, ((SColumnNode*)pNode)->colName);
if (!pSchema) {
taosMemoryFree(pMeta);
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAG_NAME, ((SColumnNode*)pNode)->colName);
}
}
SCreateTagIndexReq createTagIdxReq = {0};
int32_t code = buildCreateTagIndexReq(pCxt, pStmt, &createTagIdxReq);
code = buildCreateTagIndexReq(pCxt, pStmt, &createTagIdxReq);
if (TSDB_CODE_SUCCESS == code) {
code = buildCmdMsg(pCxt, TDMT_MND_CREATE_INDEX, (FSerializeFunc)tSerializeSCreateTagIdxReq, &createTagIdxReq);
}
_exit:
taosMemoryFree(pMeta);
return code;
}
@ -7289,7 +7488,7 @@ static int32_t addTagsToCreateStreamQuery(STranslateContext* pCxt, SCreateStream
}
}
if (!found) {
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_COLUMN, ((SColumnDefNode*)pTag)->colName);
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_COLUMN, getTagNameForCreateStreamTag(pTag));
}
}
return TSDB_CODE_SUCCESS;
@ -8004,9 +8203,9 @@ int32_t translatePostCreateStream(SParseContext* pParseCxt, SQuery* pQuery, void
}
if (TSDB_CODE_SUCCESS == code) {
if (interval.interval > 0) {
pStmt->pReq->lastTs = taosTimeTruncate(lastTs, &interval);
pStmt->pReq->lastTs = taosTimeAdd(taosTimeTruncate(lastTs, &interval), interval.interval, interval.intervalUnit, interval.precision);
} else {
pStmt->pReq->lastTs = lastTs;
pStmt->pReq->lastTs = lastTs + 1; // start key of the next time window
}
code = buildCmdMsg(&cxt, TDMT_MND_CREATE_STREAM, (FSerializeFunc)tSerializeSCMCreateStreamReq, pStmt->pReq);
}
@ -8809,7 +9008,7 @@ static int32_t extractCompactDbResultSchema(int32_t* numOfCols, SSchema** pSchem
(*pSchema)[0].type = TSDB_DATA_TYPE_BINARY;
(*pSchema)[0].bytes = COMPACT_DB_RESULT_FIELD1_LEN;
strcpy((*pSchema)[0].name, "name");
strcpy((*pSchema)[0].name, "result");
(*pSchema)[1].type = TSDB_DATA_TYPE_INT;
(*pSchema)[1].bytes = tDataTypes[TSDB_DATA_TYPE_INT].bytes;

View File

@ -486,6 +486,16 @@ static int32_t createScanLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect
code = tagScanSetExecutionMode(pScan);
}
bool isCountByTag = false;
if (pSelect->hasCountFunc && NULL == pSelect->pWindow) {
if (pSelect->pGroupByList) {
isCountByTag = !keysHasCol(pSelect->pGroupByList);
} else if (pSelect->pPartitionByList) {
isCountByTag = !keysHasCol(pSelect->pPartitionByList);
}
}
pScan->isCountByTag = isCountByTag;
if (TSDB_CODE_SUCCESS == code) {
*pLogicNode = (SLogicNode*)pScan;
} else {

View File

@ -2529,7 +2529,7 @@ static bool lastRowScanOptCheckFuncList(SLogicNode* pNode, bool* hasOtherFunc) {
SNode* pParam = nodesListGetNode(pAggFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN == nodeType(pParam)) {
SColumnNode* pCol = (SColumnNode*)pParam;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
if (COLUMN_TYPE_COLUMN == pCol->colType && PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
if (selectNonPKColId != pCol->colId) {
selectNonPKColId = pCol->colId;
selectNonPKColNum++;

View File

@ -623,6 +623,7 @@ static int32_t createTableScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSubp
pTableScan->igCheckUpdate = pScanLogicNode->igCheckUpdate;
pTableScan->assignBlockUid = pCxt->pPlanCxt->rSmaQuery ? true : false;
pTableScan->filesetDelimited = pScanLogicNode->filesetDelimited;
pTableScan->needCountEmptyTable = pScanLogicNode->isCountByTag;
int32_t code = createScanPhysiNodeFinalize(pCxt, pSubplan, pScanLogicNode, (SScanPhysiNode*)pTableScan, pPhyNode);
if (TSDB_CODE_SUCCESS == code) {

View File

@ -1254,6 +1254,7 @@ int32_t toCharFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam* pOu
char * out = taosMemoryCalloc(1, TS_FORMAT_MAX_LEN + VARSTR_HEADER_SIZE);
int32_t len;
SArray *formats = NULL;
int32_t code = 0;
for (int32_t i = 0; i < pInput[0].numOfRows; ++i) {
if (colDataIsNull_s(pInput[1].columnData, i) || colDataIsNull_s(pInput[0].columnData, i)) {
colDataSetNULL(pOutput->columnData, i);
@ -1272,14 +1273,15 @@ int32_t toCharFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam* pOu
}
}
int32_t precision = pInput[0].columnData->info.precision;
taosTs2Char(format, &formats, *(int64_t *)ts, precision, varDataVal(out), TS_FORMAT_MAX_LEN);
code = taosTs2Char(format, &formats, *(int64_t *)ts, precision, varDataVal(out), TS_FORMAT_MAX_LEN);
if (code) break;
varDataSetLen(out, strlen(varDataVal(out)));
colDataSetVal(pOutput->columnData, i, out, false);
}
if (formats) taosArrayDestroy(formats);
taosMemoryFree(format);
taosMemoryFree(out);
return TSDB_CODE_SUCCESS;
return code;
}
/** Time functions **/

View File

@ -19,10 +19,7 @@
#include "rocksdb/c.h"
//#include "streamInt.h"
#include "streamState.h"
#include "tcoding.h"
#include "tcommon.h"
#include "tcompare.h"
#include "ttimer.h"
typedef struct SCfComparator {
rocksdb_comparator_t** comp;

View File

@ -109,13 +109,12 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId);
int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId);
int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask);
int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask);
void streamTaskSetCheckpointFailedId(SStreamTask* pTask);
int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask);
int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*);
STaskId streamTaskExtractKey(const SStreamTask* pTask);
@ -137,17 +136,6 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo,
int32_t streamNotifyUpstreamContinue(SStreamTask* pTask);
int32_t streamTransferStateToStreamTask(SStreamTask* pTask);
// <<<<<<< HEAD
// void streamClearChkptReadyMsg(SStreamTask* pTask);
// int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const
// char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo*
// pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo);
// void streamMetaResetStartInfo(STaskStartInfo* pMeta);
// =======
// >>>>>>> 3.0
SStreamQueue* streamQueueOpen(int64_t cap);
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId);
void streamQueueProcessSuccess(SStreamQueue* queue);

View File

@ -34,6 +34,8 @@ typedef int32_t (*__state_trans_succ_fn)(SStreamTask*);
typedef struct SAttachedEventInfo {
ETaskStatus status; // required status that this event can be handled
EStreamTaskEvent event; // the delayed handled event
void* pParam;
void* pFn;
} SAttachedEventInfo;
typedef struct STaskStateTrans {

View File

@ -14,8 +14,6 @@
*/
#include "streamBackendRocksdb.h"
#include "executor.h"
#include "query.h"
#include "streamInt.h"
#include "tcommon.h"
#include "tref.h"
@ -665,6 +663,7 @@ void streamBackendHandleCleanup(void* arg) {
return;
}
#ifdef BUILD_NO_CALL
int32_t getLatestCheckpoint(void* arg, int64_t* checkpoint) {
SStreamMeta* pMeta = arg;
taosWLockLatch(&pMeta->chkpDirLock);
@ -738,6 +737,7 @@ int32_t delObsoleteCheckpoint(void* arg, const char* path) {
taosArrayDestroy(chkpDel);
return 0;
}
#endif
/*
* checkpointSave |--cp1--|--cp2--|--cp3--|--cp4--|--cp5--|
* chkpInUse: |--cp2--|--cp4--|
@ -855,6 +855,7 @@ int32_t streamBackendLoadCheckpointInfo(void* arg) {
return 0;
}
#ifdef BUILD_NO_CALL
int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*** ppHandle, SArray* refs) {
return 0;
// SArray* pHandle = taosArrayInit(16, POINTER_BYTES);
@ -891,6 +892,7 @@ int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*
// *ppHandle = ppCf;
// return nCf;
}
#endif
int32_t chkpGetAllDbCfHandle2(STaskDbWrapper* pBackend, rocksdb_column_family_handle_t*** ppHandle) {
SArray* pHandle = taosArrayInit(8, POINTER_BYTES);
@ -1006,6 +1008,7 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) {
return code;
}
#ifdef BUILD_NO_CALL
int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId) {
// if (arg == NULL) return 0;
@ -1029,6 +1032,7 @@ int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId) {
// }
// taosWUnLockLatch(&pMeta->chkpDirLock);
}
#endif
/*
0
@ -1053,13 +1057,13 @@ int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) {
rocksdb_column_family_handle_t** ppCf = NULL;
int32_t nCf = chkpGetAllDbCfHandle2(pTaskDb, &ppCf);
qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pTaskDb, pChkpIdDir, nCf);
stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pTaskDb, pChkpIdDir, nCf);
if ((code = chkpPreFlushDb(pTaskDb->db, ppCf, nCf)) == 0) {
if ((code = chkpDoDbCheckpoint(pTaskDb->db, pChkpIdDir)) != 0) {
stError("stream backend:%p failed to do checkpoint at:%s", pTaskDb, pChkpIdDir);
} else {
qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir,
stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir,
taosGetTimestampMs() - st);
}
} else {
@ -1099,7 +1103,9 @@ void streamBackendDelCompare(void* backend, void* arg) {
taosMemoryFree(node);
}
}
#ifdef BUILD_NO_CALL
void streamStateDestroy_rocksdb(SStreamState* pState, bool remove) { streamStateCloseBackend(pState, remove); }
#endif
void destroyRocksdbCfInst(RocksdbCfInst* inst) {
int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]);
if (inst->pHandle) {
@ -1789,7 +1795,7 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) {
cfNames = NULL;
}
qDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb);
stDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb);
return pTaskDb;
_EXIT:
@ -1818,7 +1824,7 @@ void taskDbDestroy(void* pDb, bool flush) {
streamMetaRemoveDB(wrapper->pMeta, wrapper->idstr);
qDebug("succ to destroy stream backend:%p", wrapper);
stDebug("succ to destroy stream backend:%p", wrapper);
int8_t nCf = sizeof(ginitDict) / sizeof(ginitDict[0]);
@ -2170,6 +2176,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t
taosMemoryFree(cfOpts);
return 0;
}
#ifdef BUILD_NO_CALL
int streamStateOpenBackend(void* backend, SStreamState* pState) {
taosAcquireRef(streamBackendId, pState->streamBackendRid);
SBackendWrapper* handle = backend;
@ -2279,6 +2286,7 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) {
wrapper->remove |= remove; // update by other pState
taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId);
}
#endif
void streamStateDestroyCompar(void* arg) {
SCfComparator* comp = (SCfComparator*)arg;
for (int i = 0; i < comp->numOfComp; i++) {
@ -2313,7 +2321,7 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) {
stError("failed to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err);
taosMemoryFree(err);
} else {
qDebug("succ to open cf, %p %s_%s", pState, wrapper->idstr, funcName);
stDebug("succ to open cf, %p %s_%s", pState, wrapper->idstr, funcName);
wrapper->pCf[idx] = cf;
}
}
@ -2355,14 +2363,14 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s", funcname); \
stWarn("streamState failed to get cf name: %s", funcname); \
code = -1; \
break; \
} \
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \
wrapper->dataWritten += 1; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
if (stDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \
rocksdb_writeoptions_t* opts = wrapper->writeOpt; \
@ -2375,7 +2383,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
taosMemoryFree(err); \
code = -1; \
} else { \
qTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d, %p", toString, funcname, vLen, \
stTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d, %p", toString, funcname, vLen, \
ttlVLen, wrapper); \
} \
taosMemoryFree(ttlV); \
@ -2388,13 +2396,13 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s", funcname); \
stWarn("streamState failed to get cf name: %s", funcname); \
code = -1; \
break; \
} \
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
if (stDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \
rocksdb_t* db = wrapper->db; \
@ -2403,7 +2411,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \
if (val == NULL || len == 0) { \
if (err == NULL) { \
qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \
stTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \
} else { \
stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \
taosMemoryFreeClear(err); \
@ -2417,8 +2425,8 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
funcname); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to read from %s_%s, valLen:%d, %p", toString, wrapper->idstr, funcname, tlen, \
wrapper); \
stTrace("streamState str: %s succ to read from %s_%s, valLen:%d, %p", toString, wrapper->idstr, funcname, \
tlen, wrapper); \
} \
taosMemoryFree(val); \
if (vLen != NULL) *vLen = tlen; \
@ -2432,14 +2440,14 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \
stWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \
code = -1; \
break; \
} \
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \
wrapper->dataWritten += 1; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
if (stDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \
rocksdb_t* db = wrapper->db; \
@ -2450,7 +2458,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe
taosMemoryFree(err); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \
stTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \
} \
} while (0);
@ -2669,9 +2677,9 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) {
STREAM_STATE_DEL_ROCKSDB(pState, "state", &maxStateKey);
return pCur;
}
#ifdef BUILD_NO_CALL
SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateGetCur_rocksdb");
stDebug("streamStateGetCur_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
SStreamStateCur* pCur = createStreamStateCursor();
@ -2719,6 +2727,7 @@ int32_t streamStateFuncDel_rocksdb(SStreamState* pState, const STupleKey* key) {
STREAM_STATE_DEL_ROCKSDB(pState, "func", key);
return 0;
}
#endif
// session cf
int32_t streamStateSessionPut_rocksdb(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) {
@ -2764,7 +2773,7 @@ int32_t streamStateSessionDel_rocksdb(SStreamState* pState, const SSessionKey* k
}
SStreamStateCur* streamStateSessionSeekToLast_rocksdb(SStreamState* pState) {
qDebug("streamStateSessionSeekToLast_rocksdb");
stDebug("streamStateSessionSeekToLast_rocksdb");
int32_t code = 0;
@ -2801,7 +2810,7 @@ SStreamStateCur* streamStateSessionSeekToLast_rocksdb(SStreamState* pState) {
}
int32_t streamStateSessionCurPrev_rocksdb(SStreamStateCur* pCur) {
qDebug("streamStateCurPrev_rocksdb");
stDebug("streamStateCurPrev_rocksdb");
if (!pCur) return -1;
rocksdb_iter_prev(pCur->iter);
@ -2851,7 +2860,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta
return pCur;
}
SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) {
qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb");
stDebug("streamStateSessionSeekKeyCurrentNext_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
SStreamStateCur* pCur = createStreamStateCursor();
if (pCur == NULL) {
@ -2889,7 +2898,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta
}
SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) {
qDebug("streamStateSessionSeekKeyNext_rocksdb");
stDebug("streamStateSessionSeekKeyNext_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
SStreamStateCur* pCur = createStreamStateCursor();
if (pCur == NULL) {
@ -2993,7 +3002,7 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) {
}
SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillGetCur_rocksdb");
stDebug("streamStateFillGetCur_rocksdb");
SStreamStateCur* pCur = createStreamStateCursor();
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
@ -3053,7 +3062,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey,
}
SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyNext_rocksdb");
stDebug("streamStateFillSeekKeyNext_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
SStreamStateCur* pCur = createStreamStateCursor();
if (!pCur) {
@ -3091,7 +3100,7 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const
return NULL;
}
SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyPrev_rocksdb");
stDebug("streamStateFillSeekKeyPrev_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
SStreamStateCur* pCur = createStreamStateCursor();
if (pCur == NULL) {
@ -3128,6 +3137,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const
streamStateFreeCur(pCur);
return NULL;
}
#ifdef BUILD_NO_CALL
int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) {
stDebug("streamStateSessionGetKeyByRange_rocksdb");
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
@ -3185,6 +3195,7 @@ int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSes
streamStateFreeCur(pCur);
return -1;
}
#endif
int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal,
int32_t* pVLen) {
@ -3317,6 +3328,7 @@ _end:
return res;
}
#ifdef BUILD_NO_CALL
// partag cf
int32_t streamStatePutParTag_rocksdb(SStreamState* pState, int64_t groupId, const void* tag, int32_t tagLen) {
int code = 0;
@ -3329,6 +3341,7 @@ int32_t streamStateGetParTag_rocksdb(SStreamState* pState, int64_t groupId, void
STREAM_STATE_GET_ROCKSDB(pState, "partag", &groupId, tagVal, tagLen);
return code;
}
#endif
// parname cfg
int32_t streamStatePutParName_rocksdb(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) {
int code = 0;
@ -3342,11 +3355,13 @@ int32_t streamStateGetParName_rocksdb(SStreamState* pState, int64_t groupId, voi
return code;
}
#ifdef BUILD_NO_CALL
int32_t streamDefaultPut_rocksdb(SStreamState* pState, const void* key, void* pVal, int32_t pVLen) {
int code = 0;
STREAM_STATE_PUT_ROCKSDB(pState, "default", key, pVal, pVLen);
return code;
}
#endif
int32_t streamDefaultGet_rocksdb(SStreamState* pState, const void* key, void** pVal, int32_t* pVLen) {
int code = 0;
STREAM_STATE_GET_ROCKSDB(pState, "default", key, pVal, pVLen);
@ -3400,6 +3415,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co
rocksdb_iter_destroy(pIter);
return code;
}
#ifdef BUILD_NO_CALL
void* streamDefaultIterCreate_rocksdb(SStreamState* pState) {
SStreamStateCur* pCur = createStreamStateCursor();
STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend;
@ -3443,6 +3459,7 @@ char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) {
return ret;
}
#endif
// batch func
void* streamStateCreateBatch() {
rocksdb_writebatch_t* pBatch = rocksdb_writebatch_create();
@ -3796,11 +3813,12 @@ void dbChkpDestroy(SDbChkp* pChkp) {
taosMemoryFree(pChkp->pManifest);
taosMemoryFree(pChkp);
}
#ifdef BUILD_NO_CALL
int32_t dbChkpInit(SDbChkp* p) {
if (p == NULL) return 0;
return 0;
}
#endif
int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) {
taosThreadRwlockRdlock(&p->rwLock);
int32_t code = -1;
@ -3945,6 +3963,7 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list,
return code;
}
#ifdef BUILD_NO_CALL
int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path) {
int32_t code = -1;
@ -3975,3 +3994,4 @@ int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname) {
taosThreadRwlockUnlock(&bm->rwLock);
return code;
}
#endif

View File

@ -25,6 +25,7 @@ typedef struct {
SStreamTask* pTask;
} SAsyncUploadArg;
int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
@ -34,6 +35,7 @@ int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckp
if (tEncodeSEpSet(pEncoder, &pReq->mgmtEps) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->mnodeId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->transId) < 0) return -1;
tEndEncode(pEncoder);
return pEncoder->pos;
}
@ -47,6 +49,7 @@ int32_t tDecodeStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSo
if (tDecodeSEpSet(pDecoder, &pReq->mgmtEps) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->mnodeId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->transId) < 0) return -1;
tEndDecode(pDecoder);
return 0;
}
@ -111,6 +114,7 @@ static int32_t streamAlignCheckpoint(SStreamTask* pTask) {
return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1);
}
// todo handle down the transId of checkpoint to sink/agg tasks.
static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) {
SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock));
if (pChkpoint == NULL) {
@ -149,6 +153,7 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo
// 1. set task status to be prepared for check point, no data are allowed to put into inputQ.
streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT);
pTask->chkInfo.transId = pReq->transId;
pTask->chkInfo.checkpointingId = pReq->checkpointId;
pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
pTask->chkInfo.startTs = taosGetTimestampMs();
@ -273,8 +278,9 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) {
pTask->chkInfo.failedId = 0;
pTask->chkInfo.startTs = 0; // clear the recorded start time
pTask->chkInfo.checkpointNotReadyTasks = 0;
// pTask->chkInfo.checkpointAlignCnt = 0;
pTask->chkInfo.transId = 0;
pTask->chkInfo.dispatchCheckpointTrigger = false;
streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks
if (clearChkpReadyMsg) {
streamClearChkptReadyMsg(pTask);
@ -341,9 +347,8 @@ int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) {
return code;
}
void streamTaskSetFailedId(SStreamTask* pTask) {
void streamTaskSetCheckpointFailedId(SStreamTask* pTask) {
pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId;
pTask->chkInfo.checkpointId = pTask->chkInfo.checkpointingId;
}
int32_t getChkpMeta(char* id, char* path, SArray* list) {
@ -485,7 +490,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) {
code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE);
taosThreadMutexUnlock(&pTask->lock);
streamTaskSetFailedId(pTask);
streamTaskSetCheckpointFailedId(pTask);
stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, pTask->id.idStr,
ckId);
}

View File

@ -352,6 +352,7 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD
return code;
}
// it's a new vnode to receive dispatch msg, so add one
if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 1);
}
@ -372,8 +373,15 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD
pTask->msgInfo.pData = pReqs;
}
stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64, pTask->id.idStr, pTask->execInfo.dispatch,
pTask->pMeta->stage);
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) {
stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64 " %p", pTask->id.idStr,
pTask->execInfo.dispatch, pTask->pMeta->stage, pTask->msgInfo.pData);
} else {
stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64 " dstVgNum:%d %p", pTask->id.idStr,
pTask->execInfo.dispatch, pTask->pMeta->stage, pTask->outputInfo.shuffleDispatcher.waitingRspCnt,
pTask->msgInfo.pData);
}
return code;
}
@ -395,9 +403,11 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch
SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgroups = taosArrayGetSize(vgInfo);
stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d vgroup(s), msgId:%d", id,
pTask->info.selfChildId, numOfVgroups, msgId);
int32_t actualVgroups = pTask->outputInfo.shuffleDispatcher.waitingRspCnt;
stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d/%d vgroup(s), msgId:%d", id,
pTask->info.selfChildId, actualVgroups, numOfVgroups, msgId);
int32_t numOfSend = 0;
for (int32_t i = 0; i < numOfVgroups; i++) {
if (pDispatchMsg[i].blockNum > 0) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
@ -408,6 +418,11 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch
if (code < 0) {
break;
}
// no need to try remain, all already send.
if (++numOfSend == actualVgroups) {
break;
}
}
}
@ -731,7 +746,7 @@ int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) {
taosArrayClear(pTask->pReadyMsgList);
stDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel);
} else {
stDebug("s-task:%s level:%d already send rsp to mnode", pTask->id.idStr, pTask->info.taskLevel);
stDebug("s-task:%s level:%d already send rsp checkpoint success to mnode", pTask->id.idStr, pTask->info.taskLevel);
}
taosThreadMutexUnlock(&pTask->lock);
@ -1081,6 +1096,7 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) {
// this message has been sent successfully, let's try next one.
static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) {
stDebug("s-task:%s destroy dispatch msg:%p", pTask->id.idStr, pTask->msgInfo.pData);
destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask));
bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER);

Some files were not shown because too many files have changed in this diff Show More