fix(stream): handle the repeatly issued nodeUpdate msg from dnode.
This commit is contained in:
parent
26aaa4a83c
commit
03165d5327
|
@ -436,6 +436,11 @@ typedef struct STaskStartInfo {
|
|||
int32_t elapsedTime;
|
||||
} STaskStartInfo;
|
||||
|
||||
typedef struct STaskUpdateInfo {
|
||||
SHashObj* pTasks;
|
||||
int32_t transId;
|
||||
} STaskUpdateInfo;
|
||||
|
||||
// meta
|
||||
typedef struct SStreamMeta {
|
||||
char* path;
|
||||
|
@ -458,7 +463,7 @@ typedef struct SStreamMeta {
|
|||
SHashObj* pTaskBackendUnique;
|
||||
TdThreadMutex backendMutex;
|
||||
SMetaHbInfo* pHbInfo;
|
||||
SHashObj* pUpdateTaskSet;
|
||||
STaskUpdateInfo updateInfo;
|
||||
int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta
|
||||
int32_t numOfPausedTasks;
|
||||
int32_t chkptNotReadyTasks;
|
||||
|
@ -664,6 +669,7 @@ typedef struct SNodeUpdateInfo {
|
|||
} SNodeUpdateInfo;
|
||||
|
||||
typedef struct SStreamTaskNodeUpdateMsg {
|
||||
int32_t transId; // to identify the msg
|
||||
int64_t streamId;
|
||||
int32_t taskId;
|
||||
SArray* pNodeList; // SArray<SNodeUpdateInfo>
|
||||
|
|
|
@ -1883,18 +1883,19 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
|
|||
return TSDB_CODE_ACTION_IN_PROGRESS;
|
||||
}
|
||||
|
||||
static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChangeInfo *pInfo, int64_t streamId,
|
||||
int32_t taskId) {
|
||||
pMsg->streamId = streamId;
|
||||
pMsg->taskId = taskId;
|
||||
static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChangeInfo *pInfo, SStreamTaskId *pId,
|
||||
int32_t transId) {
|
||||
pMsg->streamId = pId->streamId;
|
||||
pMsg->taskId = pId->taskId;
|
||||
pMsg->transId = transId;
|
||||
pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo));
|
||||
taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList);
|
||||
}
|
||||
|
||||
static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId,
|
||||
int64_t streamId, int32_t taskId) {
|
||||
SStreamTaskId* pId, int32_t transId) {
|
||||
SStreamTaskNodeUpdateMsg req = {0};
|
||||
initNodeUpdateMsg(&req, pInfo, streamId, taskId);
|
||||
initNodeUpdateMsg(&req, pInfo, pId, transId);
|
||||
|
||||
int32_t code = 0;
|
||||
int32_t blen;
|
||||
|
@ -1968,7 +1969,7 @@ void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_
|
|||
// todo extract method: traverse stream tasks
|
||||
// build trans to update the epset
|
||||
static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans* pTrans) {
|
||||
mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid);
|
||||
mDebug("start to build stream:0x%" PRIx64 " tasks epset update", pStream->uid);
|
||||
|
||||
taosWLockLatch(&pStream->lock);
|
||||
int32_t numOfLevels = taosArrayGetSize(pStream->tasks);
|
||||
|
@ -1983,7 +1984,7 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p
|
|||
void *pBuf = NULL;
|
||||
int32_t len = 0;
|
||||
streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList);
|
||||
doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, pTask->id.streamId, pTask->id.taskId);
|
||||
doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id);
|
||||
|
||||
STransAction action = {0};
|
||||
initTransAction(&action, pBuf, len, TDMT_VND_STREAM_TASK_UPDATE, &pTask->info.epSet);
|
||||
|
|
|
@ -125,7 +125,6 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
|
|||
int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t numOfCols, int8_t precision);
|
||||
int32_t tqSendDataRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp,
|
||||
int32_t type, int32_t vgId);
|
||||
//int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId);
|
||||
int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId);
|
||||
|
||||
// tqMeta
|
||||
|
@ -133,7 +132,6 @@ int32_t tqMetaOpen(STQ* pTq);
|
|||
int32_t tqMetaClose(STQ* pTq);
|
||||
int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle);
|
||||
int32_t tqMetaDeleteHandle(STQ* pTq, const char* key);
|
||||
//int32_t tqMetaRestoreHandle(STQ* pTq);
|
||||
int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_t vLen);
|
||||
int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key);
|
||||
int32_t tqMetaRestoreCheckInfo(STQ* pTq);
|
||||
|
|
|
@ -20,6 +20,12 @@ typedef struct {
|
|||
int8_t inited;
|
||||
} STqMgmt;
|
||||
|
||||
typedef struct STaskUpdateEntry {
|
||||
int64_t streamId;
|
||||
int32_t taskId;
|
||||
int32_t transId;
|
||||
} STaskUpdateEntry;
|
||||
|
||||
static STqMgmt tqMgmt = {0};
|
||||
|
||||
// 0: not init
|
||||
|
@ -1869,7 +1875,26 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
|
|||
}
|
||||
|
||||
SStreamTask* pTask = *ppTask;
|
||||
tqDebug("s-task:%s receive nodeEp update msg from mnode", pTask->id.idStr);
|
||||
|
||||
if (pMeta->updateInfo.transId != req.transId) {
|
||||
pMeta->updateInfo.transId = req.transId;
|
||||
tqDebug("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId);
|
||||
// info needs to be kept till the new trans to update the nodeEp arrived.
|
||||
taosHashClear(pMeta->updateInfo.pTasks);
|
||||
} else {
|
||||
tqDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId);
|
||||
}
|
||||
|
||||
STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId};
|
||||
void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry));
|
||||
if (exist != NULL) {
|
||||
tqDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId,
|
||||
req.transId);
|
||||
rsp.code = TSDB_CODE_SUCCESS;
|
||||
taosWUnLockLatch(&pMeta->lock);
|
||||
taosArrayDestroy(req.pNodeList);
|
||||
return rsp.code;
|
||||
}
|
||||
|
||||
streamTaskUpdateEpsetInfo(pTask, req.pNodeList);
|
||||
streamTaskResetStatus(pTask);
|
||||
|
@ -1899,12 +1924,14 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
|
|||
}
|
||||
|
||||
streamTaskStop(pTask);
|
||||
taosHashPut(pMeta->pUpdateTaskSet, &pTask->id, sizeof(pTask->id), NULL, 0);
|
||||
|
||||
// keep the already handled info
|
||||
taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0);
|
||||
|
||||
if (ppHTask != NULL) {
|
||||
streamTaskStop(*ppHTask);
|
||||
tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr);
|
||||
taosHashPut(pMeta->pUpdateTaskSet, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0);
|
||||
taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0);
|
||||
} else {
|
||||
tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr);
|
||||
}
|
||||
|
@ -1913,7 +1940,7 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
|
|||
|
||||
// possibly only handle the stream task.
|
||||
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta);
|
||||
int32_t updateTasks = taosHashGetSize(pMeta->pUpdateTaskSet);
|
||||
int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks);
|
||||
|
||||
pMeta->startInfo.startedAfterNodeUpdate = 1;
|
||||
|
||||
|
@ -1922,8 +1949,6 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
|
|||
updateTasks, (numOfTasks - updateTasks));
|
||||
taosWUnLockLatch(&pMeta->lock);
|
||||
} else {
|
||||
taosHashClear(pMeta->pUpdateTaskSet);
|
||||
|
||||
if (!pTq->pVnode->restored) {
|
||||
tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", vgId);
|
||||
pMeta->startInfo.startedAfterNodeUpdate = 0;
|
||||
|
|
|
@ -1175,6 +1175,9 @@ int32_t tEncodeStreamTaskUpdateMsg(SEncoder* pEncoder, const SStreamTaskNodeUpda
|
|||
if (tEncodeI64(pEncoder, pMsg->streamId) < 0) return -1;
|
||||
if (tEncodeI32(pEncoder, pMsg->taskId) < 0) return -1;
|
||||
|
||||
// todo this new attribute will be result in being incompatible with previous version
|
||||
if (tEncodeI32(pEncoder, pMsg->transId) < 0) return -1;
|
||||
|
||||
int32_t size = taosArrayGetSize(pMsg->pNodeList);
|
||||
if (tEncodeI32(pEncoder, size) < 0) return -1;
|
||||
|
||||
|
@ -1193,6 +1196,8 @@ int32_t tDecodeStreamTaskUpdateMsg(SDecoder* pDecoder, SStreamTaskNodeUpdateMsg*
|
|||
if (tDecodeI64(pDecoder, &pMsg->streamId) < 0) return -1;
|
||||
if (tDecodeI32(pDecoder, &pMsg->taskId) < 0) return -1;
|
||||
|
||||
if (tDecodeI32(pDecoder, &pMsg->transId) < 0) return -1;
|
||||
|
||||
int32_t size = 0;
|
||||
if (tDecodeI32(pDecoder, &size) < 0) return -1;
|
||||
pMsg->pNodeList = taosArrayInit(size, sizeof(SNodeUpdateInfo));
|
||||
|
|
|
@ -143,8 +143,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
|
|||
goto _err;
|
||||
}
|
||||
|
||||
pMeta->pUpdateTaskSet = taosHashInit(64, fp, false, HASH_NO_LOCK);
|
||||
if (pMeta->pUpdateTaskSet == NULL) {
|
||||
pMeta->updateInfo.pTasks = taosHashInit(64, fp, false, HASH_NO_LOCK);
|
||||
if (pMeta->updateInfo.pTasks == NULL) {
|
||||
goto _err;
|
||||
}
|
||||
|
||||
|
@ -219,7 +219,7 @@ _err:
|
|||
if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb);
|
||||
if (pMeta->db) tdbClose(pMeta->db);
|
||||
if (pMeta->pHbInfo) taosMemoryFreeClear(pMeta->pHbInfo);
|
||||
if (pMeta->pUpdateTaskSet) taosHashCleanup(pMeta->pUpdateTaskSet);
|
||||
if (pMeta->updateInfo.pTasks) taosHashCleanup(pMeta->updateInfo.pTasks);
|
||||
if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet);
|
||||
taosMemoryFree(pMeta);
|
||||
|
||||
|
@ -340,7 +340,7 @@ void streamMetaCloseImpl(void* arg) {
|
|||
|
||||
taosHashCleanup(pMeta->pTasksMap);
|
||||
taosHashCleanup(pMeta->pTaskBackendUnique);
|
||||
taosHashCleanup(pMeta->pUpdateTaskSet);
|
||||
taosHashCleanup(pMeta->updateInfo.pTasks);
|
||||
taosHashCleanup(pMeta->startInfo.pReadyTaskSet);
|
||||
|
||||
taosMemoryFree(pMeta->pHbInfo);
|
||||
|
|
Loading…
Reference in New Issue