fix(stream): add retry logs for not-ready/timed-out downstream tasks, and do some internal refactoring.
This commit is contained in:
parent
77961ea791
commit
fae53efed9
|
@ -424,7 +424,7 @@ typedef struct STaskOutputInfo {
|
||||||
};
|
};
|
||||||
int8_t type;
|
int8_t type;
|
||||||
STokenBucket* pTokenBucket;
|
STokenBucket* pTokenBucket;
|
||||||
SArray* pDownstreamUpdateList;
|
SArray* pNodeEpsetUpdateList;
|
||||||
} STaskOutputInfo;
|
} STaskOutputInfo;
|
||||||
|
|
||||||
typedef struct SUpstreamInfo {
|
typedef struct SUpstreamInfo {
|
||||||
|
@ -445,6 +445,8 @@ typedef struct STaskCheckInfo {
|
||||||
int32_t notReadyTasks;
|
int32_t notReadyTasks;
|
||||||
int32_t inCheckProcess;
|
int32_t inCheckProcess;
|
||||||
int32_t stopCheckProcess;
|
int32_t stopCheckProcess;
|
||||||
|
int32_t notReadyRetryCount;
|
||||||
|
int32_t timeoutRetryCount;
|
||||||
tmr_h checkRspTmr;
|
tmr_h checkRspTmr;
|
||||||
TdThreadMutex checkInfoLock;
|
TdThreadMutex checkInfoLock;
|
||||||
} STaskCheckInfo;
|
} STaskCheckInfo;
|
||||||
|
|
|
@ -1073,9 +1073,9 @@ static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) {
|
||||||
|
|
||||||
taosThreadMutexLock(&pTask->lock);
|
taosThreadMutexLock(&pTask->lock);
|
||||||
|
|
||||||
int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList);
|
int32_t num = taosArrayGetSize(pTask->outputInfo.pNodeEpsetUpdateList);
|
||||||
for (int j = 0; j < num; ++j) {
|
for (int j = 0; j < num; ++j) {
|
||||||
SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, j);
|
SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pNodeEpsetUpdateList, j);
|
||||||
|
|
||||||
bool exist = existInHbMsg(pMsg, pTaskEpset);
|
bool exist = existInHbMsg(pMsg, pTaskEpset);
|
||||||
if (!exist) {
|
if (!exist) {
|
||||||
|
@ -1085,7 +1085,7 @@ static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
taosArrayClear(pTask->outputInfo.pDownstreamUpdateList);
|
taosArrayClear(pTask->outputInfo.pNodeEpsetUpdateList);
|
||||||
taosThreadMutexUnlock(&pTask->lock);
|
taosThreadMutexUnlock(&pTask->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -356,10 +356,10 @@ static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) {
|
||||||
int32_t vgId = pTask->pMeta->vgId;
|
int32_t vgId = pTask->pMeta->vgId;
|
||||||
|
|
||||||
taosThreadMutexLock(&pTask->lock);
|
taosThreadMutexLock(&pTask->lock);
|
||||||
int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList);
|
int32_t num = taosArrayGetSize(pTask->outputInfo.pNodeEpsetUpdateList);
|
||||||
bool existed = false;
|
bool existed = false;
|
||||||
for (int i = 0; i < num; ++i) {
|
for (int i = 0; i < num; ++i) {
|
||||||
SDownstreamTaskEpset* p = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, i);
|
SDownstreamTaskEpset* p = taosArrayGet(pTask->outputInfo.pNodeEpsetUpdateList, i);
|
||||||
if (p->nodeId == nodeId) {
|
if (p->nodeId == nodeId) {
|
||||||
existed = true;
|
existed = true;
|
||||||
break;
|
break;
|
||||||
|
@ -368,10 +368,10 @@ static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) {
|
||||||
|
|
||||||
if (!existed) {
|
if (!existed) {
|
||||||
SDownstreamTaskEpset t = {.nodeId = nodeId};
|
SDownstreamTaskEpset t = {.nodeId = nodeId};
|
||||||
taosArrayPush(pTask->outputInfo.pDownstreamUpdateList, &t);
|
taosArrayPush(pTask->outputInfo.pNodeEpsetUpdateList, &t);
|
||||||
|
|
||||||
stInfo("s-task:%s vgId:%d downstream nodeId:%d needs to be updated, total needs updated:%d", pTask->id.idStr, vgId,
|
stInfo("s-task:%s vgId:%d downstream nodeId:%d needs to be updated, total needs updated:%d", pTask->id.idStr, vgId,
|
||||||
t.nodeId, (int32_t)taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList));
|
t.nodeId, (num + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
taosThreadMutexUnlock(&pTask->lock);
|
taosThreadMutexUnlock(&pTask->lock);
|
||||||
|
|
|
@ -470,7 +470,7 @@ void tFreeStreamTask(SStreamTask* pTask) {
|
||||||
taosMemoryFree(pTask->outputInfo.pTokenBucket);
|
taosMemoryFree(pTask->outputInfo.pTokenBucket);
|
||||||
taosThreadMutexDestroy(&pTask->lock);
|
taosThreadMutexDestroy(&pTask->lock);
|
||||||
|
|
||||||
pTask->outputInfo.pDownstreamUpdateList = taosArrayDestroy(pTask->outputInfo.pDownstreamUpdateList);
|
pTask->outputInfo.pNodeEpsetUpdateList = taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList);
|
||||||
|
|
||||||
taosMemoryFree(pTask);
|
taosMemoryFree(pTask);
|
||||||
stDebug("s-task:0x%x free task completed", taskId);
|
stDebug("s-task:0x%x free task completed", taskId);
|
||||||
|
@ -571,8 +571,8 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i
|
||||||
// 2MiB per second for sink task
|
// 2MiB per second for sink task
|
||||||
// 50 times sink operator per second
|
// 50 times sink operator per second
|
||||||
streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr);
|
streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr);
|
||||||
pOutputInfo->pDownstreamUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset));
|
pOutputInfo->pNodeEpsetUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset));
|
||||||
if (pOutputInfo->pDownstreamUpdateList == NULL) {
|
if (pOutputInfo->pNodeEpsetUpdateList == NULL) {
|
||||||
stError("s-task:%s failed to prepare downstreamUpdateList, code:%s", pTask->id.idStr, tstrerror(TSDB_CODE_OUT_OF_MEMORY));
|
stError("s-task:%s failed to prepare downstreamUpdateList, code:%s", pTask->id.idStr, tstrerror(TSDB_CODE_OUT_OF_MEMORY));
|
||||||
return TSDB_CODE_OUT_OF_MEMORY;
|
return TSDB_CODE_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
|
@ -1098,8 +1098,11 @@ static int32_t streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, const char* id)
|
||||||
pInfo->notReadyTasks = 0;
|
pInfo->notReadyTasks = 0;
|
||||||
pInfo->inCheckProcess = 0;
|
pInfo->inCheckProcess = 0;
|
||||||
pInfo->stopCheckProcess = 0;
|
pInfo->stopCheckProcess = 0;
|
||||||
taosArrayClear(pInfo->pList);
|
|
||||||
|
|
||||||
|
pInfo->notReadyRetryCount = 0;
|
||||||
|
pInfo->timeoutRetryCount = 0;
|
||||||
|
|
||||||
|
taosArrayClear(pInfo->pList);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1292,11 +1295,13 @@ static void rspMonitorFn(void* param, void* tmrId) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stDebug("s-task:%s %d downstream task(s) not ready, send check msg again", id, numOfNotReady);
|
pInfo->notReadyRetryCount += 1;
|
||||||
|
stDebug("s-task:%s %d downstream task(s) not ready, send check msg again, retry:%d start time:%" PRId64, id,
|
||||||
|
numOfNotReady, pInfo->notReadyRetryCount, pInfo->startTs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// todo add into node update list and send to mnode
|
||||||
if (numOfTimeout > 0) {
|
if (numOfTimeout > 0) {
|
||||||
pInfo->startTs = now;
|
|
||||||
ASSERT(pTask->status.downstreamReady == 0);
|
ASSERT(pTask->status.downstreamReady == 0);
|
||||||
|
|
||||||
for (int32_t i = 0; i < numOfTimeout; ++i) {
|
for (int32_t i = 0; i < numOfTimeout; ++i) {
|
||||||
|
@ -1309,7 +1314,9 @@ static void rspMonitorFn(void* param, void* tmrId) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stDebug("s-task:%s %d downstream tasks timeout, send check msg again, start ts:%" PRId64, id, numOfTimeout, now);
|
pInfo->timeoutRetryCount += 1;
|
||||||
|
stDebug("s-task:%s %d downstream task(s) timeout, send check msg again, retry:%d start time:%" PRId64, id,
|
||||||
|
numOfTimeout, pInfo->timeoutRetryCount, pInfo->startTs);
|
||||||
}
|
}
|
||||||
|
|
||||||
taosTmrReset(rspMonitorFn, CHECK_RSP_INTERVAL, pTask, streamTimer, &pInfo->checkRspTmr);
|
taosTmrReset(rspMonitorFn, CHECK_RSP_INTERVAL, pTask, streamTimer, &pInfo->checkRspTmr);
|
||||||
|
|
Loading…
Reference in New Issue