Merge pull request #25598 from taosdata/fix/3_liaohj

refactor(stream): opt stream sink perf.
This commit is contained in:
Haojun Liao 2024-04-30 16:59:22 +08:00 committed by GitHub
commit f2bc93b78d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 50 additions and 22 deletions

View File

@ -173,6 +173,7 @@ static const SSysDbTableSchema streamSchema[] = {
{.name = "watermark", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false},
{.name = "trigger", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "sink_quota", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "checkpoint_interval", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "checkpoint_backup", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "history_scan_idle", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
};

View File

@ -1213,27 +1213,31 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SMDropStreamReq dropReq = {0};
if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) {
mError("invalid drop stream msg recv, discarded");
terrno = TSDB_CODE_INVALID_MSG;
return -1;
}
pStream = mndAcquireStream(pMnode, dropReq.name);
mDebug("recv drop stream:%s msg", dropReq.name);
pStream = mndAcquireStream(pMnode, dropReq.name);
if (pStream == NULL) {
if (dropReq.igNotExists) {
mInfo("stream:%s not exist, ignore not exist is set", dropReq.name);
mInfo("stream:%s not exist, ignore not exist is set, drop stream exec done with success", dropReq.name);
sdbRelease(pMnode->pSdb, pStream);
tFreeMDropStreamReq(&dropReq);
return 0;
} else {
terrno = TSDB_CODE_MND_STREAM_NOT_EXIST;
mError("stream:%s not exist failed to drop", dropReq.name);
mError("stream:%s not exist failed to drop it", dropReq.name);
tFreeMDropStreamReq(&dropReq);
return -1;
}
}
if (pStream->smaId != 0) {
mDebug("stream:%s, uid:0x%"PRIx64" try to drop sma related stream", dropReq.name, pStream->uid);
void *pIter = NULL;
SSmaObj *pSma = NULL;
pIter = sdbFetch(pMnode->pSdb, SDB_SMA, pIter, (void**)&pSma);
@ -1241,13 +1245,21 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
if (pSma && pSma->uid == pStream->smaId) {
sdbRelease(pMnode->pSdb, pSma);
sdbRelease(pMnode->pSdb, pStream);
sdbCancelFetch(pMnode->pSdb, pIter);
tFreeMDropStreamReq(&dropReq);
terrno = TSDB_CODE_TSMA_MUST_BE_DROPPED;
mError("try to drop sma-related stream:%s, uid:0x%" PRIx64 " code:%s only allowed to be dropped along with sma",
dropReq.name, pStream->uid, tstrerror(terrno));
return -1;
}
if (pSma) sdbRelease(pMnode->pSdb, pSma);
pIter = sdbFetch(pMnode->pSdb, SDB_SMA, pIter, (void**)&pSma);
if (pSma) {
sdbRelease(pMnode->pSdb, pSma);
}
pIter = sdbFetch(pMnode->pSdb, SDB_SMA, pIter, (void **)&pSma);
}
}
@ -1267,7 +1279,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
STrans *pTrans = doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream");
if (pTrans == NULL) {
mError("stream:%s, failed to drop since %s", dropReq.name, terrstr());
mError("stream:%s uid:0x%"PRIx64" failed to drop since %s", dropReq.name, pStream->uid, terrstr());
sdbRelease(pMnode->pSdb, pStream);
tFreeMDropStreamReq(&dropReq);
return -1;
@ -1277,7 +1289,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
// drop all tasks
if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) {
mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr());
mError("stream:%s uid:0x%" PRIx64 " failed to drop task since %s", dropReq.name, pStream->uid, terrstr());
sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans);
tFreeMDropStreamReq(&dropReq);
@ -1303,10 +1315,13 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
// kill the related checkpoint trans
int32_t transId = mndStreamGetRelTrans(pMnode, pStream->uid);
if (transId != 0) {
mDebug("drop active related transId:%d due to stream:%s dropped", transId, pStream->name);
mDebug("drop active transId:%d due to stream:%s uid:0x%" PRIx64 " dropped", transId, pStream->name, pStream->uid);
mndKillTransImpl(pMnode, transId, pStream->sourceDb);
}
mDebug("stream:%s uid:0x%" PRIx64 " transId:%d start to drop related task when dropping stream", dropReq.name,
pStream->uid, transId);
removeStreamTasksInBuf(pStream, &execInfo);
SName name = {0};
@ -1488,6 +1503,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&trigger, false);
// sink_quota
char sinkQuota[20 + VARSTR_HEADER_SIZE] = {0};
sinkQuota[0] = '0';
char dstStr[20] = {0};
@ -1495,6 +1511,14 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false);
// checkpoint interval
char tmp[20 + VARSTR_HEADER_SIZE] = {0};
sprintf(varDataVal(tmp), "%d sec", tsStreamCheckpointInterval);
varDataSetLen(tmp, strlen(varDataVal(tmp)));
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)tmp, false);
// checkpoint backup type
char backup[20 + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(backup, "none")

View File

@ -333,8 +333,9 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c
}
while (j < newLen && k < oldLen) {
SRow* pNewRow = taosArrayGetP(pNew->aRowP, j);
SRow* pOldRow = taosArrayGetP(pExisted->aRowP, k);
SRow* pNewRow = *(SRow**)TARRAY_GET_ELEM(pNew->aRowP, j);
SRow* pOldRow = *(SRow**)TARRAY_GET_ELEM(pExisted->aRowP, k);
if (pNewRow->ts < pOldRow->ts) {
taosArrayPush(pFinal, &pNewRow);
j += 1;
@ -373,12 +374,12 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c
}
while (j < newLen) {
SRow* pRow = taosArrayGetP(pNew->aRowP, j++);
SRow* pRow = *(SRow**)TARRAY_GET_ELEM(pNew->aRowP, j++);
taosArrayPush(pFinal, &pRow);
}
while (k < oldLen) {
SRow* pRow = taosArrayGetP(pExisted->aRowP, k++);
SRow* pRow = *(SRow**)TARRAY_GET_ELEM(pExisted->aRowP, k++);
taosArrayPush(pFinal, &pRow);
}

View File

@ -35,6 +35,7 @@ static int32_t streamTaskUpdateCheckInfo(STaskCheckInfo* pInfo, int32_t taskId,
static void setCheckDownstreamReqInfo(SStreamTaskCheckReq* pReq, int64_t reqId, int32_t dstTaskId, int32_t dstNodeId);
static void getCheckRspStatus(STaskCheckInfo* pInfo, int64_t el, int32_t* numOfReady, int32_t* numOfFault,
int32_t* numOfNotRsp, SArray* pTimeoutList, SArray* pNotReadyList, const char* id);
static int32_t addDownstreamFailedStatusResultAsync(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int32_t taskId);
static SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* pInfo, int32_t taskId);
// check status
@ -383,6 +384,8 @@ int32_t streamTaskAddReqInfo(STaskCheckInfo* pInfo, int64_t reqId, int32_t taskI
}
void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) {
const char* id = pTask->id.idStr;
SStreamTaskCheckReq req = {
.streamId = pTask->id.streamId,
.upstreamTaskId = pTask->id.taskId,
@ -397,7 +400,7 @@ void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) {
setCheckDownstreamReqInfo(&req, p->reqId, pDispatch->taskId, pDispatch->taskId);
stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " re-send check downstream task:0x%x(vgId:%d) reqId:0x%" PRIx64,
pTask->id.idStr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, req.reqId);
id, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, req.reqId);
streamSendCheckMsg(pTask, &req, pOutputInfo->fixedDispatcher.nodeId, &pOutputInfo->fixedDispatcher.epSet);
} else if (pOutputInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
@ -412,8 +415,7 @@ void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) {
stDebug("s-task:%s (vgId:%d) stage:%" PRId64
" re-send check downstream task:0x%x(vgId:%d) (shuffle), idx:%d reqId:0x%" PRIx64,
pTask->id.idStr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, i,
p->reqId);
id, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, i, p->reqId);
streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
break;
}
@ -524,7 +526,7 @@ void handleNotReadyDownstreamTask(SStreamTask* pTask, SArray* pNotReadyList) {
// the action of add status may incur the restart procedure, which should NEVER be executed in the timer thread.
// The restart of all tasks requires that all tasks should not have active timer for now. Therefore, the execution
// of restart in timer thread will result in a dead lock.
static int32_t addDownstreamFailedStatusResultAsync(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int32_t taskId) {
int32_t addDownstreamFailedStatusResultAsync(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int32_t taskId) {
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -614,8 +616,8 @@ void rspMonitorFn(void* param, void* tmrId) {
int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1);
stDebug(
"s-task:%s status:%s vgId:%d all rsp. quit from monitor rsp tmr, since vnode-transfer/leader-change/restart "
"detected, notRsp:%d, notReady:%d, fault:%d, timeout:%d, ready:%d ref:%d",
id, pStat->name, vgId, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref);
"detected, total:%d, notRsp:%d, notReady:%d, fault:%d, timeout:%d, ready:%d ref:%d",
id, pStat->name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref);
streamTaskCompleteCheckRsp(pInfo, false, id);
taosThreadMutexUnlock(&pInfo->checkInfoLock);
@ -630,9 +632,9 @@ void rspMonitorFn(void* param, void* tmrId) {
if (pInfo->stopCheckProcess == 1) {
int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1);
stDebug(
"s-task:%s status:%s vgId:%d stopped by other threads to check downstream process, notRsp:%d, notReady:%d, "
"fault:%d, timeout:%d, ready:%d ref:%d",
id, pStat->name, vgId, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref);
"s-task:%s status:%s vgId:%d stopped by other threads to check downstream process, total:%d, notRsp:%d, "
"notReady:%d, fault:%d, timeout:%d, ready:%d ref:%d",
id, pStat->name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref);
streamTaskCompleteCheckRsp(pInfo, false, id);
taosThreadMutexUnlock(&pInfo->checkInfoLock);

View File

@ -222,7 +222,7 @@ class TDTestCase:
tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'")
tdLog.info(len(tdSql.queryResult))
tdSql.checkEqual(True, len(tdSql.queryResult) in range(253, 254))
tdSql.checkEqual(True, len(tdSql.queryResult) in range(254, 255))
tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'")
tdSql.checkEqual(54, len(tdSql.queryResult))