Merge pull request #23271 from taosdata/fix/liaohj

fix(stream): fix error in creating update node trans.
This commit is contained in:
Haojun Liao 2023-10-16 15:01:47 +08:00 committed by GitHub
commit 90975d36ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 190 additions and 90 deletions

View File

@ -83,9 +83,10 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode);
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList);
static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name);
static STrans *doCreateTrans1(SMnode *pMnode, const char *name, const char* pDbName);
static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans);
static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset);
static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo);
static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans);
static void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecNodeInfo * pExecNode);
static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecNodeInfo *pExecNode);
@ -1962,13 +1963,10 @@ void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_
// todo extract method: traverse stream tasks
// build trans to update the epset
static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo) {
STrans* pTrans = doCreateTrans(pMnode, pStream, "stream-task-update");
if (pTrans == NULL) {
return terrno;
}
static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans* pTrans) {
mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid);
taosWLockLatch(&pStream->lock);
taosWLockLatch(&pStream->lock);
int32_t numOfLevels = taosArrayGetSize(pStream->tasks);
for (int32_t j = 0; j < numOfLevels; ++j) {
@ -1988,31 +1986,13 @@ static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgr
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pBuf);
taosWUnLockLatch(&pStream->lock);
mndTransDrop(pTrans);
return -1;
}
}
}
taosWUnLockLatch(&pStream->lock);
int32_t code = mndPersistTransLog(pStream, pTrans);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pMnode->pSdb, pStream);
return -1;
}
if (mndTransPrepare(pMnode, pTrans) != 0) {
mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr());
sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans);
return -1;
}
sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans);
return TSDB_CODE_ACTION_IN_PROGRESS;
return 0;
}
static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) {
@ -2102,27 +2082,63 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange
// check all streams that involved this vnode should update the epset info
SStreamObj *pStream = NULL;
void *pIter = NULL;
STrans *pTrans = NULL;
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) {
break;
}
if (pTrans == NULL) {
pTrans = doCreateTrans(pMnode, pStream, "stream-task-update");
if (pTrans == NULL) {
sdbRelease(pSdb, pStream);
sdbCancelFetch(pSdb, pIter);
return terrno;
}
}
void *p = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb));
void *p1 = taosHashGet(pChangeInfo->pDBMap, pStream->sourceDb, strlen(pStream->sourceDb));
if (p == NULL && p1 == NULL) {
mndReleaseStream(pMnode, pStream);
mDebug("stream:0x%" PRIx64 " %s not involved nodeUpdate, ignore", pStream->uid, pStream->name);
sdbRelease(pSdb, pStream);
continue;
}
mDebug("stream:0x%" PRIx64 " involved node changed, create update trans", pStream->uid);
int32_t code = createStreamUpdateTrans(pMnode, pStream, pChangeInfo);
mDebug("stream:0x%" PRIx64 " %s involved node changed, create update trans", pStream->uid, pStream->name);
int32_t code = createStreamUpdateTrans(pStream, pChangeInfo, pTrans);
// todo: not continue, drop all and retry again
if (code != TSDB_CODE_SUCCESS) {
mError("stream:0x%" PRIx64 " build nodeUpdate trans failed, ignore and continue, code:%s", pStream->uid,
tstrerror(code));
sdbRelease(pSdb, pStream);
continue;
}
code = mndPersistTransLog(pStream, pTrans);
sdbRelease(pSdb, pStream);
if (code != TSDB_CODE_SUCCESS) {
sdbCancelFetch(pSdb, pIter);
return code;
return -1;
}
}
if (mndTransPrepare(pMnode, pTrans) != 0) {
mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr());
sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans);
return -1;
}
sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans);
// return TSDB_CODE_ACTION_IN_PROGRESS;
return 0;
}
@ -2399,7 +2415,7 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecNodeInfo * pExecNode
ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList));
}
static STrans* doCreateTrans(SMnode* pMnode, SStreamObj* pStream, const char* name) {
STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) {
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, name);
if (pTrans == NULL) {
mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY));

View File

@ -722,32 +722,36 @@ typedef struct SSttBlockLoadCostInfo {
double statisElapsedTime;
} SSttBlockLoadCostInfo;
// One buffered stt data block plus the bookkeeping needed to manage its buffer slot.
typedef struct SBlockDataInfo {
SBlockData data;  // the buffered block data itself
bool pin;  // true while the slot is pinned; a pinned slot is not chosen as the target of the next block load
int32_t sttBlockIndex;  // index of the stt block currently held in `data`; set to -1 on creation, before any block is loaded
} SBlockDataInfo;
typedef struct SSttBlockLoadInfo {
SBlockData blockData[2]; // buffered block data
int32_t statisBlockIndex; // buffered statistics block index
void *statisBlock; // buffered statistics block data
void *pSttStatisBlkArray;
SArray *aSttBlk;
int32_t blockIndex[2]; // to denote the loaded block in the corresponding position.
int32_t currentLoadBlockIndex;
STSchema *pSchema;
int16_t *colIds;
int32_t numOfCols;
bool checkRemainingRow; // todo: no assign value?
bool isLast;
bool sttBlockLoaded;
SBlockDataInfo blockData[2]; // buffered block data
int32_t statisBlockIndex; // buffered statistics block index
void *statisBlock; // buffered statistics block data
void *pSttStatisBlkArray;
SArray *aSttBlk;
int32_t currentLoadBlockIndex;
STSchema *pSchema;
int16_t *colIds;
int32_t numOfCols;
bool checkRemainingRow; // todo: no assign value?
bool isLast;
bool sttBlockLoaded;
SSttBlockLoadCostInfo cost;
} SSttBlockLoadInfo;
typedef struct SMergeTree {
int8_t backward;
SRBTree rbt;
SLDataIter *pIter;
bool destroyLoadInfo;
SSttBlockLoadInfo *pLoadInfo;
const char *idStr;
bool ignoreEarlierTs;
int8_t backward;
SRBTree rbt;
SLDataIter *pIter;
SLDataIter *pPinnedBlockIter;
const char *idStr;
bool ignoreEarlierTs;
} SMergeTree;
typedef struct {
@ -805,9 +809,6 @@ struct SLDataIter {
};
#define tMergeTreeGetRow(_t) (&((_t)->pIter->rInfo.row))
int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid,
STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo,
bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter);
struct SSttFileReader;
typedef int32_t (*_load_tomb_fn)(STsdbReader *pReader, struct SSttFileReader *pSttFileReader,
@ -830,10 +831,13 @@ typedef struct {
void *pReader;
void *idstr;
} SMergeTreeConf;
int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf);
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter);
bool tMergeTreeNext(SMergeTree *pMTree);
void tMergeTreePinSttBlock(SMergeTree* pMTree);
void tMergeTreeUnpinSttBlock(SMergeTree* pMTree);
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree);
void tMergeTreeClose(SMergeTree *pMTree);

View File

@ -29,16 +29,17 @@ SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colLi
return NULL;
}
pLoadInfo->blockIndex[0] = -1;
pLoadInfo->blockIndex[1] = -1;
pLoadInfo->blockData[0].sttBlockIndex = -1;
pLoadInfo->blockData[1].sttBlockIndex = -1;
pLoadInfo->currentLoadBlockIndex = 1;
int32_t code = tBlockDataCreate(&pLoadInfo->blockData[0]);
int32_t code = tBlockDataCreate(&pLoadInfo->blockData[0].data);
if (code) {
terrno = code;
}
code = tBlockDataCreate(&pLoadInfo->blockData[1]);
code = tBlockDataCreate(&pLoadInfo->blockData[1].data);
if (code) {
terrno = code;
}
@ -66,11 +67,16 @@ void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
}
pLoadInfo->currentLoadBlockIndex = 1;
pLoadInfo->blockIndex[0] = -1;
pLoadInfo->blockIndex[1] = -1;
tBlockDataDestroy(&pLoadInfo->blockData[0]);
tBlockDataDestroy(&pLoadInfo->blockData[1]);
SBlockDataInfo* pInfo = &pLoadInfo->blockData[0];
tBlockDataDestroy(&pInfo->data);
pInfo->sttBlockIndex = -1;
pInfo->pin = false;
pInfo = &pLoadInfo->blockData[1];
tBlockDataDestroy(&pInfo->data);
pInfo->sttBlockIndex = -1;
pInfo->pin = false;
taosArrayDestroy(pLoadInfo->aSttBlk);
taosMemoryFree(pLoadInfo);
@ -109,39 +115,48 @@ void *destroySttBlockReader(SArray *pLDataIterArray, SSttBlockLoadCostInfo* pLoa
return NULL;
}
/*
 * Select the buffer slot that will receive the next loaded data block.
 * The slot other than the current one is preferred; when that slot is pinned,
 * the current slot is kept and will be overwritten instead.
 */
static void updateBlockLoadSlot(SSttBlockLoadInfo* pLoadInfo) {
  const int32_t current = pLoadInfo->currentLoadBlockIndex;
  const int32_t other = current ^ 1;

  // toggling the low bit twice lands back on `current`, so a pinned `other` means "stay put"
  pLoadInfo->currentLoadBlockIndex = pLoadInfo->blockData[other].pin ? current : other;
}
static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) {
int32_t code = 0;
SSttBlockLoadInfo *pInfo = pIter->pBlockLoadInfo;
if (pInfo->blockIndex[0] == pIter->iSttBlk) {
if (pInfo->blockData[0].sttBlockIndex == pIter->iSttBlk) {
if (pInfo->currentLoadBlockIndex != 0) {
tsdbDebug("current load index is set to 0, block index:%d, fileVer:%" PRId64 ", due to uid:%" PRIu64
", load data, %s",
pIter->iSttBlk, pIter->cid, pIter->uid, idStr);
pInfo->currentLoadBlockIndex = 0;
}
return &pInfo->blockData[0];
return &pInfo->blockData[0].data;
}
if (pInfo->blockIndex[1] == pIter->iSttBlk) {
if (pInfo->blockData[1].sttBlockIndex == pIter->iSttBlk) {
if (pInfo->currentLoadBlockIndex != 1) {
tsdbDebug("current load index is set to 1, block index:%d, fileVer:%" PRId64 ", due to uid:%" PRIu64
", load data, %s",
pIter->iSttBlk, pIter->cid, pIter->uid, idStr);
pInfo->currentLoadBlockIndex = 1;
}
return &pInfo->blockData[1];
return &pInfo->blockData[1].data;
}
if (pIter->pSttBlk == NULL || pInfo->pSchema == NULL) {
return NULL;
}
// current block not loaded yet
pInfo->currentLoadBlockIndex ^= 1;
updateBlockLoadSlot(pInfo);
int64_t st = taosGetTimestampUs();
SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex];
SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex].data;
code = tsdbSttFileReadBlockDataByColumn(pIter->pReader, pIter->pSttBlk, pBlock, pInfo->pSchema, &pInfo->colIds[1],
pInfo->numOfCols - 1);
if (code != TSDB_CODE_SUCCESS) {
@ -159,12 +174,12 @@ static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) {
pIter->pSttBlk->minUid, pIter->pSttBlk->maxUid, pIter->pSttBlk->minKey, pIter->pSttBlk->maxKey, pBlock, el,
idStr);
pInfo->blockIndex[pInfo->currentLoadBlockIndex] = pIter->iSttBlk;
pIter->iRow = (pIter->backward) ? pInfo->blockData[pInfo->currentLoadBlockIndex].nRow : -1;
pInfo->blockData[pInfo->currentLoadBlockIndex].sttBlockIndex = pIter->iSttBlk;
pIter->iRow = (pIter->backward) ? pInfo->blockData[pInfo->currentLoadBlockIndex].data.nRow : -1;
tsdbDebug("last block index list:%d, %d, rowIndex:%d %s", pInfo->blockIndex[0], pInfo->blockIndex[1], pIter->iRow,
idStr);
return &pInfo->blockData[pInfo->currentLoadBlockIndex];
tsdbDebug("last block index list:%d, %d, rowIndex:%d %s", pInfo->blockData[0].sttBlockIndex,
pInfo->blockData[1].sttBlockIndex, pIter->iRow, idStr);
return &pInfo->blockData[pInfo->currentLoadBlockIndex].data;
_exit:
if (code != TSDB_CODE_SUCCESS) {
@ -825,8 +840,66 @@ void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) { tRBTreePut(&pMTr
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree) { return pMTree->ignoreEarlierTs; }
/*
 * Pin the buffered slot holding the stt block the iterator currently points at.
 * Slot 0 is examined before slot 1 and the first match wins. When neither slot
 * holds that block nothing is pinned and an error is logged.
 *
 * pIter: iterator whose load-info buffer is inspected
 * id:    identifier string appended to the log messages
 */
static void tLDataIterPinSttBlock(SLDataIter* pIter, const char* id) {
  SSttBlockLoadInfo* pLoadInfo = pIter->pBlockLoadInfo;

  for (int32_t slot = 0; slot < 2; ++slot) {
    if (pLoadInfo->blockData[slot].sttBlockIndex != pIter->iSttBlk) {
      continue;
    }

    pLoadInfo->blockData[slot].pin = true;
    // the sibling slot is expected to be unpinned at this point
    ASSERT(!pLoadInfo->blockData[slot ^ 1].pin);
    tsdbDebug("pin stt-block, blockIndex:%d, stt-fileVer:%" PRId64 " %s", pIter->iSttBlk, pIter->cid, id);
    return;
  }

  tsdbError("failed to pin any stt block, sttBlock:%d stt-fileVer:%" PRId64 " %s", pIter->iSttBlk, pIter->cid, id);
}
/*
 * Clear the pin flag on whichever buffered slot is currently pinned.
 * Slot 0 is examined before slot 1 and only the first pinned slot is cleared.
 * When no slot is pinned an error is logged.
 *
 * pIter: iterator whose load-info buffer is inspected
 * id:    identifier string appended to the log messages
 */
static void tLDataIterUnpinSttBlock(SLDataIter* pIter, const char* id) {
  SSttBlockLoadInfo* pLoadInfo = pIter->pBlockLoadInfo;

  for (int32_t slot = 0; slot < 2; ++slot) {
    SBlockDataInfo* pSlot = &pLoadInfo->blockData[slot];
    if (!pSlot->pin) {
      continue;
    }

    // the sibling slot is expected to be unpinned at this point
    ASSERT(!pLoadInfo->blockData[slot ^ 1].pin);
    pSlot->pin = false;
    tsdbTrace("unpin stt-block:%d, stt-fileVer:%" PRId64 " %s", pSlot->sttBlockIndex, pIter->cid, id);
    return;
  }

  tsdbError("failed to unpin any stt block, sttBlock:%d stt-fileVer:%" PRId64 " %s", pIter->iSttBlk, pIter->cid, id);
}
/*
 * Pin the stt block referenced by the merge tree's current iterator and
 * record that iterator in pPinnedBlockIter. Does nothing when the tree
 * has no current iterator.
 */
void tMergeTreePinSttBlock(SMergeTree *pMTree) {
  SLDataIter *pCurrent = pMTree->pIter;

  if (pCurrent != NULL) {
    pMTree->pPinnedBlockIter = pCurrent;
    tLDataIterPinSttBlock(pCurrent, pMTree->idStr);
  }
}
/*
 * Unpin the stt block of the iterator recorded in pPinnedBlockIter and clear
 * that record. Does nothing when no iterator is recorded.
 */
void tMergeTreeUnpinSttBlock(SMergeTree *pMTree) {
  SLDataIter *pPinned = pMTree->pPinnedBlockIter;

  if (pPinned != NULL) {
    // forget the recorded iterator first, then release its pinned slot
    pMTree->pPinnedBlockIter = NULL;
    tLDataIterUnpinSttBlock(pPinned, pMTree->idStr);
  }
}
bool tMergeTreeNext(SMergeTree *pMTree) {
int32_t code = TSDB_CODE_SUCCESS;
if (pMTree->pIter) {
SLDataIter *pIter = pMTree->pIter;
@ -860,8 +933,5 @@ bool tMergeTreeNext(SMergeTree *pMTree) {
void tMergeTreeClose(SMergeTree *pMTree) {
pMTree->pIter = NULL;
if (pMTree->destroyLoadInfo) {
pMTree->pLoadInfo = destroyLastBlockLoadInfo(pMTree->pLoadInfo);
pMTree->destroyLoadInfo = false;
}
pMTree->pPinnedBlockIter = NULL;
}

View File

@ -1420,13 +1420,24 @@ static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBloc
}
}
// Pin the stt block currently referenced by the last-block reader's merge tree.
static void doPinSttBlock(SLastBlockReader* pLastBlockReader) {
  SMergeTree* pTree = &pLastBlockReader->mergeTree;
  tMergeTreePinSttBlock(pTree);
}
// Release the pin held through the last-block reader's merge tree, if any.
static void doUnpinSttBlock(SLastBlockReader* pLastBlockReader) {
  SMergeTree* pTree = &pLastBlockReader->mergeTree;
  tMergeTreeUnpinSttBlock(pTree);
}
static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SLastBlockReader* pLastBlockReader,
STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader,
bool* copied) {
int32_t code = TSDB_CODE_SUCCESS;
*copied = false;
// pin the current stt block so that fetching the next row cannot replace it in the load buffer
doPinSttBlock(pLastBlockReader);
bool hasVal = nextRowFromLastBlocks(pLastBlockReader, pScanInfo, &pReader->info.verRange);
doUnpinSttBlock(pLastBlockReader);
if (hasVal) {
int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader);
if (next1 != ts) {

View File

@ -146,7 +146,6 @@ typedef struct SLastBlockReader {
int32_t order;
uint64_t uid;
SMergeTree mergeTree;
SSttBlockLoadInfo* pInfo;
int64_t currentKey;
} SLastBlockReader;

View File

@ -301,6 +301,8 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) {
if (status == TASK_STATUS__SCAN_HISTORY) {
stDebug("s-task:%s enter into scan-history data stage, status:%s", id, str);
streamTaskStartScanHistory(pTask);
// start the related fill-history task, when current task is ready
streamLaunchFillHistoryTask(pTask);
} else {
// fill-history tasks are not allowed to reach here.
if (pTask->info.fillHistory == 1) {
@ -312,9 +314,6 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) {
streamTaskEnablePause(pTask);
}
}
// when current stream task is ready, check the related fill history task.
streamLaunchFillHistoryTask(pTask);
}
int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
@ -370,14 +369,15 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs
"s-task:%s downstream taskId:0x%x (vgId:%d) vnode-transfer/leader-change detected, not send check again, "
"roll-back needed",
id, pRsp->downstreamTaskId, pRsp->downstreamNodeId);
} else if (pRsp->status == TASK_SELF_NEW_STAGE) {
stError(
"s-task:%s vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, not send check "
"again, roll-back needed",
id, pRsp->oldStage, (int32_t)pTask->pMeta->stage);
} else {
STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp);
if (pRsp->status == TASK_SELF_NEW_STAGE) {
stError(
"s-task:%s vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, continue check "
"till downstream nodeUpdate",
id, pRsp->oldStage, (int32_t)pTask->pMeta->stage);
}
STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp);
int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1);
stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, retry in 100ms, ref:%d ", id,
pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref);

View File

@ -612,7 +612,7 @@ int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) {
p->latestUpdateTs = taosGetTimestampMs();
p->updateCount += 1;
stDebug("s-task:%s update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.idStr,
stDebug("s-task:0x%x update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.taskId,
numOfNodes, p->updateCount, prevTs);
for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) {